[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
branch master updated: * tp/Texinfo/ParserNonXS.pm (_end_line, _handle_m
From: |
Patrice Dumas |
Subject: |
branch master updated: * tp/Texinfo/ParserNonXS.pm (_end_line, _handle_macro) (_handle_menu_entry_separators, _handle_open_brace) (_process_remaining_on_line, _parse_texi): use exactly the whitespace_chars_except_newline characters from the XS parser in matching. |
Date: |
Fri, 14 Jul 2023 03:55:07 -0400 |
This is an automated email from the git hooks/post-receive script.
pertusus pushed a commit to branch master
in repository texinfo.
The following commit(s) were added to refs/heads/master by this push:
new 9832527a73 * tp/Texinfo/ParserNonXS.pm (_end_line, _handle_macro)
(_handle_menu_entry_separators, _handle_open_brace)
(_process_remaining_on_line, _parse_texi): use exactly the
whitespace_chars_except_newline characters from the XS parser in matching.
9832527a73 is described below
commit 9832527a73903cade619ffcebd6d7382f15f9313
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Fri Jul 14 09:54:55 2023 +0200
* tp/Texinfo/ParserNonXS.pm (_end_line, _handle_macro)
(_handle_menu_entry_separators, _handle_open_brace)
(_process_remaining_on_line, _parse_texi): use exactly the
whitespace_chars_except_newline characters from the XS parser
in matching.
* tp/Texinfo/ParserNonXS.pm (_debug_protect_eol)
(_process_remaining_on_line): use a function to protect eol in debug
messages.
* tp/Texinfo/ParserNonXS.pm (_process_remaining_on_line),
tp/Texinfo/XS/parsetexi/parser.c (process_remaining_on_line): add
debug message for form feeds.
---
ChangeLog | 16 +++++++++++++++
tp/Texinfo/ParserNonXS.pm | 42 ++++++++++++++++++++++++++++------------
tp/Texinfo/XS/parsetexi/parser.c | 9 ++++++++-
3 files changed, 54 insertions(+), 13 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index c1d74ec8fc..5344f279f6 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,19 @@
+2023-07-14 Patrice Dumas <pertusus@free.fr>
+
+ * tp/Texinfo/ParserNonXS.pm (_end_line, _handle_macro)
+ (_handle_menu_entry_separators, _handle_open_brace)
+ (_process_remaining_on_line, _parse_texi): use exactly the
+ whitespace_chars_except_newline chracters from the XS parser
+ in matching.
+
+ * tp/Texinfo/ParserNonXS.pm (_debug_protect_eol)
+ (_process_remaining_on_line): use a function to protect eol in debug
+ messages.
+
+ * tp/Texinfo/ParserNonXS.pm (_process_remaining_on_line),
+ tp/Texinfo/XS/parsetexi/parser.c (process_remaining_on_line): add
+ debug message for form feeds.
+
2023-07-13 Patrice Dumas <pertusus@free.fr>
* tp/Texinfo/ParserNonXS.pm (_expand_macro_arguments),
diff --git a/tp/Texinfo/ParserNonXS.pm b/tp/Texinfo/ParserNonXS.pm
index f364259945..a5263e52e5 100644
--- a/tp/Texinfo/ParserNonXS.pm
+++ b/tp/Texinfo/ParserNonXS.pm
@@ -1238,6 +1238,13 @@ sub _transfer_source_marks($$)
}
}
+sub _debug_protect_eol($)
+{
+ my $line = shift;
+ $line =~ s/\n/\\n/g;
+ return $line;
+}
+
# parse a @macro line
sub _parse_macro_command_line($$$$$;$)
{
@@ -4422,7 +4429,8 @@ sub _end_line($$$)
sub _start_empty_line_after_command($$$) {
my ($line, $current, $command) = @_;
- $line =~ s/^([^\S\r\n]*)//;
+ # based on whitespace_chars_except_newline in XS parser
+ $line =~ s/^([ \t\cK\f]*)//;
my $spaces_after_command = { 'type' => 'ignorable_spaces_after_command',
'text' => $1,
'parent' => $current,
@@ -4817,7 +4825,8 @@ sub _handle_macro($$$$$)
last;
}
} else {
- if (not $arg_elt->{'contents'} and $line =~ s/^([^\S\r\n]+)//) {
+ # based on whitespace_chars_except_newline in XS parser
+ if (not $arg_elt->{'contents'} and $line =~ s/^([ \t\cK\f]+)//) {
my $internal_space = {'type' => 'internal_spaces_before_argument',
'text' => $1,
'parent' => $arg_elt,
@@ -5021,7 +5030,8 @@ sub _handle_menu_entry_separators($$$$$$)
$current = $current->{'contents'}->[-1];
$current = _merge_text($self, $current, $separator, $popped_element);
# here we collect spaces following separators.
- } elsif ($$line_ref =~ s/^([^\S\r\n]+)//) {
+ # based on whitespace_chars_except_newline in XS parser
+ } elsif ($$line_ref =~ s/^([ \t\cK\f]+)//) {
# NOTE a trailing end of line could be considered to be part
# of the separator. Right now it is part of the description,
# since it is catched (in the next while) as one of the case below
@@ -5774,7 +5784,8 @@ sub _handle_open_brace($$$$)
} else {
$self->_push_context('ct_brace_command', $command);
}
- $line =~ s/([^\S\f\n]*)//;
+ # based on whitespace_chars_except_newline in XS parser
+ $line =~ s/([ \t\cK\f]*)//;
$current->{'type'} = 'brace_command_context';
# internal_spaces_before_argument is a transient internal type,
# which should end up in info spaces_before_argument.
@@ -6307,6 +6318,8 @@ sub _process_remaining_on_line($$$$)
my $retval = $STILL_MORE_TO_PROCESS;
+ #print STDERR "PROCESS "._debug_protect_eol($line)."\n" if ($self->{'DEBUG'});
+
# in a 'raw' (verbatim, ignore, (r)macro)
if ($current->{'cmdname'}
and $block_commands{$current->{'cmdname'}}
@@ -6492,7 +6505,8 @@ sub _process_remaining_on_line($$$$)
# start a new line for the @end line, this is normally done
# at the beginning of a line, but not here, as we directly
# got the lines.
- $line =~ s/^([^\S\r\n]*)//;
+ # based on whitespace_chars_except_newline in XS parser
+ $line =~ s/^([ \t\cK\f]*)//;
push @{$current->{'contents'}}, { 'type' => 'empty_line',
'text' => $1,
'parent' => $current };
@@ -6698,12 +6712,11 @@ sub _process_remaining_on_line($$$$)
if ($current->{'cmdname'}
and defined($self->{'brace_commands'}->{$current->{'cmdname'}})
and !$open_brace) {
- if ($self->{'DEBUG'}) {
- my $line_str = $line;
- $line_str =~ s/\n/\\n/g;
- print STDERR "BRACE CMD: no brace after \@$current->{'cmdname'}"
- ."||| $line_str\n";
- }
+
+ print STDERR "BRACE CMD: no brace after \@$current->{'cmdname'}"
+ ."||| "._debug_protect_eol($line)."\n"
+ if ($self->{'DEBUG'});
+
# Note that non ascii spaces do not count as spaces
if ($line =~ /^(\s+)/
and ($accent_commands{$current->{'cmdname'}}
@@ -7000,6 +7013,10 @@ sub _process_remaining_on_line($$$$)
}
} elsif ($form_feed) {
substr ($line, 0, 1) = '';
+ print STDERR "FORM FEED in "
+ .Texinfo::Common::debug_print_element($current, 1).": "
+ ._debug_protect_eol($line)."\n"
+ if ($self->{'DEBUG'});
if ($current->{'type'}
and $current->{'type'} eq 'paragraph') {
# A form feed stops and restart a paragraph.
@@ -7126,7 +7143,8 @@ sub _parse_texi($$$)
# Remove this element and update 'extra' values.
_abort_empty_line($self, $current);
}
- $line =~ s/^([^\S\r\n]*)//;
+ # based on whitespace_chars_except_newline in XS parser
+ $line =~ s/^([ \t\cK\f]*)//;
push @{$current->{'contents'}}, { 'type' => 'empty_line',
'text' => $1,
'parent' => $current };
diff --git a/tp/Texinfo/XS/parsetexi/parser.c b/tp/Texinfo/XS/parsetexi/parser.c
index cdd12ff79d..7fe2813849 100644
--- a/tp/Texinfo/XS/parsetexi/parser.c
+++ b/tp/Texinfo/XS/parsetexi/parser.c
@@ -32,7 +32,8 @@
const char *whitespace_chars = " \t\v\f\r\n";
const char *digit_chars = "0123456789";
-// [^\S\r\n] in Perl
+/* in the perl parser, comments including whitespace_chars_except_newline
+ show where code should be changed if the list of characters changes here */
const char *whitespace_chars_except_newline = " \t\v\f";
/* count characters, not bytes. */
@@ -1333,6 +1334,10 @@ process_remaining_on_line (ELEMENT **current_inout, char **line_inout)
/* remains set only if command is unknown, otherwise cmd is used */
char *command = 0;
+ /*
+ debug_nonl("PROCESS "); debug_print_protected_string (line); debug ("");
+ */
+
/********* BLOCK_raw ******************/
if (command_flags(current) & CF_block
&& (command_data(current->cmd).data == BLOCK_raw))
@@ -2358,6 +2363,8 @@ process_remaining_on_line (ELEMENT **current_inout, char **line_inout)
else if (*line == '\f')
{
char separator = *line++;
+ debug_nonl ("FORM FEED in "); debug_print_element (current, 1);
+ debug_nonl (": "); debug_print_protected_string (line); debug ("");
if (current->type == ET_paragraph)
{
ELEMENT *e;
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- branch master updated: * tp/Texinfo/ParserNonXS.pm (_end_line, _handle_macro) (_handle_menu_entry_separators, _handle_open_brace) (_process_remaining_on_line, _parse_texi): use exactly the whitespace_chars_except_newline characters from the XS parser in matching.,
Patrice Dumas <=