texinfo-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

branch master updated: * tp/Texinfo/ParserNonXS.pm (_end_line, _handle_m


From: Patrice Dumas
Subject: branch master updated: * tp/Texinfo/ParserNonXS.pm (_end_line, _handle_macro) (_handle_menu_entry_separators, _handle_open_brace) (_process_remaining_on_line, _parse_texi): use exactly the whitespace_chars_except_newline characters from the XS parser in matching.
Date: Fri, 14 Jul 2023 03:55:07 -0400

This is an automated email from the git hooks/post-receive script.

pertusus pushed a commit to branch master
in repository texinfo.

The following commit(s) were added to refs/heads/master by this push:
     new 9832527a73 * tp/Texinfo/ParserNonXS.pm (_end_line, _handle_macro) 
(_handle_menu_entry_separators, _handle_open_brace) 
(_process_remaining_on_line, _parse_texi): use exactly the 
whitespace_chars_except_newline characters from the XS parser in matching.
9832527a73 is described below

commit 9832527a73903cade619ffcebd6d7382f15f9313
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Fri Jul 14 09:54:55 2023 +0200

    * tp/Texinfo/ParserNonXS.pm (_end_line, _handle_macro)
    (_handle_menu_entry_separators, _handle_open_brace)
    (_process_remaining_on_line, _parse_texi): use exactly the
    whitespace_chars_except_newline characters from the XS parser
    in matching.
    
    * tp/Texinfo/ParserNonXS.pm (_debug_protect_eol)
    (_process_remaining_on_line): use a function to protect eol in debug
    messages.
    
    * tp/Texinfo/ParserNonXS.pm (_process_remaining_on_line),
    tp/Texinfo/XS/parsetexi/parser.c (process_remaining_on_line): add
    debug message for form feeds.
---
 ChangeLog                        | 16 +++++++++++++++
 tp/Texinfo/ParserNonXS.pm        | 42 ++++++++++++++++++++++++++++------------
 tp/Texinfo/XS/parsetexi/parser.c |  9 ++++++++-
 3 files changed, 54 insertions(+), 13 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index c1d74ec8fc..5344f279f6 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,19 @@
+2023-07-14  Patrice Dumas  <pertusus@free.fr>
+
+       * tp/Texinfo/ParserNonXS.pm (_end_line, _handle_macro)
+       (_handle_menu_entry_separators, _handle_open_brace)
+       (_process_remaining_on_line, _parse_texi): use exactly the
+       whitespace_chars_except_newline chracters from the XS parser
+       in matching.
+
+       * tp/Texinfo/ParserNonXS.pm (_debug_protect_eol)
+       (_process_remaining_on_line): use a function to protect eol in debug
+       messages.
+
+       * tp/Texinfo/ParserNonXS.pm (_process_remaining_on_line),
+       tp/Texinfo/XS/parsetexi/parser.c (process_remaining_on_line): add
+       debug message for form feeds.
+
 2023-07-13  Patrice Dumas  <pertusus@free.fr>
 
        * tp/Texinfo/ParserNonXS.pm (_expand_macro_arguments),
diff --git a/tp/Texinfo/ParserNonXS.pm b/tp/Texinfo/ParserNonXS.pm
index f364259945..a5263e52e5 100644
--- a/tp/Texinfo/ParserNonXS.pm
+++ b/tp/Texinfo/ParserNonXS.pm
@@ -1238,6 +1238,13 @@ sub _transfer_source_marks($$)
   }
 }
 
+sub _debug_protect_eol($)
+{
+  my $line = shift;
+  $line =~ s/\n/\\n/g;
+  return $line;
+}
+
 # parse a @macro line
 sub _parse_macro_command_line($$$$$;$)
 {
@@ -4422,7 +4429,8 @@ sub _end_line($$$)
 sub _start_empty_line_after_command($$$) {
   my ($line, $current, $command) = @_;
 
-  $line =~ s/^([^\S\r\n]*)//;
+  # based on whitespace_chars_except_newline in XS parser
+  $line =~ s/^([ \t\cK\f]*)//;
   my $spaces_after_command = { 'type' => 'ignorable_spaces_after_command',
                                'text' => $1,
                                'parent' => $current,
@@ -4817,7 +4825,8 @@ sub _handle_macro($$$$$)
             last;
           }
         } else {
-          if (not $arg_elt->{'contents'} and $line =~ s/^([^\S\r\n]+)//) {
+          # based on whitespace_chars_except_newline in XS parser
+          if (not $arg_elt->{'contents'} and $line =~ s/^([ \t\cK\f]+)//) {
             my $internal_space = {'type' => 'internal_spaces_before_argument',
                                   'text' => $1,
                                   'parent' => $arg_elt,
@@ -5021,7 +5030,8 @@ sub _handle_menu_entry_separators($$$$$$)
       $current = $current->{'contents'}->[-1];
       $current = _merge_text($self, $current, $separator, $popped_element);
     # here we collect spaces following separators.
-    } elsif ($$line_ref =~ s/^([^\S\r\n]+)//) {
+    # based on whitespace_chars_except_newline in XS parser
+    } elsif ($$line_ref =~ s/^([ \t\cK\f]+)//) {
       # NOTE a trailing end of line could be considered to be part
       # of the separator. Right now it is part of the description,
       # since it is catched (in the next while) as one of the case below
@@ -5774,7 +5784,8 @@ sub _handle_open_brace($$$$)
       } else {
         $self->_push_context('ct_brace_command', $command);
       }
-      $line =~ s/([^\S\f\n]*)//;
+      # based on whitespace_chars_except_newline in XS parser
+      $line =~ s/([ \t\cK\f]*)//;
       $current->{'type'} = 'brace_command_context';
       # internal_spaces_before_argument is a transient internal type,
       # which should end up in info spaces_before_argument.
@@ -6307,6 +6318,8 @@ sub _process_remaining_on_line($$$$)
 
   my $retval = $STILL_MORE_TO_PROCESS;
 
+  #print STDERR "PROCESS "._debug_protect_eol($line)."\n" if 
($self->{'DEBUG'});
+
   # in a 'raw' (verbatim, ignore, (r)macro)
   if ($current->{'cmdname'}
       and $block_commands{$current->{'cmdname'}}
@@ -6492,7 +6505,8 @@ sub _process_remaining_on_line($$$$)
     # start a new line for the @end line, this is normally done
     # at the beginning of a line, but not here, as we directly
     # got the lines.
-    $line =~ s/^([^\S\r\n]*)//;
+    # based on whitespace_chars_except_newline in XS parser
+    $line =~ s/^([ \t\cK\f]*)//;
     push @{$current->{'contents'}}, { 'type' => 'empty_line',
                                       'text' => $1,
                                       'parent' => $current };
@@ -6698,12 +6712,11 @@ sub _process_remaining_on_line($$$$)
   if ($current->{'cmdname'}
         and defined($self->{'brace_commands'}->{$current->{'cmdname'}})
         and !$open_brace) {
-    if ($self->{'DEBUG'}) {
-      my $line_str = $line;
-      $line_str =~ s/\n/\\n/g;
-      print STDERR "BRACE CMD: no brace after \@$current->{'cmdname'}"
-          ."||| $line_str\n";
-    }
+
+    print STDERR "BRACE CMD: no brace after \@$current->{'cmdname'}"
+       ."||| "._debug_protect_eol($line)."\n"
+           if ($self->{'DEBUG'});
+
     # Note that non ascii spaces do not count as spaces
     if ($line =~ /^(\s+)/
         and ($accent_commands{$current->{'cmdname'}}
@@ -7000,6 +7013,10 @@ sub _process_remaining_on_line($$$$)
     }
   } elsif ($form_feed) {
     substr ($line, 0, 1) = '';
+    print STDERR "FORM FEED in "
+          .Texinfo::Common::debug_print_element($current, 1).": "
+           ._debug_protect_eol($line)."\n"
+      if ($self->{'DEBUG'});
     if ($current->{'type'}
         and $current->{'type'} eq 'paragraph') {
       # A form feed stops and restart a paragraph.
@@ -7126,7 +7143,8 @@ sub _parse_texi($$$)
         # Remove this element and update 'extra' values.
         _abort_empty_line($self, $current);
       }
-      $line =~ s/^([^\S\r\n]*)//;
+      # based on whitespace_chars_except_newline in XS parser
+      $line =~ s/^([ \t\cK\f]*)//;
       push @{$current->{'contents'}}, { 'type' => 'empty_line',
                                         'text' => $1,
                                         'parent' => $current };
diff --git a/tp/Texinfo/XS/parsetexi/parser.c b/tp/Texinfo/XS/parsetexi/parser.c
index cdd12ff79d..7fe2813849 100644
--- a/tp/Texinfo/XS/parsetexi/parser.c
+++ b/tp/Texinfo/XS/parsetexi/parser.c
@@ -32,7 +32,8 @@
 const char *whitespace_chars = " \t\v\f\r\n";
 const char *digit_chars = "0123456789";
 
-// [^\S\r\n] in Perl
+/* in the perl parser, comments including whitespace_chars_except_newline
+   show where code should be changed if the list of characters changes here */
 const char *whitespace_chars_except_newline = " \t\v\f";
 
 /* count characters, not bytes. */
@@ -1333,6 +1334,10 @@ process_remaining_on_line (ELEMENT **current_inout, char 
**line_inout)
   /* remains set only if command is unknown, otherwise cmd is used */
   char *command = 0;
 
+  /*
+  debug_nonl("PROCESS "); debug_print_protected_string (line); debug ("");
+  */
+
   /********* BLOCK_raw ******************/
   if (command_flags(current) & CF_block
       && (command_data(current->cmd).data == BLOCK_raw))
@@ -2358,6 +2363,8 @@ process_remaining_on_line (ELEMENT **current_inout, char 
**line_inout)
   else if (*line == '\f')
     {
       char separator = *line++;
+      debug_nonl ("FORM FEED in "); debug_print_element (current, 1);
+      debug_nonl (": "); debug_print_protected_string (line); debug ("");
       if (current->type == ET_paragraph)
         {
           ELEMENT *e;



reply via email to

[Prev in Thread] Current Thread [Next in Thread]