texinfo-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[no subject]


From: Patrice Dumas
Date: Sun, 29 Sep 2024 08:24:40 -0400 (EDT)

branch: master
commit 2895e12c78a9ce6d27c64186e4f9befeb65efdb5
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Sun Jun 23 20:08:23 2024 +0200

    * tp/Texinfo/ParserNonXS.pm (_begin_paragraph_p, _begin_paragraph),
    tp/Texinfo/XS/parsetexi/parser.c (begin_paragraph): add
    _begin_paragraph_p in Perl code.  Do not call begin_paragraph_p in
    begin_paragraph, instead let the caller call begin_paragraph if
    begin_paragraph_p returns true.  Update callers.
    
    * tp/Texinfo/ParserNonXS.pm (_merge_text): rename
    $last_element_type as $last_elt_type.
    
    * tp/Texinfo/ParserNonXS.pm (_merge_text),
    tp/Texinfo/XS/parsetexi/parser.c (merge_text): instead of calling
    *abort_empty_line, inline the corresponding code, mixing with
    paragraph opening, and without the case of last_element text empty,
    already handled.
---
 ChangeLog                        |  17 +++++
 tp/Texinfo/ParserNonXS.pm        | 145 +++++++++++++++++++++----------------
 tp/Texinfo/XS/parsetexi/parser.c | 153 +++++++++++++++++++++------------------
 3 files changed, 182 insertions(+), 133 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 3d9dc6ade4..6a8a12aea2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,20 @@
+2024-06-23  Patrice Dumas  <pertusus@free.fr>
+
+       * tp/Texinfo/ParserNonXS.pm (_begin_paragraph_p, _begin_paragraph),
+       tp/Texinfo/XS/parsetexi/parser.c (begin_paragraph): add
+       _begin_paragraph_p in Perl code.  Do not call begin_paragraph_p in
+       begin_paragraph, instead let the caller call begin_paragraph if
+       begin_paragraph_p returns true.  Update callers.
+
+       * tp/Texinfo/ParserNonXS.pm (_merge_text): rename
+       $last_element_type as $last_elt_type.
+
+       * tp/Texinfo/ParserNonXS.pm (_merge_text),
+       tp/Texinfo/XS/parsetexi/parser.c (merge_text): instead of calling
+       *abort_empty_line, inline the corresponding code, mixing with
+       paragraph opening, and without the case of last_element text empty,
+       already handled.
+
 2024-06-23  Patrice Dumas  <pertusus@free.fr>
 
        * tp/Makefile.tres, tp/t/03coverage_braces.t
diff --git a/tp/Texinfo/ParserNonXS.pm b/tp/Texinfo/ParserNonXS.pm
index f758a42e81..d6a041ff9e 100644
--- a/tp/Texinfo/ParserNonXS.pm
+++ b/tp/Texinfo/ParserNonXS.pm
@@ -1358,50 +1358,54 @@ sub _parse_macro_command_line($$$$$;$)
   return $macro;
 }
 
-# start a paragraph if in a context where paragraphs are to be started.
-sub _begin_paragraph($$;$)
+# return true if in a context where paragraphs are to be started.
+sub _begin_paragraph_p($$)
 {
-  my ($self, $current, $source_info) = @_;
-
   # we want to avoid
   # brace_container, brace_arg, root_line (ct_line),
   # paragraphs (ct_paragraph), line_arg (ct_line, ct_def), balanced_braces
   # (only in ct_math, ct_rawpreformatted, ct_inlineraw), block_line_arg
   # (ct_line, ct_def), preformatted (ct_preformatted).
-  if ($begin_paragraph_contexts{$self->_top_context()}
-      and not ($current->{'type'}
-               and $type_without_paragraph{$current->{'type'}})) {
-    # find whether an @indent precedes the paragraph
-    my $indent;
-    if ($current->{'contents'}) {
-      my $index = scalar(@{$current->{'contents'}}) -1;
-      while ($index >= 0
-            and !($current->{'contents'}->[$index]->{'type'}
-              and ($current->{'contents'}->[$index]->{'type'} eq 'empty_line'
-                   or $current->{'contents'}->[$index]->{'type'} eq 'paragraph'))
-            and !($current->{'contents'}->[$index]->{'cmdname'}
-                  and $close_paragraph_commands
-                           {$current->{'contents'}->[$index]->{'cmdname'}})) {
-        if ($current->{'contents'}->[$index]->{'cmdname'}
-          and ($current->{'contents'}->[$index]->{'cmdname'} eq 'indent'
-              or $current->{'contents'}->[$index]->{'cmdname'} eq 'noindent')) {
-          $indent = $current->{'contents'}->[$index]->{'cmdname'};
-          last;
-        }
-        $index--;
+  my ($self, $current) = @_;
+  return ($begin_paragraph_contexts{$self->_top_context()}
+          and not ($current->{'type'}
+                   and $type_without_paragraph{$current->{'type'}}));
+}
+
+# start a paragraph.
+sub _begin_paragraph($$)
+{
+  my ($self, $current) = @_;
+
+  # find whether an @indent precedes the paragraph
+  my $indent;
+  if ($current->{'contents'}) {
+    my $index = scalar(@{$current->{'contents'}}) -1;
+    while ($index >= 0
+          and !($current->{'contents'}->[$index]->{'type'}
+            and ($current->{'contents'}->[$index]->{'type'} eq 'empty_line'
+                 or $current->{'contents'}->[$index]->{'type'} eq 'paragraph'))
+          and !($current->{'contents'}->[$index]->{'cmdname'}
+                and $close_paragraph_commands
+                         {$current->{'contents'}->[$index]->{'cmdname'}})) {
+      if ($current->{'contents'}->[$index]->{'cmdname'}
+        and ($current->{'contents'}->[$index]->{'cmdname'} eq 'indent'
+            or $current->{'contents'}->[$index]->{'cmdname'} eq 'noindent')) {
+        $indent = $current->{'contents'}->[$index]->{'cmdname'};
+        last;
       }
+      $index--;
     }
-    push @{$current->{'contents'}},
-            { 'type' => 'paragraph', 'parent' => $current };
-    $current = $current->{'contents'}->[-1];
-    if ($indent) {
-      $current->{'extra'} = {$indent => 1};
-    }
-    $self->_push_context('ct_paragraph', undef);
-    print STDERR "PARAGRAPH\n" if ($self->{'conf'}->{'DEBUG'});
-    return $current;
   }
-  return 0;
+  push @{$current->{'contents'}},
+          { 'type' => 'paragraph', 'parent' => $current };
+  $current = $current->{'contents'}->[-1];
+  if ($indent) {
+    $current->{'extra'} = {$indent => 1};
+  }
+  $self->_push_context('ct_paragraph', undef);
+  print STDERR "PARAGRAPH\n" if ($self->{'conf'}->{'DEBUG'});
+  return $current;
 }
 
 sub _begin_preformatted($$)
@@ -2247,24 +2251,23 @@ sub _merge_text {
 
   my $paragraph;
 
-  my $no_merge_with_following_text = 0;
   if ($text =~ /\S/) {
     my $leading_spaces;
     if ($text =~ /^(\s+)/) {
       $leading_spaces = $1;
     }
     if ($last_element->{'type'}) {
-      my $last_element_type = $last_element->{'type'};
-      if ($last_element_type eq 'empty_line'
-          or $last_element_type eq 'ignorable_spaces_after_command'
-          or $last_element_type eq 'internal_spaces_after_command'
-          or $last_element_type eq 'internal_spaces_before_argument'
-          or $last_element_type eq 'internal_spaces_before_context_argument'
-          or $last_element_type eq 'spaces_after_close_brace') {
+      my $last_elt_type = $last_element->{'type'};
+      if ($last_elt_type eq 'empty_line'
+          or $last_elt_type eq 'ignorable_spaces_after_command'
+          or $last_elt_type eq 'internal_spaces_after_command'
+          or $last_elt_type eq 'internal_spaces_before_argument'
+          or $last_elt_type eq 'internal_spaces_before_context_argument'
+          or $last_elt_type eq 'spaces_after_close_brace') {
 
         if ($leading_spaces) {
           print STDERR "MERGE_TEXT ADD leading empty |$leading_spaces|"
-                    ." to $last_element_type\n"
+                    ." to $last_elt_type\n"
                          if ($self->{'conf'}->{'DEBUG'});
 
           $last_element->{'text'} .= $leading_spaces;
@@ -2277,9 +2280,8 @@ sub _merge_text {
           my $popped_element = _pop_element_from_contents($self, $current);
           delete $popped_element->{'type'};
           $popped_element->{'text'} = $text;
-          $paragraph = _begin_paragraph($self, $current);
-          if ($paragraph) {
-            $current = $paragraph;
+          if (_begin_paragraph_p($self, $current)) {
+            $current = _begin_paragraph($self, $current);
           }
           # do not jump with a goto as in C, as it is not possible
           # in Perl to use a goto to go further than the calling scope
@@ -2292,22 +2294,40 @@ sub _merge_text {
           return $current;
         }
 
-        # since last_element cannot be empty as this case is
-        # handled just above, the last_element is
-        # always kept in current in _abort_empty_line
-        # for an empty_line; its type may have changed
-        _abort_empty_line($self, $current);
-
-        if ($last_element_type ne 'empty_line') {
-          # we do not merge these special types, unset last_element
+        # following is similar to _abort_empty_line, except
+        # for the empty text already handled above, and with
+        # paragraph opening mixed in
+        if ($last_elt_type eq 'internal_spaces_after_command'
+            or $last_elt_type eq 'internal_spaces_before_argument') {
+          _move_last_space_to_element($self, $current);
+          # we do not merge these special types
+          $last_element = undef;
+        } elsif ($last_elt_type eq 'empty_line') {
+          if (_begin_paragraph_p($self, $current)) {
+            $last_element->{'type'} = 'spaces_before_paragraph';
+            $paragraph = _begin_paragraph($self, $current);
+            $current = $paragraph;
+          } else {
+            # in that case, we can merge
+            delete $last_element->{'type'};
+          }
+        } else {
+          # other special spaces, in general in paragraph begin context
+          if ($last_elt_type eq 'internal_spaces_before_context_argument') {
+            _move_last_space_to_element($self, $current);
+          }
+          if (_begin_paragraph_p($self, $current)) {
+            $current = _begin_paragraph($self, $current);
+          }
+          # we do not merge these special types
           $last_element = undef;
         }
       }
-    }
-
-    $paragraph = _begin_paragraph($self, $current);
-    if ($paragraph) {
-      $current = $paragraph;
+    } else {
+      if (_begin_paragraph_p($self, $current)) {
+        $paragraph = _begin_paragraph($self, $current);
+        $current = $paragraph;
+      }
     }
   }
 
@@ -7436,8 +7456,9 @@ sub _process_remaining_on_line($$$$)
     }
 
     unless ($self->{'no_paragraph_commands'}->{$data_cmdname}) {
-      my $paragraph = _begin_paragraph($self, $current, $source_info);
-      $current = $paragraph if ($paragraph);
+      if (_begin_paragraph_p($self, $current)) {
+        $current = _begin_paragraph($self, $current);
+      }
     }
 
     my $command_element;
diff --git a/tp/Texinfo/XS/parsetexi/parser.c b/tp/Texinfo/XS/parsetexi/parser.c
index 4256e271e6..174a21415b 100644
--- a/tp/Texinfo/XS/parsetexi/parser.c
+++ b/tp/Texinfo/XS/parsetexi/parser.c
@@ -564,6 +564,8 @@ parse_texi_document (void)
 }
 
 
+/* If in a context where paragraphs are to be started, return 1,
+   else return 0 */
 static int
 begin_paragraph_p (const ELEMENT *current)
 {
@@ -582,64 +584,59 @@ begin_paragraph_p (const ELEMENT *current)
           && current->type != ET_brace_container);
 }
 
-/* If in a context where paragraphs are to be started, start a new
-   paragraph and return it.  Else return 0 */
+/* Start a new paragraph and return it */
 static ELEMENT *
 begin_paragraph (ELEMENT *current)
 {
-  if (begin_paragraph_p (current))
-    {
-      ELEMENT *e;
-      enum command_id indent = 0;
+  ELEMENT *e;
+  enum command_id indent = 0;
 
-      /* Check if an @indent precedes the paragraph (to record it
-         in the 'extra' key). */
-      if (current->e.c->contents.number > 0)
+  /* Check if an @indent precedes the paragraph (to record it
+     in the 'extra' key). */
+  if (current->e.c->contents.number > 0)
+    {
+      int i = current->e.c->contents.number - 1;
+      while (i >= 0)
         {
-          int i = current->e.c->contents.number - 1;
-          while (i >= 0)
+          ELEMENT *child = contents_child_by_index (current, i);
+          if (child->type == ET_empty_line
+              || child->type == ET_paragraph)
+            break;
+          if (type_data[child->type].flags & TF_at_command
+              && command_data(child->e.c->cmd).flags & CF_close_paragraph)
+            break;
+          /* after an indent there are ignorable_spaces_after_command
+             skip through spaces only text element that could be there */
+          if (type_data[child->type].flags & TF_text) {}
+          else if (child->e.c->cmd == CM_indent
+              || child->e.c->cmd == CM_noindent)
             {
-              ELEMENT *child = contents_child_by_index (current, i);
-              if (child->type == ET_empty_line
-                  || child->type == ET_paragraph)
-                break;
-              if (type_data[child->type].flags & TF_at_command
-                  && command_data(child->e.c->cmd).flags & CF_close_paragraph)
-                break;
-              /* after an indent there are ignorable_spaces_after_command
-                 skip through spaces only text element that could be there */
-              if (type_data[child->type].flags & TF_text) {}
-              else if (child->e.c->cmd == CM_indent
-                  || child->e.c->cmd == CM_noindent)
-                {
-                  indent = child->e.c->cmd;
-                  break;
-                }
-         /* skip through @macro definitions, raw block commands, ignored
-            conditional block commands, @author, informational commands,
-            commands meant for titlepage such as @vskip or @title, index
-            commands and types such as def_line (but cannot find an @*indent
-            before), a few brace commands that can be out of paragraphs and
-            do not close paragraphs such as @anchor or @image
-              else
-                fprintf(stderr, "INDENT search skipping through %s\n",
-                        print_element_debug_parser(child, 0));
-                */
-              i--;
+              indent = child->e.c->cmd;
+              break;
             }
+     /* skip through @macro definitions, raw block commands, ignored
+        conditional block commands, @author, informational commands,
+        commands meant for titlepage such as @vskip or @title, index
+        commands and types such as def_line (but cannot find an @*indent
+        before), a few brace commands that can be out of paragraphs and
+        do not close paragraphs such as @anchor or @image
+          else
+            fprintf(stderr, "INDENT search skipping through %s\n",
+                    print_element_debug_parser(child, 0));
+            */
+          i--;
         }
+    }
 
-      e = new_element (ET_paragraph);
-      if (indent)
-        e->flags |= (indent == CM_indent ? EF_indent : EF_noindent);
-      add_to_element_contents (current, e);
+  e = new_element (ET_paragraph);
+  if (indent)
+    e->flags |= (indent == CM_indent ? EF_indent : EF_noindent);
+  add_to_element_contents (current, e);
 
-      push_context (ct_paragraph, 0);
-      debug ("PARAGRAPH");
+  push_context (ct_paragraph, 0);
+  debug ("PARAGRAPH");
 
-      return e;
-    }
-  return 0;
+  return e;
 }
 
 /* Begin a preformatted element if in a preformatted context. */
@@ -794,7 +791,6 @@ merge_text (ELEMENT *current, const char *text, size_t len_text,
   /* Is there a non-whitespace character in the line? */
   if (leading_spaces < len_text)
     {
-      ELEMENT *paragraph;
       if ((last_elt_type == ET_empty_line
            || last_elt_type == ET_ignorable_spaces_after_command
            || last_elt_type == ET_internal_spaces_after_command
@@ -825,27 +821,45 @@ merge_text (ELEMENT *current, const char *text, size_t len_text,
               e = pop_element_from_contents (current);
               e->type = ET_normal_text;
 
-              paragraph = begin_paragraph (current);
-              if (paragraph)
+              if (begin_paragraph_p (current))
                 {
-                  current = paragraph;
+                  current = begin_paragraph (current);
                 }
               goto add_to_empty_text;
             }
 
-          /* since last_element cannot be empty as this case is
-             handled just above, the last_element is
-             always kept in current in do_abort_empty_line
-             for an empty_line; its type may change */
-          do_abort_empty_line (current, last_element);
-
-          if (last_elt_type != ET_empty_line)
-         /* we do not merge these special types, unset last_element */
-            last_element = 0;
+          /* following is similar to do_abort_empty_line, except
+             for the empty text already handled above, and with
+             paragraph opening mixed in */
+          if (last_elt_type == ET_internal_spaces_after_command
+              || last_elt_type == ET_internal_spaces_before_argument)
+            {
+              move_last_space_to_element (current);
+              /* we do not merge these special types */
+              goto new_text;
+            }
+          else if (last_elt_type == ET_empty_line)
+            {
+              if (begin_paragraph_p (current))
+                {
+                  last_element->type = ET_spaces_before_paragraph;
+                  current = begin_paragraph (current);
+                  goto new_text;
+                }
+              /* in that case, we can merge */
+              last_element->type = ET_normal_text;
+            }
+          else
+            {/* other special spaces, in general in paragraph begin context */
+              if (last_elt_type == ET_internal_spaces_before_context_argument)
+                move_last_space_to_element (current);
+              if (begin_paragraph_p (current))
+                current = begin_paragraph (current);
+              /* we do not merge these special types */
+              goto new_text;
+            }
         }
-
-      paragraph = begin_paragraph (current);
-      if (paragraph)
+      else if (begin_paragraph_p (current))
         {
           /* NOTE a new paragraph happens necessarily after a special
              space as handled just above, or after a no_paragraph
@@ -856,16 +870,15 @@ merge_text (ELEMENT *current, const char *text, size_t len_text,
                 command, we are already in a paragraph if a paragraph can
                 be opened.
             */
-          current = paragraph;
+          current = begin_paragraph (current);
           /* shortcut the case with text as last content child as
              it cannot happen if a new paragraph is started */
           goto new_text;
         }
     }
 
-  if (last_element
-      /* can actually be normal_text, and some space elements */
-      && type_data[last_elt_type].flags & TF_text
+  if (/* can actually be normal_text, and some space elements */
+      type_data[last_elt_type].flags & TF_text
       && !strchr (last_element->e.text->text, '\n'))
     {
       /* Transfer source marks */
@@ -2436,10 +2449,8 @@ process_remaining_on_line (ELEMENT **current_inout, const char **line_inout)
       /* check command doesn't start a paragraph */
       if (!(command_data(data_cmd).flags & CF_no_paragraph))
         {
-          ELEMENT *paragraph;
-          paragraph = begin_paragraph (current);
-          if (paragraph)
-            current = paragraph;
+          if (begin_paragraph_p (current))
+            current = begin_paragraph (current);
         }
 
       /* No-brace command */



reply via email to

[Prev in Thread] Current Thread [Next in Thread]