[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[no subject]
From: |
Patrice Dumas |
Date: |
Sun, 29 Sep 2024 08:24:40 -0400 (EDT) |
branch: master
commit 2895e12c78a9ce6d27c64186e4f9befeb65efdb5
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Sun Jun 23 20:08:23 2024 +0200
* tp/Texinfo/ParserNonXS.pm (_begin_paragraph_p, _begin_paragraph),
tp/Texinfo/XS/parsetexi/parser.c (begin_paragraph): add
_begin_paragraph_p in Perl code. Do not call begin_paragraph_p in
begin_paragraph, instead let the caller call begin_paragraph if
begin_paragraph_p returns true. Update callers.
* tp/Texinfo/ParserNonXS.pm (_merge_text): rename
$last_element_type as $last_elt_type.
* tp/Texinfo/ParserNonXS.pm (_merge_text),
tp/Texinfo/XS/parsetexi/parser.c (merge_text): instead of calling
*abort_empty_line, inline the corresponding code, mixing with
paragraph opening, and without the case of last_element text empty,
already handled.
---
ChangeLog | 17 +++++
tp/Texinfo/ParserNonXS.pm | 145 +++++++++++++++++++++----------------
tp/Texinfo/XS/parsetexi/parser.c | 153 +++++++++++++++++++++------------------
3 files changed, 182 insertions(+), 133 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 3d9dc6ade4..6a8a12aea2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,20 @@
+2024-06-23 Patrice Dumas <pertusus@free.fr>
+
+ * tp/Texinfo/ParserNonXS.pm (_begin_paragraph_p, _begin_paragraph),
+ tp/Texinfo/XS/parsetexi/parser.c (begin_paragraph): add
+ _begin_paragraph_p in Perl code. Do not call begin_paragraph_p in
+ begin_paragraph, instead let the caller call begin_paragraph if
+ begin_paragraph_p returns true. Update callers.
+
+ * tp/Texinfo/ParserNonXS.pm (_merge_text): rename
+ $last_element_type as $last_elt_type.
+
+ * tp/Texinfo/ParserNonXS.pm (_merge_text),
+ tp/Texinfo/XS/parsetexi/parser.c (merge_text): instead of calling
+ *abort_empty_line, inline the corresponding code, mixing with
+ paragraph opening, and without the case of last_element text empty,
+ already handled.
+
2024-06-23 Patrice Dumas <pertusus@free.fr>
* tp/Makefile.tres, tp/t/03coverage_braces.t
diff --git a/tp/Texinfo/ParserNonXS.pm b/tp/Texinfo/ParserNonXS.pm
index f758a42e81..d6a041ff9e 100644
--- a/tp/Texinfo/ParserNonXS.pm
+++ b/tp/Texinfo/ParserNonXS.pm
@@ -1358,50 +1358,54 @@ sub _parse_macro_command_line($$$$$;$)
return $macro;
}
-# start a paragraph if in a context where paragraphs are to be started.
-sub _begin_paragraph($$;$)
+# return true if in a context where paragraphs are to be started.
+sub _begin_paragraph_p($$)
{
- my ($self, $current, $source_info) = @_;
-
# we want to avoid
# brace_container, brace_arg, root_line (ct_line),
# paragraphs (ct_paragraph), line_arg (ct_line, ct_def), balanced_braces
# (only in ct_math, ct_rawpreformatted, ct_inlineraw), block_line_arg
# (ct_line, ct_def), preformatted (ct_preformatted).
- if ($begin_paragraph_contexts{$self->_top_context()}
- and not ($current->{'type'}
- and $type_without_paragraph{$current->{'type'}})) {
- # find whether an @indent precedes the paragraph
- my $indent;
- if ($current->{'contents'}) {
- my $index = scalar(@{$current->{'contents'}}) -1;
- while ($index >= 0
- and !($current->{'contents'}->[$index]->{'type'}
- and ($current->{'contents'}->[$index]->{'type'} eq 'empty_line'
- or $current->{'contents'}->[$index]->{'type'} eq 'paragraph'))
- and !($current->{'contents'}->[$index]->{'cmdname'}
- and $close_paragraph_commands
- {$current->{'contents'}->[$index]->{'cmdname'}})) {
- if ($current->{'contents'}->[$index]->{'cmdname'}
- and ($current->{'contents'}->[$index]->{'cmdname'} eq 'indent'
- or $current->{'contents'}->[$index]->{'cmdname'} eq 'noindent')) {
- $indent = $current->{'contents'}->[$index]->{'cmdname'};
- last;
- }
- $index--;
+ my ($self, $current) = @_;
+ return ($begin_paragraph_contexts{$self->_top_context()}
+ and not ($current->{'type'}
+ and $type_without_paragraph{$current->{'type'}}));
+}
+
+# start a paragraph.
+sub _begin_paragraph($$)
+{
+ my ($self, $current) = @_;
+
+ # find whether an @indent precedes the paragraph
+ my $indent;
+ if ($current->{'contents'}) {
+ my $index = scalar(@{$current->{'contents'}}) -1;
+ while ($index >= 0
+ and !($current->{'contents'}->[$index]->{'type'}
+ and ($current->{'contents'}->[$index]->{'type'} eq 'empty_line'
+ or $current->{'contents'}->[$index]->{'type'} eq 'paragraph'))
+ and !($current->{'contents'}->[$index]->{'cmdname'}
+ and $close_paragraph_commands
+ {$current->{'contents'}->[$index]->{'cmdname'}})) {
+ if ($current->{'contents'}->[$index]->{'cmdname'}
+ and ($current->{'contents'}->[$index]->{'cmdname'} eq 'indent'
+ or $current->{'contents'}->[$index]->{'cmdname'} eq 'noindent')) {
+ $indent = $current->{'contents'}->[$index]->{'cmdname'};
+ last;
}
+ $index--;
}
- push @{$current->{'contents'}},
- { 'type' => 'paragraph', 'parent' => $current };
- $current = $current->{'contents'}->[-1];
- if ($indent) {
- $current->{'extra'} = {$indent => 1};
- }
- $self->_push_context('ct_paragraph', undef);
- print STDERR "PARAGRAPH\n" if ($self->{'conf'}->{'DEBUG'});
- return $current;
}
- return 0;
+ push @{$current->{'contents'}},
+ { 'type' => 'paragraph', 'parent' => $current };
+ $current = $current->{'contents'}->[-1];
+ if ($indent) {
+ $current->{'extra'} = {$indent => 1};
+ }
+ $self->_push_context('ct_paragraph', undef);
+ print STDERR "PARAGRAPH\n" if ($self->{'conf'}->{'DEBUG'});
+ return $current;
}
sub _begin_preformatted($$)
@@ -2247,24 +2251,23 @@ sub _merge_text {
my $paragraph;
- my $no_merge_with_following_text = 0;
if ($text =~ /\S/) {
my $leading_spaces;
if ($text =~ /^(\s+)/) {
$leading_spaces = $1;
}
if ($last_element->{'type'}) {
- my $last_element_type = $last_element->{'type'};
- if ($last_element_type eq 'empty_line'
- or $last_element_type eq 'ignorable_spaces_after_command'
- or $last_element_type eq 'internal_spaces_after_command'
- or $last_element_type eq 'internal_spaces_before_argument'
- or $last_element_type eq 'internal_spaces_before_context_argument'
- or $last_element_type eq 'spaces_after_close_brace') {
+ my $last_elt_type = $last_element->{'type'};
+ if ($last_elt_type eq 'empty_line'
+ or $last_elt_type eq 'ignorable_spaces_after_command'
+ or $last_elt_type eq 'internal_spaces_after_command'
+ or $last_elt_type eq 'internal_spaces_before_argument'
+ or $last_elt_type eq 'internal_spaces_before_context_argument'
+ or $last_elt_type eq 'spaces_after_close_brace') {
if ($leading_spaces) {
print STDERR "MERGE_TEXT ADD leading empty |$leading_spaces|"
- ." to $last_element_type\n"
+ ." to $last_elt_type\n"
if ($self->{'conf'}->{'DEBUG'});
$last_element->{'text'} .= $leading_spaces;
@@ -2277,9 +2280,8 @@ sub _merge_text {
my $popped_element = _pop_element_from_contents($self, $current);
delete $popped_element->{'type'};
$popped_element->{'text'} = $text;
- $paragraph = _begin_paragraph($self, $current);
- if ($paragraph) {
- $current = $paragraph;
+ if (_begin_paragraph_p($self, $current)) {
+ $current = _begin_paragraph($self, $current);
}
# do not jump with a goto as in C, as it is not possible
# in Perl to use a goto to go further than the calling scope
@@ -2292,22 +2294,40 @@ sub _merge_text {
return $current;
}
- # since last_element cannot be empty as this case is
- # handled just above, the last_element is
- # always kept in current in _abort_empty_line
- # for an empty_line; its type may have changed
- _abort_empty_line($self, $current);
-
- if ($last_element_type ne 'empty_line') {
- # we do not merge these special types, unset last_element
+ # following is similar to _abort_empty_line, except
+ # for the empty text already handled above, and with
+ # paragraph opening mixed in
+ if ($last_elt_type eq 'internal_spaces_after_command'
+ or $last_elt_type eq 'internal_spaces_before_argument') {
+ _move_last_space_to_element($self, $current);
+ # we do not merge these special types
+ $last_element = undef;
+ } elsif ($last_elt_type eq 'empty_line') {
+ if (_begin_paragraph_p($self, $current)) {
+ $last_element->{'type'} = 'spaces_before_paragraph';
+ $paragraph = _begin_paragraph($self, $current);
+ $current = $paragraph;
+ } else {
+ # in that case, we can merge
+ delete $last_element->{'type'};
+ }
+ } else {
+ # other special spaces, in general in paragraph begin context
+ if ($last_elt_type eq 'internal_spaces_before_context_argument') {
+ _move_last_space_to_element($self, $current);
+ }
+ if (_begin_paragraph_p($self, $current)) {
+ $current = _begin_paragraph($self, $current);
+ }
+ # we do not merge these special types
$last_element = undef;
}
}
- }
-
- $paragraph = _begin_paragraph($self, $current);
- if ($paragraph) {
- $current = $paragraph;
+ } else {
+ if (_begin_paragraph_p($self, $current)) {
+ $paragraph = _begin_paragraph($self, $current);
+ $current = $paragraph;
+ }
}
}
@@ -7436,8 +7456,9 @@ sub _process_remaining_on_line($$$$)
}
unless ($self->{'no_paragraph_commands'}->{$data_cmdname}) {
- my $paragraph = _begin_paragraph($self, $current, $source_info);
- $current = $paragraph if ($paragraph);
+ if (_begin_paragraph_p($self, $current)) {
+ $current = _begin_paragraph($self, $current);
+ }
}
my $command_element;
diff --git a/tp/Texinfo/XS/parsetexi/parser.c b/tp/Texinfo/XS/parsetexi/parser.c
index 4256e271e6..174a21415b 100644
--- a/tp/Texinfo/XS/parsetexi/parser.c
+++ b/tp/Texinfo/XS/parsetexi/parser.c
@@ -564,6 +564,8 @@ parse_texi_document (void)
}
+/* If in a context where paragraphs are to be started, return 1,
+ else return 0 */
static int
begin_paragraph_p (const ELEMENT *current)
{
@@ -582,64 +584,59 @@ begin_paragraph_p (const ELEMENT *current)
&& current->type != ET_brace_container);
}
-/* If in a context where paragraphs are to be started, start a new
- paragraph and return it. Else return 0 */
+/* Start a new paragraph and return it */
static ELEMENT *
begin_paragraph (ELEMENT *current)
{
- if (begin_paragraph_p (current))
- {
- ELEMENT *e;
- enum command_id indent = 0;
+ ELEMENT *e;
+ enum command_id indent = 0;
- /* Check if an @indent precedes the paragraph (to record it
- in the 'extra' key). */
- if (current->e.c->contents.number > 0)
+ /* Check if an @indent precedes the paragraph (to record it
+ in the 'extra' key). */
+ if (current->e.c->contents.number > 0)
+ {
+ int i = current->e.c->contents.number - 1;
+ while (i >= 0)
{
- int i = current->e.c->contents.number - 1;
- while (i >= 0)
+ ELEMENT *child = contents_child_by_index (current, i);
+ if (child->type == ET_empty_line
+ || child->type == ET_paragraph)
+ break;
+ if (type_data[child->type].flags & TF_at_command
+ && command_data(child->e.c->cmd).flags & CF_close_paragraph)
+ break;
+ /* after an indent there are ignorable_spaces_after_command
+ skip through spaces only text element that could be there */
+ if (type_data[child->type].flags & TF_text) {}
+ else if (child->e.c->cmd == CM_indent
+ || child->e.c->cmd == CM_noindent)
{
- ELEMENT *child = contents_child_by_index (current, i);
- if (child->type == ET_empty_line
- || child->type == ET_paragraph)
- break;
- if (type_data[child->type].flags & TF_at_command
- && command_data(child->e.c->cmd).flags & CF_close_paragraph)
- break;
- /* after an indent there are ignorable_spaces_after_command
- skip through spaces only text element that could be there */
- if (type_data[child->type].flags & TF_text) {}
- else if (child->e.c->cmd == CM_indent
- || child->e.c->cmd == CM_noindent)
- {
- indent = child->e.c->cmd;
- break;
- }
- /* skip through @macro definitions, raw block commands, ignored
- conditional block commands, @author, informational commands,
- commands meant for titlepage such as @vskip or @title, index
- commands and types such as def_line (but cannot find an @*indent
- before), a few brace commands that can be out of paragraphs and
- do not close paragraphs such as @anchor or @image
- else
- fprintf(stderr, "INDENT search skipping through %s\n",
- print_element_debug_parser(child, 0));
- */
- i--;
+ indent = child->e.c->cmd;
+ break;
}
+ /* skip through @macro definitions, raw block commands, ignored
+ conditional block commands, @author, informational commands,
+ commands meant for titlepage such as @vskip or @title, index
+ commands and types such as def_line (but cannot find an @*indent
+ before), a few brace commands that can be out of paragraphs and
+ do not close paragraphs such as @anchor or @image
+ else
+ fprintf(stderr, "INDENT search skipping through %s\n",
+ print_element_debug_parser(child, 0));
+ */
+ i--;
}
+ }
- e = new_element (ET_paragraph);
- if (indent)
- e->flags |= (indent == CM_indent ? EF_indent : EF_noindent);
- add_to_element_contents (current, e);
+ e = new_element (ET_paragraph);
+ if (indent)
+ e->flags |= (indent == CM_indent ? EF_indent : EF_noindent);
+ add_to_element_contents (current, e);
- push_context (ct_paragraph, 0);
- debug ("PARAGRAPH");
+ push_context (ct_paragraph, 0);
+ debug ("PARAGRAPH");
- return e;
- }
- return 0;
+ return e;
}
/* Begin a preformatted element if in a preformatted context. */
@@ -794,7 +791,6 @@ merge_text (ELEMENT *current, const char *text, size_t len_text,
/* Is there a non-whitespace character in the line? */
if (leading_spaces < len_text)
{
- ELEMENT *paragraph;
if ((last_elt_type == ET_empty_line
|| last_elt_type == ET_ignorable_spaces_after_command
|| last_elt_type == ET_internal_spaces_after_command
@@ -825,27 +821,45 @@ merge_text (ELEMENT *current, const char *text, size_t len_text,
e = pop_element_from_contents (current);
e->type = ET_normal_text;
- paragraph = begin_paragraph (current);
- if (paragraph)
+ if (begin_paragraph_p (current))
{
- current = paragraph;
+ current = begin_paragraph (current);
}
goto add_to_empty_text;
}
- /* since last_element cannot be empty as this case is
- handled just above, the last_element is
- always kept in current in do_abort_empty_line
- for an empty_line; its type may change */
- do_abort_empty_line (current, last_element);
-
- if (last_elt_type != ET_empty_line)
- /* we do not merge these special types, unset last_element */
- last_element = 0;
+ /* following is similar to do_abort_empty_line, except
+ for the empty text already handled above, and with
+ paragraph opening mixed in */
+ if (last_elt_type == ET_internal_spaces_after_command
+ || last_elt_type == ET_internal_spaces_before_argument)
+ {
+ move_last_space_to_element (current);
+ /* we do not merge these special types */
+ goto new_text;
+ }
+ else if (last_elt_type == ET_empty_line)
+ {
+ if (begin_paragraph_p (current))
+ {
+ last_element->type = ET_spaces_before_paragraph;
+ current = begin_paragraph (current);
+ goto new_text;
+ }
+ /* in that case, we can merge */
+ last_element->type = ET_normal_text;
+ }
+ else
+ {/* other special spaces, in general in paragraph begin context */
+ if (last_elt_type == ET_internal_spaces_before_context_argument)
+ move_last_space_to_element (current);
+ if (begin_paragraph_p (current))
+ current = begin_paragraph (current);
+ /* we do not merge these special types */
+ goto new_text;
+ }
}
-
- paragraph = begin_paragraph (current);
- if (paragraph)
+ else if (begin_paragraph_p (current))
{
/* NOTE a new paragraph happens necessarily after a special
space as handled just above, or after a no_paragraph
@@ -856,16 +870,15 @@ merge_text (ELEMENT *current, const char *text, size_t len_text,
command, we are already in a paragraph if a paragraph can
be opened.
*/
- current = paragraph;
+ current = begin_paragraph (current);
/* shortcut the case with text as last content child as
it cannot happen if a new paragraph is started */
goto new_text;
}
}
- if (last_element
- /* can actually be normal_text, and some space elements */
- && type_data[last_elt_type].flags & TF_text
+ if (/* can actually be normal_text, and some space elements */
+ type_data[last_elt_type].flags & TF_text
&& !strchr (last_element->e.text->text, '\n'))
{
/* Transfer source marks */
@@ -2436,10 +2449,8 @@ process_remaining_on_line (ELEMENT **current_inout, const char **line_inout)
/* check command doesn't start a paragraph */
if (!(command_data(data_cmd).flags & CF_no_paragraph))
{
- ELEMENT *paragraph;
- paragraph = begin_paragraph (current);
- if (paragraph)
- current = paragraph;
+ if (begin_paragraph_p (current))
+ current = begin_paragraph (current);
}
/* No-brace command */