texinfo-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

branch master updated: * tp/Texinfo/ParserNonXS.pm (_input_push_file, _e


From: Patrice Dumas
Subject: branch master updated: * tp/Texinfo/ParserNonXS.pm (_input_push_file, _end_line_misc_line), tp/Texinfo/XS/parsetexi/end_line.c (end_line_misc_line), tp/Texinfo/XS/parsetexi/input.c (next_text, input_push_file): keep the information on the full include file path with input info. Use the full input path in include file opening and closing errors.
Date: Tue, 21 Feb 2023 13:17:58 -0500

This is an automated email from the git hooks/post-receive script.

pertusus pushed a commit to branch master
in repository texinfo.

The following commit(s) were added to refs/heads/master by this push:
     new 98f20859d6 * tp/Texinfo/ParserNonXS.pm (_input_push_file, 
_end_line_misc_line), tp/Texinfo/XS/parsetexi/end_line.c (end_line_misc_line), 
tp/Texinfo/XS/parsetexi/input.c (next_text, input_push_file): keep the 
information on the full include file path with input info. Use the full input 
path in include file opening and closing errors.
98f20859d6 is described below

commit 98f20859d6e7eb9302a8ceea0de2c5a978c1c96d
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Tue Feb 21 19:17:50 2023 +0100

    * tp/Texinfo/ParserNonXS.pm (_input_push_file, _end_line_misc_line),
    tp/Texinfo/XS/parsetexi/end_line.c (end_line_misc_line),
    tp/Texinfo/XS/parsetexi/input.c (next_text, input_push_file):
    keep the information on the full include file path with input info.
    Use the full input path in include file opening and closing errors.
    
    * tp/Texinfo/ParserNonXS.pm (_input_push_file): return the error
    message.  Update callers.
    
    * tp/Texinfo/ParserNonXS.pm (_new_text_input): die if opening the
    internal text handler fails.
    (_next_text): show the close error message.
---
 ChangeLog                          | 15 +++++++++
 tp/Texinfo/ParserNonXS.pm          | 66 +++++++++++++++++++++++++-------------
 tp/Texinfo/XS/parsetexi/end_line.c |  7 ++--
 tp/Texinfo/XS/parsetexi/input.c    | 31 ++++++++++++++----
 tp/Texinfo/XS/parsetexi/input.h    |  1 +
 5 files changed, 88 insertions(+), 32 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 8a6a4d0149..fec6346504 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+2023-02-21  Patrice Dumas  <pertusus@free.fr>
+
+       * tp/Texinfo/ParserNonXS.pm (_input_push_file, _end_line_misc_line),
+       tp/Texinfo/XS/parsetexi/end_line.c (end_line_misc_line),
+       tp/Texinfo/XS/parsetexi/input.c (next_text, input_push_file):
+       keep the information on the full include file path with input info.
+       Use the full input path in include file opening and closing errors.
+
+       * tp/Texinfo/ParserNonXS.pm (_input_push_file): return the error
+       message.  Update callers.
+
+       * tp/Texinfo/ParserNonXS.pm (_new_text_input): die if opening the
+       internal text handler fails.
+       (_next_text): show the close error message.
+
 2023-02-21  Patrice Dumas  <pertusus@free.fr>
 
        * tp/Texinfo/XS/xspara.c (xspara__add_next): check output of mbrtowc,
diff --git a/tp/Texinfo/ParserNonXS.pm b/tp/Texinfo/ParserNonXS.pm
index 4ee7d774a6..62aa1c6fc4 100644
--- a/tp/Texinfo/ParserNonXS.pm
+++ b/tp/Texinfo/ParserNonXS.pm
@@ -407,9 +407,6 @@ foreach my $no_close_preformatted('sp') {
 
 foreach my $block_command (keys(%block_commands)) {
   $begin_line_commands{$block_command} = 1;
-  # FIXME to close preformated or not to close?
-  #$close_preformatted_commands{$format_raw_command} = 1
-  #  if ($brace_commands{$format_raw_command}) eq 'format_raw');
 }
 
 # commands that may appear in commands containing plain text only
@@ -679,15 +676,15 @@ sub _new_text_input($$)
   my $input_source_info = shift;
 
   my $texthandle = do { local *FH };
-  # FIXME in-memory scalar strings are considered a stream of bytes, so need
-  # to encode/decode.  Is it a performance issue?  Do we care?
+  # In-memory scalar strings are considered a stream of bytes, so need
+  # to encode/decode.
   $text = Encode::encode("utf8", $text);
   # Could fail with error like
   # Strings with code points over 0xFF may not be mapped into in-memory file handles
   if (!open ($texthandle, '<', \$text)) {
     my $error_message = $!;
-    print STDERR "ERROR: open on a reference failed: $error_message\n";
-    #return undef, $error_message;
+    # Better die now than later reading on a closed filehandle.
+    die "BUG? open on a reference failed: $error_message\n";
   }
   return {'th' => $texthandle,
           'input_source_info' => $input_source_info};
@@ -793,7 +790,7 @@ sub _input_push_file
 
   my $filehandle = do { local *FH };
   if (!open($filehandle, $input_file_path)) {
-    return 0, undef, undef;
+    return 0, undef, undef, $!;
   }
 
   if (defined($self->{'info'}->{'input_perl_encoding'})) {
@@ -818,7 +815,8 @@ sub _input_push_file
           'line_nr' => 0,
           'macro' => '',
        },
-       'fh' => $filehandle
+       'fh' => $filehandle,
+       'input_file_path' => $input_file_path,
     };
 
   $file_input->{'file_name_encoding'} = $file_name_encoding
@@ -826,7 +824,7 @@ sub _input_push_file
   $self->{'input'} = [] if (!defined($self->{'input'}));
   unshift @{$self->{'input'}}, $file_input;
 
-  return 1, $file_name, $directories;
+  return 1, $file_name, $directories, undef;
 }
 
 # parse a texi file
@@ -835,7 +833,7 @@ sub parse_texi_file($$)
 {
   my ($self, $input_file_path) = @_;
 
-  my ($status, $file_name, $directories)
+  my ($status, $file_name, $directories, $error_message)
     = _input_push_file($self, $input_file_path);
   if (!$status) {
     my $input_file_name = $input_file_path;
@@ -845,7 +843,7 @@ sub parse_texi_file($$)
     }
     $self->{'registrar'}->document_error($self,
                  sprintf(__("could not open %s: %s"),
-                                  $input_file_name, $!));
+                                  $input_file_name, $error_message));
     return undef;
   }
 
@@ -2335,6 +2333,7 @@ sub _next_text($;$)
         }
         # DEL as comment character
         if ($line =~ s/\x{7F}(.*\s*)//) {
+          # push empty text to place a source mark
           _input_push_text($self, '',
                            $input->{'input_source_info'}->{'line_nr'});
           my $delcomment_source_mark = {'sourcemark_type' => 'delcomment'};
@@ -2345,11 +2344,22 @@ sub _next_text($;$)
         $input->{'input_source_info'}->{'line_nr'}++;
         return ($line, { %{$input->{'input_source_info'}} });
       }
+    } else {
+      # TODO currently possible if called for lines after @bye but
+      # there is nothing anymore
+      #$self->_bug_message("Unexpected input $input ("
+      #                                .scalar(@{$self->{'input'}}).")",
+      #                    $input->{'input_source_info'}, $current);
+      #Texinfo::Common::debug_hash($input);
     }
     # Top input source failed.  Close, pop, and try the next one.
     if (exists($input->{'th'})) {
       # End of text reached.
-      # FIXME close $input->{'th'} explicitly?
+      if (!close($input->{'th'})) {
+        my $error_message = $!;
+        warn "BUG? close text reference failed: $error_message\n";
+      }
+      delete $input->{'th'};
       if ($input->{'input_source_info'}->{'macro'} ne '') {
         my $top_macro = shift @{$self->{'macro_stack'}};
         print STDERR "SHIFT MACRO_STACK(@{$self->{'macro_stack'}}):"
@@ -2365,20 +2375,25 @@ sub _next_text($;$)
     } elsif ($input->{'fh'}
              and $input->{'input_source_info'}->{'file_name'} ne '-') {
       if (!close($input->{'fh'})) {
-        # need to decode for error message
+        # decode for the message, to have character strings in perl
+        # that will be encoded on output to the locale encoding.
+        # Done differently for the file names in source_info
+        # which are byte strings and end up unmodified in output error
+        # messages.
         my $file_name_encoding;
         if (defined($input->{'file_name_encoding'})) {
           $file_name_encoding = $input->{'file_name_encoding'};
         } else {
           $file_name_encoding = $self->get_conf('COMMAND_LINE_ENCODING');
         }
-        my $file_name = $input->{'input_source_info'}->{'file_name'};
+        my $decoded_file_name = $input->{'input_file_path'};
         if (defined($file_name_encoding)) {
-          $file_name = decode($file_name_encoding, $file_name);
+          $decoded_file_name = decode($file_name_encoding,
+                                      $input->{'input_file_path'});
         }
         $self->{'registrar'}->document_warn($self,
                              sprintf(__("error on closing %s: %s"),
-                                     $file_name, $!));
+                                     $decoded_file_name, $!));
       }
       delete $input->{'fh'};
     }
@@ -2412,6 +2427,8 @@ sub _next_text($;$)
     # source_info, even when nothing is returned and the first input
     # file is closed.
     if (scalar(@{$self->{'input'}}) == 1) {
+      print STDERR "INPUT FINISHED\n" if ($self->{'DEBUG'});
+      #cluck();
       return (undef, { %{$input->{'input_source_info'}} });
     } else {
       shift @{$self->{'input'}};
@@ -3387,12 +3404,12 @@ sub _end_line_misc_line($$$)
         # @-command effects are ignored, an error message is issued below.
       } elsif ($command eq 'include') {
         # We want Perl binary strings representing sequences of bytes,
-        # not character strings of codepoints in the internal perl encoding.
+        # not character strings in the internal perl encoding.
         my ($file_path, $file_name_encoding) = _encode_file_name($self, $text);
         my $included_file_path
              = Texinfo::Common::locate_include_file($self, $file_path);
         if (defined($included_file_path)) {
-          my ($status, $file_name, $directories)
+          my ($status, $file_name, $directories, $error_message)
              = _input_push_file($self, $included_file_path, $file_name_encoding);
           if ($status) {
             $included_file = 1;
@@ -3401,12 +3418,11 @@ sub _end_line_misc_line($$$)
                                     'status' => 'start'};
             $self->{'input'}->[0]->{'input_source_mark'} = $include_source_mark;
           } else {
-            # FIXME $text does not show the include directory.  Using
-            # $included_file_path would require decoding to character string
-            # using $file_name_encoding
+            my $decoded_file_path
+                = Encode::decode($file_name_encoding, $included_file_path);
             $self->_command_error($current, $source_info,
                             __("\@%s: could not open %s: %s"),
-                            $command, $text, $!);
+                            $command, $decoded_file_path, $error_message);
           }
         } else {
           $self->_command_error($current, $source_info,
@@ -6692,6 +6708,8 @@ sub _parse_texi($$$)
       # can happen if there is macro expansion at the end of a text fragment.
       # Not sure that it can happen otherwise.
       if (! defined($line)) {
+        print STDERR "END LINE in line loop STILL_MORE_TO_PROCESS\n"
+                                                 if ($self->{'DEBUG'});
         $current = _end_line($self, $current, $source_info);
         last;
       }
@@ -6717,6 +6735,8 @@ sub _parse_texi($$$)
            .join('|', @context_stack)));
   }
 
+  # TODO only if $line is not undef and $status == $FINISHED_TOTALLY?
+
   # Gather text after @bye
   my $element_after_bye = {'type' => 'postamble_after_end', 'contents' => [],
                            'parent' => $current};
diff --git a/tp/Texinfo/XS/parsetexi/end_line.c b/tp/Texinfo/XS/parsetexi/end_line.c
index aa98111c24..2216da4f58 100644
--- a/tp/Texinfo/XS/parsetexi/end_line.c
+++ b/tp/Texinfo/XS/parsetexi/end_line.c
@@ -1568,10 +1568,13 @@ end_line_misc_line (ELEMENT *current)
                   status = input_push_file (fullpath);
                   if (status)
                     {
+                      char *decoded_file_path
+                         = convert_to_utf8 (strdup(fullpath));
                       command_error (current,
-                                     "@include: could not open %s:",
-                                     text,
+                                     "@include: could not open %s: %s",
+                                     decoded_file_path,
                                      strerror (status));
+                      free (decoded_file_path);
                     }
                   else
                     {
diff --git a/tp/Texinfo/XS/parsetexi/input.c b/tp/Texinfo/XS/parsetexi/input.c
index d873d1efdc..d2afdaf1d4 100644
--- a/tp/Texinfo/XS/parsetexi/input.c
+++ b/tp/Texinfo/XS/parsetexi/input.c
@@ -45,6 +45,7 @@ typedef struct {
 
     FILE *file;
     SOURCE_INFO source_info;
+    char *input_file_path; /* for IN_file type, the full input file path */
 
     char *text;  /* Input text to be parsed as Texinfo. */
     char *ptext; /* How far we are through 'text'.  Used to split 'text'
@@ -213,12 +214,14 @@ encode_with_iconv (iconv_t our_iconv,  char *s)
     }
 
   t.text[t.end] = '\0';
+  /* FIXME freeing t.text leads to invalid memory access in the loop
+     above, both in the case E2BIG and in text_buffer_iconv */
   return strdup (t.text);
 }
 
 /* Return conversion of S according to input_encoding.  This function
    frees S. */
-static char *
+char *
 convert_to_utf8 (char *s)
 {
   iconv_t our_iconv = (iconv_t) -1;
@@ -490,9 +493,20 @@ next_text (ELEMENT *current)
           if (file != stdin)
             {
               if (fclose (input->file) == EOF)
-                fprintf (stderr, "error on closing %s: %s",
-                        input->source_info.file_name,
-                        strerror (errno));
+                {
+          /* convert to UTF-8 for the messages, to have character strings in perl
+             that will be encoded on output to the locale encoding.
+             Done differently for the file names in source_info
+             which are byte strings and end up unmodified in output error
+             messages.
+          */
+                  char *decoded_file_name
+                          = convert_to_utf8 (strdup(input->input_file_path));
+                  line_warn ("error on closing %s: %s",
+                             decoded_file_name,
+                             strerror (errno));
+                  free (decoded_file_name);
+                }
             }
         }
       else
@@ -550,6 +564,7 @@ input_push_text (char *text, int line_number, char *macro, char *value_flag)
 
   input_stack[input_number].type = IN_text;
   input_stack[input_number].file = 0;
+  input_stack[input_number].input_file_path = 0;
   input_stack[input_number].text = text;
   input_stack[input_number].ptext = text;
 
@@ -715,6 +730,7 @@ input_push_file (char *filename)
 {
   FILE *stream = 0;
   char *p, *q;
+  char *base_filename;
 
   if (!strcmp (filename, "-"))
     stream = stdin;
@@ -741,13 +757,14 @@ input_push_file (char *filename)
       q = strchr (q + 1, '/');
     }
   if (p)
-    filename = save_string (p+1);
+    base_filename = save_string (p+1);
   else
-    filename = save_string (filename);
+    base_filename = save_string (filename);
 
   input_stack[input_number].type = IN_file;
   input_stack[input_number].file = stream;
-  input_stack[input_number].source_info.file_name = filename;
+  input_stack[input_number].input_file_path = filename;
+  input_stack[input_number].source_info.file_name = base_filename;
   input_stack[input_number].source_info.line_nr = 0;
   input_stack[input_number].source_info.macro = 0;
   input_stack[input_number].input_source_mark = 0;
diff --git a/tp/Texinfo/XS/parsetexi/input.h b/tp/Texinfo/XS/parsetexi/input.h
index 311a9179e4..c56a900c86 100644
--- a/tp/Texinfo/XS/parsetexi/input.h
+++ b/tp/Texinfo/XS/parsetexi/input.h
@@ -20,6 +20,7 @@ int top_file_index (void);
 
 char *locate_include_file (char *filename);
 char *encode_file_name (char *filename);
+char *convert_to_utf8 (char *s);
 void set_input_encoding (char *encoding);
 void add_include_directory (char *filename);
 void clear_include_directories (void);



reply via email to

[Prev in Thread] Current Thread [Next in Thread]