texinfo-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

branch master updated: * tp/Texinfo/XS/parsetexi/end_line.c (isascii_alpha) [...]


From: Patrice Dumas
Subject: branch master updated: * tp/Texinfo/XS/parsetexi/end_line.c (isascii_alpha) (parse_line_command_args, end_line_starting_block) (end_line_misc_line), tp/Texinfo/XS/parsetexi/handle_commands.c (parse_rawline_command), tp/Texinfo/XS/parsetexi/macro.c (parse_macro_command_line), tp/Texinfo/XS/parsetexi/parser.c (isascii_alnum, read_command_name, read_flag_name): add isascii_alpha to replace isalpha and isascii_alnum to replace isalnum to make sure that the character is also ascii, both because we want to select asci [...]
Date: Sat, 22 Jul 2023 11:21:05 -0400

This is an automated email from the git hooks/post-receive script.

pertusus pushed a commit to branch master
in repository texinfo.

The following commit(s) were added to refs/heads/master by this push:
     new dfedd47f0c * tp/Texinfo/XS/parsetexi/end_line.c (isascii_alpha) 
(parse_line_command_args, end_line_starting_block) (end_line_misc_line), 
tp/Texinfo/XS/parsetexi/handle_commands.c (parse_rawline_command), 
tp/Texinfo/XS/parsetexi/macro.c (parse_macro_command_line), 
tp/Texinfo/XS/parsetexi/parser.c (isascii_alnum, read_command_name, 
read_flag_name): add isascii_alpha to replace isalpha and isascii_alnum to 
replace isalnum to make sure that the character is also ascii, both because w 
[...]
dfedd47f0c is described below

commit dfedd47f0cc1a597da68d65356b6ddde34db60f7
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Sat Jul 22 17:20:54 2023 +0200

    * tp/Texinfo/XS/parsetexi/end_line.c (isascii_alpha)
    (parse_line_command_args, end_line_starting_block)
    (end_line_misc_line), tp/Texinfo/XS/parsetexi/handle_commands.c
    (parse_rawline_command), tp/Texinfo/XS/parsetexi/macro.c
    (parse_macro_command_line), tp/Texinfo/XS/parsetexi/parser.c
    (isascii_alnum, read_command_name, read_flag_name): add isascii_alpha
    to replace isalpha and isascii_alnum to replace isalnum to make sure
    that the character is also ascii, both because we want to select ascii
    characters only, and because we do not want to depend on the locale.
    
    * tp/Texinfo/XS/parsetexi/end_line.c (end_line_misc_line): replace
    uses of isspace with str* functions applied to whitespace_chars,
    to avoid depending on the locale as isspace does.
    
    * tp/Texinfo/ParserNonXS.pm, tp/Texinfo/XS/parsetexi/end_line.c,
    tp/Texinfo/XS/parsetexi/parser.c: use \d more consistently instead
    of [0-9] or [[:digit:]] in the Perl parser, and use digit_chars
    consistently in the XS parser, for more consistent code and easier
    comparison between the two parsers.
---
 ChangeLog                                 | 22 +++++++++++++
 tp/Texinfo/ParserNonXS.pm                 | 12 +++----
 tp/Texinfo/XS/parsetexi/end_line.c        | 52 +++++++++++++++++--------------
 tp/Texinfo/XS/parsetexi/handle_commands.c |  2 +-
 tp/Texinfo/XS/parsetexi/macro.c           |  2 +-
 tp/Texinfo/XS/parsetexi/parser.c          | 16 +++++++---
 tp/Texinfo/XS/parsetexi/parser.h          |  1 +
 7 files changed, 70 insertions(+), 37 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 0afc65560c..afbd90f798 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,25 @@
+2023-07-22  Patrice Dumas  <pertusus@free.fr>
+
+       * tp/Texinfo/XS/parsetexi/end_line.c (isascii_alpha)
+       (parse_line_command_args, end_line_starting_block)
+       (end_line_misc_line), tp/Texinfo/XS/parsetexi/handle_commands.c
+       (parse_rawline_command), tp/Texinfo/XS/parsetexi/macro.c
+       (parse_macro_command_line), tp/Texinfo/XS/parsetexi/parser.c
+       (isascii_alnum, read_command_name, read_flag_name): add isascii_alpha
+       to replace isalpha and isascii_alnum to replace isalnum to make sure
+       that the character is also ascii, both because we want to select ascii
+       characters only, and because we do not want to depend on the locale.
+
+       * tp/Texinfo/XS/parsetexi/end_line.c (end_line_misc_line): replace
+       uses of isspace by other str* functions applied on whitespace_chars
+       to avoid being dependent on the locale as isspace is.
+
+       * tp/Texinfo/ParserNonXS.pm, tp/Texinfo/XS/parsetexi/end_line.c,
+       tp/Texinfo/XS/parsetexi/parser.c: use more consistently \d instead
+       of [0-9] or [[:digit:]] in the perl parser, and use consistently
+       digit_chars in the XS parser for a more consistent code and easier
+       comparison between the two codes.
+
 2023-07-22  Patrice Dumas  <pertusus@free.fr>
 
        Same parsing of @macro line for both parsers
diff --git a/tp/Texinfo/ParserNonXS.pm b/tp/Texinfo/ParserNonXS.pm
index a51e4eee20..5f6a135dff 100644
--- a/tp/Texinfo/ParserNonXS.pm
+++ b/tp/Texinfo/ParserNonXS.pm
@@ -4028,7 +4028,7 @@ sub _end_line_starting_block($$$)
         }
         my $arg = $current->{'args'}->[0]->{'contents'}->[0];
         if (!defined($arg->{'text'})
-            or $arg->{'text'} !~ /^(([[:digit:]]+)|([[:alpha:]]))$/) {
+            or $arg->{'text'} !~ /^((\d+)|([[:alpha:]]))$/) {
           $self->_command_error($current, $source_info,
                       __("bad argument to \@%s"), $command);
         } else {
@@ -7547,7 +7547,7 @@ sub _parse_line_command_args($$$)
       }
     }
   } elsif ($command eq 'sp') {
-    if ($line =~ /^([0-9]+)$/) {
+    if ($line =~ /^(\d+)$/) {
       $args = [$1];
     } else {
       $self->_line_error(sprintf(__("\@sp arg must be numeric, not `%s'"),
@@ -7692,8 +7692,8 @@ sub _parse_line_command_args($$$)
                                  $line), $source_info);
     }
   } elsif ($command eq 'need') { # only a warning
-    if (($line =~ /^([0-9]+(\.[0-9]*)?)$/) or
-             ($line =~ /^(\.[0-9]+)$/)) {
+    if (($line =~ /^(\d+(\.\d*)?)$/) or
+             ($line =~ /^(\.\d+)$/)) {
       $args = [$1];
     } else {
       $self->_line_error(sprintf(__("bad argument to \@need: %s"),
@@ -7702,7 +7702,7 @@ sub _parse_line_command_args($$$)
   } elsif ($command eq 'paragraphindent') {
     if ($line =~ /^([\w\-]+)$/) {
       my $value = $1;
-      if ($value =~ /^([0-9]+)$/ or $value eq 'none' or $value eq 'asis') {
+      if ($value =~ /^(\d+)$/ or $value eq 'none' or $value eq 'asis') {
         $args = [$1];
       } else {
         $self->_line_error(sprintf(__(
@@ -7723,7 +7723,7 @@ sub _parse_line_command_args($$$)
                                            $line), $source_info);
     }
   } elsif ($command eq 'exampleindent') {
-    if ($line =~ /^([0-9]+)$/) {
+    if ($line =~ /^(\d+)$/) {
       $args = [$1];
     } elsif ($line =~ /^(asis)$/) {
       $args = [$1];
diff --git a/tp/Texinfo/XS/parsetexi/end_line.c b/tp/Texinfo/XS/parsetexi/end_line.c
index d4c1afa236..8f360d3685 100644
--- a/tp/Texinfo/XS/parsetexi/end_line.c
+++ b/tp/Texinfo/XS/parsetexi/end_line.c
@@ -31,6 +31,12 @@
 #include "source_marks.h"
 #include "handle_commands.h"
 
+static int
+isascii_alpha (int c)
+{
+  return (((c & ~0x7f) == 0) && isalpha(c));
+}
+
 static int
 is_decimal_number (char *string)
 {
@@ -124,7 +130,7 @@ parse_line_command_args (ELEMENT *line_command)
         line++;
         line += strspn (line, whitespace_chars);
 
-        if (!isalnum (*line))
+        if (!isascii_alnum (*line))
           goto alias_invalid;
         existing = read_command_name (&line);
         if (!existing)
@@ -341,7 +347,7 @@ parse_line_command_args (ELEMENT *line_command)
         INDEX *from_index, *to_index;
         char *p = line;
 
-        if (!isalnum (*p))
+        if (!isascii_alnum (*p))
           goto synindex_invalid;
         from = read_command_name (&p);
         if (!from)
@@ -349,7 +355,7 @@ parse_line_command_args (ELEMENT *line_command)
 
         p += strspn (p, whitespace_chars);
 
-        if (!isalnum (*p))
+        if (!isascii_alnum (*p))
           goto synindex_invalid;
         to = read_command_name (&p);
         if (!to)
@@ -881,10 +887,10 @@ end_line_starting_block (ELEMENT *current)
               /* Check if @enumerate specification is either a single
                  letter or a string of digits. */
               if (g->text.end == 1
-                    && isalpha ((unsigned char) g->text.text[0])
+                    && isascii_alpha ((unsigned char) g->text.text[0])
                   || (g->text.end > 0
                       && !*(g->text.text
-                            + strspn (g->text.text, "0123456789"))))
+                            + strspn (g->text.text, digit_chars))))
                 {
                   spec = g->text.text;
                 }
@@ -1324,20 +1330,17 @@ end_line_misc_line (ELEMENT *current)
                  non - _ characters */
               for (p = text; *p; p++)
                 {
-                  /* check if ascii */
-                  if ((*p & ~0x7f) == 0)
+                  /* check if ascii and alphanumeric */
+                  if (isascii_alnum(*p))
                     {
-                      if (isalnum (*p))
-                        {
-                          possible_encoding = 1;
-                          *q = tolower (*p);
-                          q++;
-                        }
-                      else if (*p == '_' || *p == '-')
-                        {
-                          *q = *p;
-                          q++;
-                        }
+                      possible_encoding = 1;
+                      *q = tolower (*p);
+                      q++;
+                    }
+                  else if (*p == '_' || *p == '-')
+                    {
+                      *q = *p;
+                      q++;
                     }
                 }
               *q = '\0';
@@ -1361,7 +1364,8 @@ end_line_misc_line (ELEMENT *current)
 
                     text_lc = strdup (text);
                     for (p = text_lc; *p; p++)
-                      *p = tolower (*p);
+                      if (isascii_alpha (*p))
+                        *p = tolower (*p);
 
                     for (i = 0; (canonical_encodings[i]); i++)
                       {
@@ -1444,7 +1448,7 @@ end_line_misc_line (ELEMENT *current)
                  just check if the language code looks right. */
 
               p = text;
-              while (isalpha ((unsigned char) *p))
+              while (isascii_alpha ((unsigned char) *p))
                 p++;
               if (*p && *p != '_')
                 {
@@ -1469,7 +1473,7 @@ end_line_misc_line (ELEMENT *current)
                       p = q;
                       /* Language code should be of the form LL_CC,
                          language code followed by country code. */
-                      while (isalpha ((unsigned char) *p))
+                      while (isascii_alpha ((unsigned char) *p))
                         p++;
                       if (*p || p - q > 4)
                         {
@@ -1489,14 +1493,14 @@ end_line_misc_line (ELEMENT *current)
           p = convert_to_texinfo (args_child_by_index(current, 0));
 
           texi_line = p;
-          while (isspace ((unsigned char) *texi_line))
-            texi_line++;
+
+          texi_line += strspn (texi_line, whitespace_chars);
 
           /* Trim leading and trailing whitespace. */
           p1 = strchr (texi_line, '\0');
           if (p1 > texi_line)
             {
-              while (p1 > texi_line && isspace ((unsigned char) p1[-1]))
+              while (p1 > texi_line && strchr (whitespace_chars, p1[-1]))
                 p1--;
               *p1 = '\0';
             }
diff --git a/tp/Texinfo/XS/parsetexi/handle_commands.c b/tp/Texinfo/XS/parsetexi/handle_commands.c
index 3d0ef5eac4..e7ac2b14c8 100644
--- a/tp/Texinfo/XS/parsetexi/handle_commands.c
+++ b/tp/Texinfo/XS/parsetexi/handle_commands.c
@@ -177,7 +177,7 @@ parse_rawline_command (char *line, enum command_id cmd,
       p += strspn (p, whitespace_chars);
       if (!*p)
         goto set_no_name;
-      if (!isalnum (*p) && *p != '-' && *p != '_')
+      if (!isascii_alnum (*p) && *p != '-' && *p != '_')
         goto set_invalid;
       q = strpbrk (p,
                    " \t\f\r\n"       /* whitespace */
diff --git a/tp/Texinfo/XS/parsetexi/macro.c b/tp/Texinfo/XS/parsetexi/macro.c
index 7dd7175bbd..89648f8fa2 100644
--- a/tp/Texinfo/XS/parsetexi/macro.c
+++ b/tp/Texinfo/XS/parsetexi/macro.c
@@ -204,7 +204,7 @@ parse_macro_command_line (enum command_id cmd, char **line_inout,
               char *p;
               for (p = args_ptr; p < q2; p++)
                 {
-                  if (!isalnum (*p) && *p != '_' && *p != '-')
+                  if (!isascii_alnum (*p) && *p != '_' && *p != '-')
                     {
                       char c = *q2; *q2 = 0;
                       line_error ("bad or empty @%s formal argument: %s",
diff --git a/tp/Texinfo/XS/parsetexi/parser.c b/tp/Texinfo/XS/parsetexi/parser.c
index 01a818b636..5cea32e31c 100644
--- a/tp/Texinfo/XS/parsetexi/parser.c
+++ b/tp/Texinfo/XS/parsetexi/parser.c
@@ -62,6 +62,12 @@ looking_at (char *s1, char *s2)
   return !strncmp (s1, s2, strlen (s2));
 }
 
+int
+isascii_alnum (int c)
+{
+  return (((c & ~0x7f) == 0) && isalnum(c));
+}
+
 /* Look for a sequence of alphanumeric characters or hyphens, where the
    first isn't a hyphen.  This is the format of (non-single-character) Texinfo 
    commands, but is also used elsewhere.  Return value to be freed by caller.
@@ -73,10 +79,10 @@ read_command_name (char **ptr)
   char *ret = 0;
 
   q = p;
-  if (!isalnum ((unsigned char) *q))
+  if (!isascii_alnum ((unsigned char) *q))
     return 0; /* Invalid. */
 
-  while (isalnum ((unsigned char) *q) || *q == '-' || *q == '_')
+  while (isascii_alnum ((unsigned char) *q) || *q == '-' || *q == '_')
     q++;
   ret = strndup (p, q - p);
   p = q;
@@ -125,7 +131,7 @@ read_flag_name (char **ptr)
   char *ret = 0;
 
   q = p;
-  if (!isalnum ((unsigned char) *q) && *q != '-' && *q != '_')
+  if (!isascii_alnum ((unsigned char) *q) && *q != '-' && *q != '_')
     return 0; /* Invalid. */
 
   while (!strchr (whitespace_chars, *q)
@@ -2444,7 +2450,7 @@ check_line_directive (char *line)
   p += strspn (p, " \t");
 
   /* p should now be at the line number */
-  if (!strchr ("0123456789", *p))
+  if (!strchr (digit_chars, *p))
     return 0;
   line_no = strtoul (p, &p, 10);
 
@@ -2463,7 +2469,7 @@ check_line_directive (char *line)
       p = q + 1;
       p += strspn (p, " \t");
 
-      p += strspn (p, "0123456789");
+      p += strspn (p, digit_chars);
       p += strspn (p, " \t");
     }
   if (*p && *p != '\n')
diff --git a/tp/Texinfo/XS/parsetexi/parser.h b/tp/Texinfo/XS/parsetexi/parser.h
index 47c23f1a50..79cb4a9879 100644
--- a/tp/Texinfo/XS/parsetexi/parser.h
+++ b/tp/Texinfo/XS/parsetexi/parser.h
@@ -150,6 +150,7 @@ typedef struct {
 } CONDITIONAL_STACK_ITEM;
 
 size_t count_convert_u8 (char *text);
+int isascii_alnum (int c);
 ELEMENT *parse_texi (ELEMENT *root_elt, ELEMENT *current_elt);
 void push_conditional_stack (enum command_id cond, SOURCE_MARK *source_mark);
 CONDITIONAL_STACK_ITEM *pop_conditional_stack (void);



reply via email to

[Prev in Thread] Current Thread [Next in Thread]