bug-grep
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Rational Range Interpretation patches, 1/3


From: Aharon Robbins
Subject: Rational Range Interpretation patches, 1/3
Date: Mon, 16 Jan 2012 22:24:44 +0200
User-agent: Heirloom mailx 12.4 7/29/08

Hello All.

Here is my 2nd try at RRI.  The 3 patches are for dfa.c, grep.texi,
and gnulib/reg*.c.  

Paolo - I still think that when compiling the dfa my change is correct,
since the regex routines were called only for checking if the range is
valid.

Thanks,

Arnold
---------------
>From 5d4a1e56345e14c19224b47424a01b51120cf234 Mon Sep 17 00:00:00 2001
From: Arnold D. Robbins <address@hidden>
Date: Mon, 16 Jan 2012 22:04:11 +0200
Subject: [PATCH 1/2] Rational Range Interpretation implemented.

* dfa.c (hard_LC_COLLATE): Removed.
  (parse_bracket_exp): Compare lower and upper range bounds directly.
  (dfaparse): Don't set hard_LC_COLLATE.
  (match_mb_charset): Test wide character directly instead of using wcscoll.
---
 src/dfa.c |   41 ++++++-----------------------------------
 1 files changed, 6 insertions(+), 35 deletions(-)

diff --git a/src/dfa.c b/src/dfa.c
index 6ab0ab4..edc6bd9 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -29,6 +29,7 @@
 #include <limits.h>
 #include <string.h>
 #include <locale.h>
+#include <stdbool.h>
 
 #define STREQ(a, b) (strcmp (a, b) == 0)
 
@@ -46,7 +47,7 @@
 #include "gettext.h"
 #define _(str) gettext (str)
 
-#include "mbsupport.h"  /* defines MBS_SUPPORT if appropriate */
+#include "mbsupport.h"  /* defines MBS_SUPPORT to 1 or 0, as appropriate */
 #include <wchar.h>
 #include <wctype.h>
 
@@ -56,7 +57,6 @@
 
 #include "regex.h"
 #include "dfa.h"
-#include "hard-locale.h"
 #include "xalloc.h"
 
 /* HPUX, define those as macros in sys/param.h */
@@ -657,7 +657,6 @@ static int laststart;               /* True if we're separated from beginning or (, |
                                    only by zero-width characters. */
 static int parens;             /* Count of outstanding left parens. */
 static int minrep, maxrep;     /* Repeat counts for {m,n}. */
-static int hard_LC_COLLATE;    /* Nonzero if LC_COLLATE is hard.  */
 
 static int cur_mb_len = 1;     /* Length of the multibyte representation of
                                    wctok.  */
@@ -992,26 +991,8 @@ parse_bracket_exp (void)
                   c1 = tolower (c1);
                   c2 = tolower (c2);
                 }
-              if (!hard_LC_COLLATE)
-                for (c = c1; c <= c2; c++)
-                  setbit_case_fold_c (c, ccl);
-              else
-                {
-                  /* Defer to the system regex library about the meaning
-                     of range expressions.  */
-                  regex_t re;
-                  char pattern[6] = { '[', c1, '-', c2, ']', 0 };
-                  char subject[2] = { 0, 0 };
-                  regcomp (&re, pattern, REG_NOSUB);
-                  for (c = 0; c < NOTCHAR; ++c)
-                    {
-                      subject[0] = c;
-                      if (!(case_fold && isupper (c))
-                          && regexec (&re, subject, 0, NULL, 0) != REG_NOMATCH)
-                        setbit_case_fold_c (c, ccl);
-                    }
-                  regfree (&re);
-                }
+              for (c = c1; c <= c2; c++)
+                setbit_case_fold_c (c, ccl);
             }
 
           colon_warning_state |= 8;
@@ -1792,9 +1773,6 @@ dfaparse (char const *s, size_t len, struct dfa *d)
   lasttok = END;
   laststart = 1;
   parens = 0;
-#ifdef LC_COLLATE
-  hard_LC_COLLATE = hard_locale (LC_COLLATE);
-#endif
   if (MB_CUR_MAX > 1)
     {
       cur_mb_len = 0;
@@ -2884,7 +2862,6 @@ match_mb_charset (struct dfa *d, int s, position pos, int idx)
                            with which this operator match.  */
   int op_len;          /* Length of the operator.  */
   char buffer[128];
-  wchar_t wcbuf[6];
 
   /* Pointer to the structure to which we are currently refering.  */
   struct mb_char_classes *work_mbc;
@@ -2961,17 +2938,11 @@ match_mb_charset (struct dfa *d, int s, position pos, int idx)
         }
     }
 
-  wcbuf[0] = wc;
-  wcbuf[1] = wcbuf[3] = wcbuf[5] = '\0';
-
   /* match with a range?  */
   for (i = 0; i<work_mbc->nranges; i++)
     {
-      wcbuf[2] = work_mbc->range_sts[i];
-      wcbuf[4] = work_mbc->range_ends[i];
-
-      if (wcscoll(wcbuf, wcbuf+2) >= 0 &&
-          wcscoll(wcbuf+4, wcbuf) >= 0)
+      if (work_mbc->range_sts[i] <= wc &&
+          wc <= work_mbc->range_ends[i])
         goto charset_matched;
     }
 
-- 
1.7.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]