bug-grep
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v3] dfa/grep: fix compilation with MBS_SUPPORT


From: Paolo Bonzini
Subject: [PATCH v3] dfa/grep: fix compilation with MBS_SUPPORT
Date: Thu, 25 Mar 2010 13:38:20 +0100

From: Norihiro Tanaka <address@hidden>

Unfortunately, using wchar.h unconditionally would not be okay for gawk.
Until Arnold is prodded strongly enough, we need some slightly ugly
hacks.

I think MBS_SUPPORT should become a configure-time option now that
gnulib is in use (because it will always be on otherwise).  There is
still value in removing the expensive multibyte support from src/dfa.c
if only C-locale matching is interesting.  Its implementation can
also be simplified to something like:

    #if MBS_SUPPORT
    #define GREP_MB_CUR_MAX MB_CUR_MAX
    #else
    #define GREP_MB_CUR_MAX 1
    #endif

Anyway, in the meantime this patch fixes the bitrot; with !MBS_SUPPORT
three tests are skipped and none fail.

* src/dfa.c (cur_mb_len): Initialize to 1 and always make it available.
(setbit_case_fold): Do not use wint_t in prototype if !MBS_SUPPORT.
(parse_bracket_exp): Fix compilation with !MBS_SUPPORT.
* src/kwsearch.c (Fcompile): Do not use mbtolower and MB_CUR_MAX
if !MBS_SUPPORT.
* src/searchutils.c (kwsinit): Do not refer to MB_CUR_MAX if !MBS_SUPPORT.

* tests/char-class-multibyte: Skip if UTF-8 matching does not work.
* tests/fmbtest.sh: Likewise.
---
        I already pushed the other fixes.

 src/dfa.c                  |   24 ++++++++++++++++++------
 src/kwsearch.c             |    2 ++
 src/searchutils.c          |    6 +++++-
 tests/char-class-multibyte |    4 ++++
 tests/fmbtest.sh           |    8 ++++++++
 5 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/src/dfa.c b/src/dfa.c
index 951c3b7..c2ef18c 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -243,7 +243,13 @@ dfasyntax (reg_syntax_t bits, int fold, unsigned char eol)
    For MB_CUR_MAX > 1, one or both of the two cases may not be set,
    so the resulting charset may only be used as an optimization.  */
 static void
-setbit_case_fold (wint_t b, charclass c)
+setbit_case_fold (
+#ifdef MBS_SUPPORT
+                  wint_t b,
+#else
+                  unsigned int b,
+#endif
+                  charclass c)
 {
   if (case_fold)
     {
@@ -309,11 +315,11 @@ static int parens;                /* Count of outstanding left parens. */
 static int minrep, maxrep;     /* Repeat counts for {m,n}. */
 static int hard_LC_COLLATE;    /* Nonzero if LC_COLLATE is hard.  */
 
+static int cur_mb_len = 1;     /* Length of the multibyte representation of
+                                  wctok.  */
 #ifdef MBS_SUPPORT
 /* These variables are used only if (MB_CUR_MAX > 1).  */
 static mbstate_t mbs;          /* Mbstate for mbrlen().  */
-static int cur_mb_len;         /* Length of the multibyte representation of
-                                  wctok.  */
 static wchar_t wctok;          /* Wide character representation of the current
                                   multibyte character.  */
 static unsigned char *mblen_buf;/* Correspond to the input buffer in dfaexec().
@@ -691,9 +697,9 @@ parse_bracket_exp (void)
          continue;
        }
 
-      setbit_case_fold (wc, ccl);
 #ifdef MBS_SUPPORT
       /* Build normal characters.  */
+      setbit_case_fold (wc, ccl);
       if (MB_CUR_MAX > 1)
         {
           if (case_fold && iswalpha(wc))
@@ -719,10 +725,16 @@ parse_bracket_exp (void)
                                    work_mbc->nchars + 1);
               work_mbc->chars[work_mbc->nchars++] = wc;
             }
-#endif
         }
+#else
+      setbit_case_fold (c, ccl);
+#endif
     }
-  while ((wc = wc1, (c = c1) != L']'));
+  while ((
+#ifdef MBS_SUPPORT
+        wc = wc1,
+#endif
+        (c = c1) != ']'));
 
 #ifdef MBS_SUPPORT
   if (MB_CUR_MAX > 1
diff --git a/src/kwsearch.c b/src/kwsearch.c
index 2f8e7b7..fa801e6 100644
--- a/src/kwsearch.c
+++ b/src/kwsearch.c
@@ -37,9 +37,11 @@ Fcompile (char const *pattern, size_t size)
 
   kwsinit (&kwset);
   psize = size;
+#ifdef MBS_SUPPORT
   if (match_icase && MB_CUR_MAX > 1)
     pat = mbtolower (pattern, &psize);
   else
+#endif
     pat = pattern;
 
   beg = pat;
diff --git a/src/searchutils.c b/src/searchutils.c
index 08cf420..e30355d 100644
--- a/src/searchutils.c
+++ b/src/searchutils.c
@@ -27,7 +27,11 @@ kwsinit (kwset_t *kwset)
   static char trans[NCHAR];
   int i;
 
-  if (match_icase && MB_CUR_MAX == 1)
+  if (match_icase
+#ifdef MBS_SUPPORT
+      && MB_CUR_MAX == 1
+#endif
+     )
     {
       for (i = 0; i < NCHAR; ++i)
         trans[i] = TOLOWER (i);
diff --git a/tests/char-class-multibyte b/tests/char-class-multibyte
index fccf13d..d7ed399 100644
--- a/tests/char-class-multibyte
+++ b/tests/char-class-multibyte
@@ -3,6 +3,10 @@
 : ${srcdir=.}
 . "$srcdir/init.sh"; path_prepend_ ../src
 
+if printf '\xc3\n' | LC_ALL=en_US.UTF-8 grep -q '[é]'; then
+  skip_ UTF-8 matching seems not to work
+fi
+
 printf 'é\n'      > exp1 || framework_failure_
 fail=0
 
diff --git a/tests/fmbtest.sh b/tests/fmbtest.sh
index 1b3a111..2f44879 100755
--- a/tests/fmbtest.sh
+++ b/tests/fmbtest.sh
@@ -11,6 +11,14 @@
 LC_ALL=cs_CZ.UTF-8 locale -k LC_CTYPE 2>/dev/null | ${GREP} -q charmap.*UTF-8 \
   || exit 77
 
+# If matching is done in single-byte mode, skip this test too
+printf 'é\n' | LC_ALL=cz_CZ.UTF-8 grep -Eq '^[é]{2}$'
+case $? in
+  0) exit 77;;
+  1) ;;
+  *) exit 1;;
+esac
+
 failures=0
 
 cat > csinput <<EOF
-- 
1.6.6.1





reply via email to

[Prev in Thread] Current Thread [Next in Thread]