[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v3] dfa/grep: fix compilation with MBS_SUPPORT
From: |
Paolo Bonzini |
Subject: |
[PATCH v3] dfa/grep: fix compilation with MBS_SUPPORT |
Date: |
Thu, 25 Mar 2010 13:38:20 +0100 |
From: Norihiro Tanaka <address@hidden>
Unfortunately, using wchar.h unconditionally would not be okay for gawk.
Until Arnold is prodded strongly enough, we need some slightly ugly
hacks.
I think MBS_SUPPORT should become a configure-time option now that
gnulib is in use (because it will always be on otherwise). There is
still value in removing the expensive multibyte support from src/dfa.c
if only C-locale matching is interesting. Its implementation can
also be simplified to something like:
#if MBS_SUPPORT
#define GREP_MB_CUR_MAX MB_CUR_MAX
#else
#define GREP_MB_CUR_MAX 1
#endif
Anyway, in the meantime this patch fixes the bitrot; with !MBS_SUPPORT
three tests are skipped and none fail.
* src/dfa.c (cur_mb_len): Initialize to 1 and always make it available.
(setbit_case_fold): Do not use wint_t in prototype if !MBS_SUPPORT.
(parse_bracket_exp): Fix compilation with !MBS_SUPPORT.
* src/kwsearch.c (Fcompile): Do not use mbtolower and MB_CUR_MAX
if !MBS_SUPPORT.
* src/searchutils.c (kwsinit): Do not refer to MB_CUR_MAX if !MBS_SUPPORT.
* tests/char-class-multibyte: Skip if UTF-8 matching does not work.
* tests/fmbtest.sh: Likewise.
---
I already pushed the other fixes.
src/dfa.c | 24 ++++++++++++++++++------
src/kwsearch.c | 2 ++
src/searchutils.c | 6 +++++-
tests/char-class-multibyte | 4 ++++
tests/fmbtest.sh | 8 ++++++++
5 files changed, 37 insertions(+), 7 deletions(-)
diff --git a/src/dfa.c b/src/dfa.c
index 951c3b7..c2ef18c 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -243,7 +243,13 @@ dfasyntax (reg_syntax_t bits, int fold, unsigned char eol)
For MB_CUR_MAX > 1, one or both of the two cases may not be set,
so the resulting charset may only be used as an optimization. */
static void
-setbit_case_fold (wint_t b, charclass c)
+setbit_case_fold (
+#ifdef MBS_SUPPORT
+ wint_t b,
+#else
+ unsigned int b,
+#endif
+ charclass c)
{
if (case_fold)
{
@@ -309,11 +315,11 @@ static int parens; /* Count of outstanding
left parens. */
static int minrep, maxrep; /* Repeat counts for {m,n}. */
static int hard_LC_COLLATE; /* Nonzero if LC_COLLATE is hard. */
+static int cur_mb_len = 1; /* Length of the multibyte representation of
+ wctok. */
#ifdef MBS_SUPPORT
/* These variables are used only if (MB_CUR_MAX > 1). */
static mbstate_t mbs; /* Mbstate for mbrlen(). */
-static int cur_mb_len; /* Length of the multibyte representation of
- wctok. */
static wchar_t wctok; /* Wide character representation of the current
multibyte character. */
static unsigned char *mblen_buf;/* Correspond to the input buffer in dfaexec().
@@ -691,9 +697,9 @@ parse_bracket_exp (void)
continue;
}
- setbit_case_fold (wc, ccl);
#ifdef MBS_SUPPORT
/* Build normal characters. */
+ setbit_case_fold (wc, ccl);
if (MB_CUR_MAX > 1)
{
if (case_fold && iswalpha(wc))
@@ -719,10 +725,16 @@ parse_bracket_exp (void)
work_mbc->nchars + 1);
work_mbc->chars[work_mbc->nchars++] = wc;
}
-#endif
}
+#else
+ setbit_case_fold (c, ccl);
+#endif
}
- while ((wc = wc1, (c = c1) != L']'));
+ while ((
+#ifdef MBS_SUPPORT
+ wc = wc1,
+#endif
+ (c = c1) != ']'));
#ifdef MBS_SUPPORT
if (MB_CUR_MAX > 1
diff --git a/src/kwsearch.c b/src/kwsearch.c
index 2f8e7b7..fa801e6 100644
--- a/src/kwsearch.c
+++ b/src/kwsearch.c
@@ -37,9 +37,11 @@ Fcompile (char const *pattern, size_t size)
kwsinit (&kwset);
psize = size;
+#ifdef MBS_SUPPORT
if (match_icase && MB_CUR_MAX > 1)
pat = mbtolower (pattern, &psize);
else
+#endif
pat = pattern;
beg = pat;
diff --git a/src/searchutils.c b/src/searchutils.c
index 08cf420..e30355d 100644
--- a/src/searchutils.c
+++ b/src/searchutils.c
@@ -27,7 +27,11 @@ kwsinit (kwset_t *kwset)
static char trans[NCHAR];
int i;
- if (match_icase && MB_CUR_MAX == 1)
+ if (match_icase
+#ifdef MBS_SUPPORT
+ && MB_CUR_MAX == 1
+#endif
+ )
{
for (i = 0; i < NCHAR; ++i)
trans[i] = TOLOWER (i);
diff --git a/tests/char-class-multibyte b/tests/char-class-multibyte
index fccf13d..d7ed399 100644
--- a/tests/char-class-multibyte
+++ b/tests/char-class-multibyte
@@ -3,6 +3,10 @@
: ${srcdir=.}
. "$srcdir/init.sh"; path_prepend_ ../src
+if printf '\xc3\n' | LC_ALL=en_US.UTF-8 grep -q '[é]'; then
+ skip_ UTF-8 matching seems not to work
+fi
+
printf 'é\n' > exp1 || framework_failure_
fail=0
diff --git a/tests/fmbtest.sh b/tests/fmbtest.sh
index 1b3a111..2f44879 100755
--- a/tests/fmbtest.sh
+++ b/tests/fmbtest.sh
@@ -11,6 +11,14 @@
LC_ALL=cs_CZ.UTF-8 locale -k LC_CTYPE 2>/dev/null | ${GREP} -q charmap.*UTF-8 \
|| exit 77
+# If matching is done in single-byte mode, skip this test too
+printf 'é\n' | LC_ALL=cz_CZ.UTF-8 grep -Eq '^[é]{2}$'
+case $? in
+ 0) exit 77;;
+ 1) ;;
+ *) exit 1;;
+esac
+
failures=0
cat > csinput <<EOF
--
1.6.6.1
- [PATCH v3] dfa/grep: fix compilation with MBS_SUPPORT,
Paolo Bonzini <=