>From 7a67844524c0657fc395966536805d9736c0a88e Mon Sep 17 00:00:00 2001 From: Norihiro Tanaka Date: Tue, 18 Mar 2014 21:01:47 +0900 Subject: [PATCH] grep: open CSET and transform into the upper case when MB_CUR_MAX == 1 in dfamust When MB_CUR_MAX == 1 and matching is case-insensitive, KWSet is not very helpful, because during parsing single-byte alphabetic characters are changed into a CSET, which dfamust does not extract. This patch opens the CSET in dfamust, making it possible to extract a longer fixed string from the tokens. * src/dfa.c (dfamust): open CSET and transform into the upper case when MB_CUR_MAX == 1 in dfamust. --- src/dfa.c | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/src/dfa.c b/src/dfa.c index 5e60cd5..5258a21 100644 --- a/src/dfa.c +++ b/src/dfa.c @@ -4101,7 +4101,36 @@ dfamust (struct dfa *d) /* not on *my* shift */ goto done; } - else if (t >= CSET || !MBS_SUPPORT || t == ANYCHAR || t == MBCSET) + else if (t >= CSET) + { + charclass ccl; + int j; + copyset (d->charclasses[t - CSET], ccl); + for (j = 0; j < NOTCHAR; ++j) + if (tstbit (j, ccl)) + break; + if (j < NOTCHAR) + { + int c = (case_fold && MB_CUR_MAX == 1) ? toupper (j) : j; + for (; j < NOTCHAR; j++) + if (tstbit (j, ccl) + && (!(case_fold && MB_CUR_MAX == 1) || c != toupper (j))) + break; + if (j < NOTCHAR) + resetmust (mp); + else + { + mp->is[0] = mp->left[0] = mp->right[0] = c; + mp->is[1] = mp->left[1] = mp->right[1] = '\0'; + mp->in = enlist (mp->in, mp->is, (size_t) 1); + if (mp->in == NULL) + goto done; + } + } + else + resetmust (mp); + } + else if (!MBS_SUPPORT || t == ANYCHAR || t == MBCSET) { /* easy enough */ resetmust (mp); @@ -4110,7 +4139,8 @@ dfamust (struct dfa *d) { /* plain character */ resetmust (mp); - mp->is[0] = mp->left[0] = mp->right[0] = t; + mp->is[0] = mp->left[0] = mp->right[0] = + (case_fold && MB_CUR_MAX == 1) ? 
toupper (t) : t; mp->is[1] = mp->left[1] = mp->right[1] = '\0'; mp->in = enlist (mp->in, mp->is, (size_t) 1); if (mp->in == NULL) -- 1.9.0