[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 14/17] dfa: cache MB_CUR_MAX for dfaexec
From: Paolo Bonzini
Subject: [PATCH 14/17] dfa: cache MB_CUR_MAX for dfaexec
Date: Fri, 12 Mar 2010 18:49:15 +0100
* src/dfa.c (dfastate, dfaexec): Use d->mb_cur_max.
(dfainit): Initialize it.
(free_mbdata): New, extracted out of dfafree.
(dfafree): Use it.
* src/dfa.h (struct dfa): Add mb_cur_max.
---
src/dfa.c | 73 ++++++++++++++++++++++++++++++++++++-------------------------
src/dfa.h | 2 +
2 files changed, 45 insertions(+), 30 deletions(-)
diff --git a/src/dfa.c b/src/dfa.c
index f17f550..9703c4f 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -2105,7 +2105,7 @@ dfastate (int s, struct dfa *d, int trans[])
insert(d->follows[grps[i].elems[j].index].elems[k], &follows);
#ifdef MBS_SUPPORT
- if (MB_CUR_MAX > 1)
+ if (d->mb_cur_max > 1)
{
/* If a token in follows.elems is not 1st byte of a multibyte
character, or the states of follows must accept the bytes
@@ -2140,7 +2140,7 @@ dfastate (int s, struct dfa *d, int trans[])
/* If we are building a searching matcher, throw in the positions
of state 0 as well. */
#ifdef MBS_SUPPORT
- if (d->searchflag && (MB_CUR_MAX == 1 || !next_isnt_1st_byte))
+ if (d->searchflag && (d->mb_cur_max == 1 || !next_isnt_1st_byte))
#else
if (d->searchflag)
#endif
@@ -2764,7 +2764,7 @@ dfaexec (struct dfa *d, char const *begin, char *end,
*end = eol;
#ifdef MBS_SUPPORT
- if (MB_CUR_MAX > 1)
+ if (d->mb_cur_max > 1)
{
int remain_bytes, i;
buf_begin = (unsigned char *) begin;
@@ -2809,7 +2809,7 @@ dfaexec (struct dfa *d, char const *begin, char *end,
for (;;)
{
#ifdef MBS_SUPPORT
- if (MB_CUR_MAX > 1)
+ if (d->mb_cur_max > 1)
while ((t = trans[s]))
{
if ((char *) p > end)
@@ -2846,7 +2846,7 @@ dfaexec (struct dfa *d, char const *begin, char *end,
if (backref)
*backref = (d->states[s].backref != 0);
#ifdef MBS_SUPPORT
- if (MB_CUR_MAX > 1)
+ if (d->mb_cur_max > 1)
{
free(mblen_buf);
free(inputwcs);
@@ -2858,7 +2858,7 @@ dfaexec (struct dfa *d, char const *begin, char *end,
s1 = s;
#ifdef MBS_SUPPORT
- if (MB_CUR_MAX > 1)
+ if (d->mb_cur_max > 1)
{
/* Can match with a multibyte character (and multicharacter
collating element). Transition table might be updated. */
@@ -2879,7 +2879,7 @@ dfaexec (struct dfa *d, char const *begin, char *end,
if ((char *) p > end)
{
#ifdef MBS_SUPPORT
- if (MB_CUR_MAX > 1)
+ if (d->mb_cur_max > 1)
{
free(mblen_buf);
free(inputwcs);
@@ -2906,6 +2906,37 @@ dfaexec (struct dfa *d, char const *begin, char *end,
}
}
+static void
+free_mbdata (struct dfa *d)
+{
+ int i;
+
+ free(d->multibyte_prop);
+ d->multibyte_prop = NULL;
+
+ for (i = 0; i < d->nmbcsets; ++i)
+ {
+ int j;
+ struct mb_char_classes *p = &(d->mbcsets[i]);
+ free(p->chars);
+ free(p->ch_classes);
+ free(p->range_sts);
+ free(p->range_ends);
+
+ for (j = 0; j < p->nequivs; ++j)
+ free(p->equivs[j]);
+ free(p->equivs);
+
+ for (j = 0; j < p->ncoll_elems; ++j)
+ free(p->coll_elems[j]);
+ free(p->coll_elems);
+ }
+
+ free(d->mbcsets);
+ d->mbcsets = NULL;
+ d->nmbcsets = 0;
+}
+
/* Initialize the components of a dfa that the other routines don't
initialize for themselves. */
void
@@ -2918,8 +2949,10 @@ dfainit (struct dfa *d)
d->talloc = 1;
MALLOC(d->tokens, token, d->talloc);
d->tindex = d->depth = d->nleaves = d->nregexps = 0;
+
#ifdef MBS_SUPPORT
- if (MB_CUR_MAX > 1)
+ d->mb_cur_max = MB_CUR_MAX;
+ if (d->mb_cur_max > 1)
{
d->nmultibyte_prop = 1;
MALLOC(d->multibyte_prop, int, d->nmultibyte_prop);
@@ -2964,28 +2997,8 @@ dfafree (struct dfa *d)
free(d->tokens);
#ifdef MBS_SUPPORT
- if (MB_CUR_MAX > 1)
- {
- free(d->multibyte_prop);
- for (i = 0; i < d->nmbcsets; ++i)
- {
- int j;
- struct mb_char_classes *p = &(d->mbcsets[i]);
- free(p->chars);
- free(p->ch_classes);
- free(p->range_sts);
- free(p->range_ends);
-
- for (j = 0; j < p->nequivs; ++j)
- free(p->equivs[j]);
- free(p->equivs);
-
- for (j = 0; j < p->ncoll_elems; ++j)
- free(p->coll_elems[j]);
- free(p->coll_elems);
- }
- free(d->mbcsets);
- }
+ if (d->mb_cur_max > 1)
+ free_mbdata(d);
#endif /* MBS_SUPPORT */
for (i = 0; i < d->sindex; ++i) {
diff --git a/src/dfa.h b/src/dfa.h
index b8eb0c2..89460aa 100644
--- a/src/dfa.h
+++ b/src/dfa.h
@@ -278,6 +278,8 @@ struct dfa
int nregexps; /* Count of parallel regexps being built
with dfaparse(). */
#ifdef MBS_SUPPORT
+ int mb_cur_max; /* Cached value of MB_CUR_MAX. */
+
/* These stuff are used only if MB_CUR_MAX > 1 or multibyte environments. */
int nmultibyte_prop;
int *multibyte_prop;
--
1.6.6
- Re: [PATCH 06/17] grep: fix error-message-uppercase, (continued)
- [PATCH 09/17] syntax-check: enable space-tab, Paolo Bonzini, 2010/03/12
- [PATCH 08/17] syntax-check: enable m4-quote-check, Paolo Bonzini, 2010/03/12
- [PATCH 10/17] tests: add more UTF-8 test cases, Paolo Bonzini, 2010/03/12
- [PATCH 13/17] dfa: optimize simple character sets under UTF-8 charsets, Paolo Bonzini, 2010/03/12
- [PATCH 12/17] dfa: speed up handling of brackets, Paolo Bonzini, 2010/03/12
- [PATCH 11/17] dfa: rewrite handling of multibyte case folding, Paolo Bonzini, 2010/03/12
- [PATCH 14/17] dfa: cache MB_CUR_MAX for dfaexec,
Paolo Bonzini <=
- [PATCH 15/17] dfa: run simple UTF-8 regexps as a single-byte character set, Paolo Bonzini, 2010/03/12
- [PATCH 16/17] grep: remove check_multibyte_string, fix non-UTF8 missed match, Paolo Bonzini, 2010/03/12
- Re: [PATCH 16/17] grep: remove check_multibyte_string, fix non-UTF8 missed match, Norihiro Tanaka, 2010/03/13
- Re: [PATCH 16/17] grep: remove check_multibyte_string, fix non-UTF8 missed match, Paolo Bonzini, 2010/03/14
- Re: [PATCH 16/17] grep: remove check_multibyte_string, fix non-UTF8 missed match, Norihiro Tanaka, 2010/03/14
- Re: [PATCH 16/17] grep: remove check_multibyte_string, fix non-UTF8 missed match, Paolo Bonzini, 2010/03/15
- Re: [PATCH 16/17] grep: remove check_multibyte_string, fix non-UTF8 missed match, Norihiro Tanaka, 2010/03/19
[PATCH 17/17] grep: match multibyte charsets line-by-line when using -i, Paolo Bonzini, 2010/03/12
Re: [PATCH 00/16] my last hefty patch drop, Jim Meyering, 2010/03/12