[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 6/9] dfa: cache MB_CUR_MAX for dfaexec
From: Paolo Bonzini
Subject: [PATCH 6/9] dfa: cache MB_CUR_MAX for dfaexec
Date: Sun, 14 Mar 2010 16:35:11 +0100
* src/dfa.c (dfastate, dfaexec): Use d->mb_cur_max.
(dfainit): Initialize it.
(free_mbdata): New, extracted out of dfafree.
(dfafree): Use it.
* src/dfa.h (struct dfa): Add mb_cur_max.
---
src/dfa.c | 73 ++++++++++++++++++++++++++++++++++++-------------------------
src/dfa.h | 2 +
2 files changed, 45 insertions(+), 30 deletions(-)
diff --git a/src/dfa.c b/src/dfa.c
index da70aa1..6a658c1 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -2130,7 +2130,7 @@ dfastate (int s, struct dfa *d, int trans[])
insert(d->follows[grps[i].elems[j].index].elems[k], &follows);
#ifdef MBS_SUPPORT
- if (MB_CUR_MAX > 1)
+ if (d->mb_cur_max > 1)
{
/* If a token in follows.elems is not 1st byte of a multibyte
character, or the states of follows must accept the bytes
@@ -2165,7 +2165,7 @@ dfastate (int s, struct dfa *d, int trans[])
/* If we are building a searching matcher, throw in the positions
of state 0 as well. */
#ifdef MBS_SUPPORT
- if (d->searchflag && (MB_CUR_MAX == 1 || !next_isnt_1st_byte))
+ if (d->searchflag && (d->mb_cur_max == 1 || !next_isnt_1st_byte))
#else
if (d->searchflag)
#endif
@@ -2789,7 +2789,7 @@ dfaexec (struct dfa *d, char const *begin, char *end,
*end = eol;
#ifdef MBS_SUPPORT
- if (MB_CUR_MAX > 1)
+ if (d->mb_cur_max > 1)
{
int remain_bytes, i;
buf_begin = (unsigned char *) begin;
@@ -2834,7 +2834,7 @@ dfaexec (struct dfa *d, char const *begin, char *end,
for (;;)
{
#ifdef MBS_SUPPORT
- if (MB_CUR_MAX > 1)
+ if (d->mb_cur_max > 1)
while ((t = trans[s]))
{
if ((char *) p > end)
@@ -2871,7 +2871,7 @@ dfaexec (struct dfa *d, char const *begin, char *end,
if (backref)
*backref = (d->states[s].backref != 0);
#ifdef MBS_SUPPORT
- if (MB_CUR_MAX > 1)
+ if (d->mb_cur_max > 1)
{
free(mblen_buf);
free(inputwcs);
@@ -2883,7 +2883,7 @@ dfaexec (struct dfa *d, char const *begin, char *end,
s1 = s;
#ifdef MBS_SUPPORT
- if (MB_CUR_MAX > 1)
+ if (d->mb_cur_max > 1)
{
/* Can match with a multibyte character (and multicharacter
collating element). Transition table might be updated. */
@@ -2904,7 +2904,7 @@ dfaexec (struct dfa *d, char const *begin, char *end,
if ((char *) p > end)
{
#ifdef MBS_SUPPORT
- if (MB_CUR_MAX > 1)
+ if (d->mb_cur_max > 1)
{
free(mblen_buf);
free(inputwcs);
@@ -2931,6 +2931,37 @@ dfaexec (struct dfa *d, char const *begin, char *end,
}
}
+static void
+free_mbdata (struct dfa *d)
+{
+ int i;
+
+ free(d->multibyte_prop);
+ d->multibyte_prop = NULL;
+
+ for (i = 0; i < d->nmbcsets; ++i)
+ {
+ int j;
+ struct mb_char_classes *p = &(d->mbcsets[i]);
+ free(p->chars);
+ free(p->ch_classes);
+ free(p->range_sts);
+ free(p->range_ends);
+
+ for (j = 0; j < p->nequivs; ++j)
+ free(p->equivs[j]);
+ free(p->equivs);
+
+ for (j = 0; j < p->ncoll_elems; ++j)
+ free(p->coll_elems[j]);
+ free(p->coll_elems);
+ }
+
+ free(d->mbcsets);
+ d->mbcsets = NULL;
+ d->nmbcsets = 0;
+}
+
/* Initialize the components of a dfa that the other routines don't
initialize for themselves. */
void
@@ -2943,8 +2974,10 @@ dfainit (struct dfa *d)
d->talloc = 1;
MALLOC(d->tokens, token, d->talloc);
d->tindex = d->depth = d->nleaves = d->nregexps = 0;
+
#ifdef MBS_SUPPORT
- if (MB_CUR_MAX > 1)
+ d->mb_cur_max = MB_CUR_MAX;
+ if (d->mb_cur_max > 1)
{
d->nmultibyte_prop = 1;
MALLOC(d->multibyte_prop, int, d->nmultibyte_prop);
@@ -2989,28 +3022,8 @@ dfafree (struct dfa *d)
free(d->tokens);
#ifdef MBS_SUPPORT
- if (MB_CUR_MAX > 1)
- {
- free(d->multibyte_prop);
- for (i = 0; i < d->nmbcsets; ++i)
- {
- int j;
- struct mb_char_classes *p = &(d->mbcsets[i]);
- free(p->chars);
- free(p->ch_classes);
- free(p->range_sts);
- free(p->range_ends);
-
- for (j = 0; j < p->nequivs; ++j)
- free(p->equivs[j]);
- free(p->equivs);
-
- for (j = 0; j < p->ncoll_elems; ++j)
- free(p->coll_elems[j]);
- free(p->coll_elems);
- }
- free(d->mbcsets);
- }
+ if (d->mb_cur_max > 1)
+ free_mbdata(d);
#endif /* MBS_SUPPORT */
for (i = 0; i < d->sindex; ++i) {
diff --git a/src/dfa.h b/src/dfa.h
index b8eb0c2..89460aa 100644
--- a/src/dfa.h
+++ b/src/dfa.h
@@ -278,6 +278,8 @@ struct dfa
int nregexps; /* Count of parallel regexps being built
with dfaparse(). */
#ifdef MBS_SUPPORT
+ int mb_cur_max; /* Cached value of MB_CUR_MAX. */
+
/* These stuff are used only if MB_CUR_MAX > 1 or multibyte environments. */
int nmultibyte_prop;
int *multibyte_prop;
--
1.6.6.1
- Re: [PATCH 3/9] dfa: rewrite handling of multibyte case_fold lexing, (continued)
- [PATCH 5/9] dfa: optimize simple character sets under UTF-8 charsets, Paolo Bonzini, 2010/03/14
- [PATCH 7/9] dfa: run simple UTF-8 regexps as a single-byte character set, Paolo Bonzini, 2010/03/14
- [PATCH 6/9] dfa: cache MB_CUR_MAX for dfaexec, Paolo Bonzini <=
- [PATCH 8/9] grep: remove check_multibyte_string, fix non-UTF8 missed match, Paolo Bonzini, 2010/03/14
- [PATCH 9/9] grep: match multibyte charsets line-by-line when using -i, Paolo Bonzini, 2010/03/14