[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 05/11] dfa: change meaning of a state context
From: |
Paolo Bonzini |
Subject: |
[PATCH 05/11] dfa: change meaning of a state context |
Date: |
Wed, 4 Jan 2012 11:59:46 +0100 |
* src/dfa.c (MATCHES_NEWLINE_CONTEXT, MATCHES_LETTER_CONTEXT): New.
(state_index): Do not mask away CTX_NONE.
(dfaanalyze): Adjust call to state_index.
(dfastate): Adjust calls to state_index.
---
src/dfa.c | 39 ++++++++++++++++++++-------------------
1 files changed, 20 insertions(+), 19 deletions(-)
diff --git a/src/dfa.c b/src/dfa.c
index a271883..002db2f 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -93,11 +93,15 @@ static inline unsigned char to_uchar (char ch) { return ch;
}
/* Contexts tell us whether a character is a newline or a word constituent.
Word-constituent characters are those that satisfy iswalnum(), plus '_'.
+ Each character has a single CTX_* value; bitmasks of CTX_* values denote
+ a particular character class.
- A states also stores a context value, which is nonzero if its
- predecessors always matches a newline or a word constituent.
- The definition of a state's context is a bit unclear, but will be
- modified soon anyway. */
+ A state also stores a context value, which is a bitmask of CTX_* values.
+ A state's context represents a set of characters that the state's
+ predecessors must match. For example, a state whose context does not
+ include CTX_LETTER will never have transitions where the previous
+ character is a word constituent. A state whose context is CTX_ANY
+ might have transitions from any character. */
#define CTX_NONE 1
#define CTX_LETTER 2
@@ -121,15 +125,15 @@ static inline unsigned char to_uchar (char ch) { return
ch; }
bit 0 - neither previous nor current is word-constituent
The macro SUCCEEDS_IN_CONTEXT determines whether a given constraint
- succeeds in a particular context. Prev is the context value for
- the previous character, curr is the context value for the lookahead
- character. */
+ succeeds in a particular context. Prev is a bitmask of possible
+ context values for the previous character, curr is the bitmask of
+ possible context values for the lookahead character. */
#define MATCHES_NEWLINE_CONTEXT(constraint, prev, curr) \
((constraint) & \
- 1 << (((prev & CTX_NEWLINE) ? 2 : 0) + ((curr & CTX_NEWLINE) ? 1 : 0) + 4))
+ 1 << (((prev & ~CTX_NEWLINE) ? 0 : 2) + ((curr & ~CTX_NEWLINE) ? 0 : 1) +
4))
#define MATCHES_LETTER_CONTEXT(constraint, prev, curr) \
((constraint) & \
- 1 << (((prev & CTX_LETTER) ? 2 : 0) + ((curr & CTX_LETTER) ? 1 : 0)))
+ 1 << (((prev & ~CTX_LETTER) ? 0 : 2) + ((curr & ~CTX_LETTER) ? 0 : 1)))
#define SUCCEEDS_IN_CONTEXT(constraint, prev, curr) \
(MATCHES_NEWLINE_CONTEXT(constraint, prev, curr) \
&& MATCHES_LETTER_CONTEXT(constraint, prev, curr))
@@ -1977,7 +1981,6 @@ state_index (struct dfa *d, position_set const *s, int
context)
int constraint;
int i, j;
- context &= ~CTX_NONE;
for (i = 0; i < s->nelem; ++i)
hash ^= s->elems[i].index + s->elems[i].constraint;
@@ -2396,7 +2399,7 @@ dfaanalyze (struct dfa *d, int searchflag)
MALLOC(d->states, d->salloc);
wanted_context = state_wanted_contexts(&merged, CTX_NEWLINE);
- state_index(d, &merged, wanted_context);
+ state_index(d, &merged, wanted_context ? wanted_context : CTX_ANY);
free(o_nullable);
free(o_nfirst);
@@ -2493,20 +2496,18 @@ dfastate (int s, struct dfa *d, int trans[])
if (pos.constraint != 0xFF)
{
if (! MATCHES_NEWLINE_CONTEXT(pos.constraint,
- d->states[s].context & CTX_NEWLINE,
- CTX_NEWLINE))
+ d->states[s].context, CTX_NEWLINE))
clrbit(eolbyte, matches);
if (! MATCHES_NEWLINE_CONTEXT(pos.constraint,
- d->states[s].context & CTX_NEWLINE, 0))
+ d->states[s].context, ~CTX_NEWLINE))
for (j = 0; j < CHARCLASS_INTS; ++j)
matches[j] &= newline[j];
if (! MATCHES_LETTER_CONTEXT(pos.constraint,
- d->states[s].context & CTX_LETTER,
- CTX_LETTER))
+ d->states[s].context, CTX_LETTER))
for (j = 0; j < CHARCLASS_INTS; ++j)
matches[j] &= ~letters[j];
if (! MATCHES_LETTER_CONTEXT(pos.constraint,
- d->states[s].context & CTX_LETTER, 0))
+ d->states[s].context, ~CTX_LETTER))
for (j = 0; j < CHARCLASS_INTS; ++j)
matches[j] &= letters[j];
@@ -2591,7 +2592,7 @@ dfastate (int s, struct dfa *d, int trans[])
/* Find the state(s) corresponding to the positions of state 0. */
copy(&d->states[0].elems, &follows);
wanted_context = state_wanted_contexts(&follows, CTX_ANY);
- state = state_index(d, &follows, 0);
+ state = state_index(d, &follows, wanted_context ^ CTX_ANY);
if (wanted_context & CTX_NEWLINE)
state_newline = state_index(d, &follows, CTX_NEWLINE);
else
@@ -2663,7 +2664,7 @@ dfastate (int s, struct dfa *d, int trans[])
wanted_context = state_wanted_contexts(&follows, possible_contexts);
/* Find the state(s) corresponding to the union of the follows. */
- state = state_index(d, &follows, 0);
+ state = state_index(d, &follows, wanted_context ^ CTX_ANY);
if (wanted_context & CTX_NEWLINE)
state_newline = state_index(d, &follows, CTX_NEWLINE);
else
--
1.7.7.1
- [RFC/RFT PATCH 00/11] Differentiate ^/$ from \` and \' in grep -z mode, Paolo Bonzini, 2012/01/04
- [PATCH 01/11] dfa: fix corner case with anchors, Paolo Bonzini, 2012/01/04
- [PATCH 02/11] dfa: introduce contexts for the values in d->success, Paolo Bonzini, 2012/01/04
- [PATCH 04/11] dfa: refactor common context computations, Paolo Bonzini, 2012/01/04
- [PATCH 03/11] dfa: change newline/letter to a single context value, Paolo Bonzini, 2012/01/04
- [PATCH 05/11] dfa: change meaning of a state context,
Paolo Bonzini <=
- [PATCH 06/11] dfa: remove useless check, Paolo Bonzini, 2012/01/04
- [PATCH 07/11] dfa: make repetitive code *really* repetitive, Paolo Bonzini, 2012/01/04
- [PATCH 08/11] dfa: remove redundant line constraints, Paolo Bonzini, 2012/01/04
- [PATCH 11/11] dfa: introduce BEGBUF/ENDBUF, Paolo Bonzini, 2012/01/04
- [PATCH 09/11] dfa: rename "newline" to "buffer delimiter", Paolo Bonzini, 2012/01/04
- [PATCH 10/11] dfa: introduce bufdelim context, Paolo Bonzini, 2012/01/04
- Re: [RFC/RFT PATCH 00/11] Differentiate ^/$ from \` and \' in grep -z mode, Jim Meyering, 2012/01/04