[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
bug#24259: [PATCH 4/6] dfa: thread-safety: move regex syntax configuration into struct dfa
From: Zev Weiss
Subject: bug#24259: [PATCH 4/6] dfa: thread-safety: move regex syntax configuration into struct dfa
Date: Thu, 18 Aug 2016 05:50:17 -0500
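* src/dfa.c: Move the global variables holding regex syntax
configuration into a new struct (`struct regex_syntax') and add an
instance of it to struct dfa.  Replace all references to the globals
with references to the dfa struct's new member.  As a consequence, a
`struct dfa' must now be allocated with dfaalloc() before dfasyntax()
is called, and must be passed to dfasyntax().
(dfainit): Remove.  Its setup moves into dfaalloc(), so that dfacomp()
no longer zeroes the syntax settings now stored in the struct.
(dfaalloc): Zero-allocate the struct and perform the former dfainit()
initialization.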
* src/dfa.h (dfasyntax): Add new struct dfa* parameter.
* src/dfasearch.c (GEAcompile): Allocate `dfa' earlier and pass it to
dfasyntax().
* tests/dfa-match-aux.c (main): Pass `dfa' to dfasyntax().
---
src/dfa.c | 244 +++++++++++++++++++++++++-------------------------
src/dfa.h | 8 +-
src/dfasearch.c | 5 +-
tests/dfa-match-aux.c | 2 +-
4 files changed, 132 insertions(+), 127 deletions(-)
diff --git a/src/dfa.c b/src/dfa.c
index 858bc55..ae1b340 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -328,6 +328,32 @@ struct mb_char_classes
size_t nchars;
};
+struct regex_syntax
+{
+ /* Syntax bits controlling the behavior of the lexical analyzer. */
+ reg_syntax_t syntax_bits;
+ bool syntax_bits_set;
+
+ /* Flag for case-folding letters into sets. */
+ bool case_fold;
+
+ /* End-of-line byte in data. */
+ unsigned char eolbyte;
+
+ /* Cache of char-context values. */
+ int sbit[NOTCHAR];
+
+ /* If never_trail[B], the byte B cannot be a non-initial byte in a
+ multibyte character. */
+ bool never_trail[NOTCHAR];
+
+ /* Set of characters considered letters. */
+ charclass letters;
+
+ /* Set of characters that are newline. */
+ charclass newline;
+};
+
/* Lexical analyzer. All the dross that deals with the obnoxious
GNU Regex syntax bits is located here. The poor, suffering
reader is referred to the GNU Regex documentation for the
@@ -366,6 +392,9 @@ struct parser_state
/* A compiled regular expression. */
struct dfa
{
+ /* Syntax configuration */
+ struct regex_syntax syntax;
+
/* Fields filled by the scanner. */
charclass *charclasses; /* Array of character sets for CSET tokens. */
size_t cindex; /* Index for adding new charclasses. */
@@ -711,29 +740,6 @@ dfa_charclass_index (struct dfa *d, charclass const s)
return i;
}
-/* Syntax bits controlling the behavior of the lexical analyzer. */
-static reg_syntax_t syntax_bits;
-static bool syntax_bits_set;
-
-/* Flag for case-folding letters into sets. */
-static bool case_fold;
-
-/* End-of-line byte in data. */
-static unsigned char eolbyte;
-
-/* Cache of char-context values. */
-static int sbit[NOTCHAR];
-
-/* If never_trail[B], the byte B cannot be a non-initial byte in a
- multibyte character. */
-static bool never_trail[NOTCHAR];
-
-/* Set of characters considered letters. */
-static charclass letters;
-
-/* Set of characters that are newline. */
-static charclass newline;
-
static bool
unibyte_word_constituent (unsigned char c)
{
@@ -741,9 +747,9 @@ unibyte_word_constituent (unsigned char c)
}
static int
-char_context (unsigned char c)
+char_context (struct dfa *dfa, unsigned char c)
{
- if (c == eolbyte)
+ if (c == dfa->syntax.eolbyte)
return CTX_NEWLINE;
if (unibyte_word_constituent (c))
return CTX_LETTER;
@@ -752,13 +758,13 @@ char_context (unsigned char c)
/* Entry point to set syntax options. */
void
-dfasyntax (reg_syntax_t bits, bool fold, unsigned char eol)
+dfasyntax (struct dfa *dfa, reg_syntax_t bits, bool fold, unsigned char eol)
{
int i;
- syntax_bits_set = true;
- syntax_bits = bits;
- case_fold = fold;
- eolbyte = eol;
+ dfa->syntax.syntax_bits_set = true;
+ dfa->syntax.syntax_bits = bits;
+ dfa->syntax.case_fold = fold;
+ dfa->syntax.eolbyte = eol;
for (i = CHAR_MIN; i <= CHAR_MAX; ++i)
{
@@ -769,21 +775,21 @@ dfasyntax (reg_syntax_t bits, bool fold, unsigned char
eol)
mbrtowc_cache[uc] = mbrtowc (&wc, &c, 1, &s) <= 1 ? wc : WEOF;
/* Now that mbrtowc_cache[uc] is set, use it to calculate sbit. */
- sbit[uc] = char_context (uc);
- switch (sbit[uc])
+ dfa->syntax.sbit[uc] = char_context (dfa, uc);
+ switch (dfa->syntax.sbit[uc])
{
case CTX_LETTER:
- setbit (uc, letters);
+ setbit (uc, dfa->syntax.letters);
break;
case CTX_NEWLINE:
- setbit (uc, newline);
+ setbit (uc, dfa->syntax.newline);
break;
}
/* POSIX requires that the five bytes in "\n\r./" (including the
terminating NUL) cannot occur inside a multibyte character. */
- never_trail[uc] = (using_utf8 () ? (uc & 0xc0) != 0x80
- : strchr ("\n\r./", uc) != NULL);
+ dfa->syntax.never_trail[uc] = (using_utf8 () ? (uc & 0xc0) != 0x80
+ : strchr ("\n\r./", uc) != NULL);
}
}
@@ -1062,7 +1068,7 @@ parse_bracket_exp (struct dfa *dfa)
{
FETCH_WC (dfa, c1, wc1, _("unbalanced ["));
- if ((c1 == ':' && (syntax_bits & RE_CHAR_CLASSES))
+ if ((c1 == ':' && (dfa->syntax.syntax_bits & RE_CHAR_CLASSES))
|| c1 == '.' || c1 == '=')
{
enum { MAX_BRACKET_STRING_LEN = 32 };
@@ -1091,8 +1097,9 @@ parse_bracket_exp (struct dfa *dfa)
worry about that possibility. */
{
char const *class
- = (case_fold && (STREQ (str, "upper")
- || STREQ (str, "lower")) ? "alpha" : str);
+ = (dfa->syntax.case_fold && (STREQ (str, "upper")
+ || STREQ (str, "lower")) ?
+ "alpha" : str);
const struct dfa_ctype *pred = find_pred (class);
if (!pred)
dfaerror (_("invalid character class"));
@@ -1118,7 +1125,7 @@ parse_bracket_exp (struct dfa *dfa)
are already set up. */
}
- if (c == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
+ if (c == '\\' && (dfa->syntax.syntax_bits &
RE_BACKSLASH_ESCAPE_IN_LISTS))
FETCH_WC (dfa, c, wc, _("unbalanced ["));
if (c1 == NOTCHAR)
@@ -1147,7 +1154,8 @@ parse_bracket_exp (struct dfa *dfa)
}
else
{
- if (c2 == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
+ if (c2 == '\\' && (dfa->syntax.syntax_bits
+ & RE_BACKSLASH_ESCAPE_IN_LISTS))
FETCH_WC (dfa, c2, wc2, _("unbalanced ["));
colon_warning_state |= 8;
@@ -1163,7 +1171,7 @@ parse_bracket_exp (struct dfa *dfa)
int ci;
for (ci = c; ci <= c2; ci++)
setbit (ci, ccl);
- if (case_fold)
+ if (dfa->syntax.case_fold)
{
int uc = toupper (c);
int uc2 = toupper (c2);
@@ -1187,7 +1195,7 @@ parse_bracket_exp (struct dfa *dfa)
if (!dfa->multibyte)
{
- if (case_fold)
+ if (dfa->syntax.case_fold)
setbit_case_fold_c (c, ccl);
else
setbit (c, ccl);
@@ -1200,7 +1208,7 @@ parse_bracket_exp (struct dfa *dfa)
{
wchar_t folded[CASE_FOLDED_BUFSIZE + 1];
unsigned int i;
- unsigned int n = (case_fold
+ unsigned int n = (dfa->syntax.case_fold
? case_folded_counterparts (wc, folded + 1) + 1
: 1);
folded[0] = wc;
@@ -1233,7 +1241,7 @@ parse_bracket_exp (struct dfa *dfa)
{
assert (!dfa->multibyte);
notset (ccl);
- if (syntax_bits & RE_HAT_LISTS_NOT_NEWLINE)
+ if (dfa->syntax.syntax_bits & RE_HAT_LISTS_NOT_NEWLINE)
clrbit ('\n', ccl);
}
@@ -1285,7 +1293,7 @@ lex (struct dfa *dfa)
case '^':
if (backslash)
goto normal_char;
- if (syntax_bits & RE_CONTEXT_INDEP_ANCHORS
+ if (dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_ANCHORS
|| dfa->lexstate.lasttok == END || dfa->lexstate.lasttok ==
LPAREN
|| dfa->lexstate.lasttok == OR)
return dfa->lexstate.lasttok = BEGLINE;
@@ -1294,17 +1302,17 @@ lex (struct dfa *dfa)
case '$':
if (backslash)
goto normal_char;
- if (syntax_bits & RE_CONTEXT_INDEP_ANCHORS
+ if (dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_ANCHORS
|| dfa->lexstate.lexleft == 0
- || (syntax_bits & RE_NO_BK_PARENS
+ || (dfa->syntax.syntax_bits & RE_NO_BK_PARENS
? dfa->lexstate.lexleft > 0 && *dfa->lexstate.lexptr == ')'
: dfa->lexstate.lexleft > 1 && dfa->lexstate.lexptr[0] ==
'\\'
&& dfa->lexstate.lexptr[1] == ')')
- || (syntax_bits & RE_NO_BK_VBAR
+ || (dfa->syntax.syntax_bits & RE_NO_BK_VBAR
? dfa->lexstate.lexleft > 0 && *dfa->lexstate.lexptr == '|'
: dfa->lexstate.lexleft > 1 && dfa->lexstate.lexptr[0] ==
'\\'
&& dfa->lexstate.lexptr[1] == '|')
- || ((syntax_bits & RE_NEWLINE_ALT)
+ || ((dfa->syntax.syntax_bits & RE_NEWLINE_ALT)
&& dfa->lexstate.lexleft > 0
&& *dfa->lexstate.lexptr == '\n'))
return dfa->lexstate.lasttok = ENDLINE;
@@ -1319,7 +1327,7 @@ lex (struct dfa *dfa)
case '7':
case '8':
case '9':
- if (backslash && !(syntax_bits & RE_NO_BK_REFS))
+ if (backslash && !(dfa->syntax.syntax_bits & RE_NO_BK_REFS))
{
dfa->lexstate.laststart = false;
return dfa->lexstate.lasttok = BACKREF;
@@ -1327,7 +1335,7 @@ lex (struct dfa *dfa)
goto normal_char;
case '`':
- if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
+ if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
{
/* FIXME: should be beginning of string */
return dfa->lexstate.lasttok = BEGLINE;
@@ -1335,7 +1343,7 @@ lex (struct dfa *dfa)
goto normal_char;
case '\'':
- if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
+ if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
{
/* FIXME: should be end of string */
return dfa->lexstate.lasttok = ENDLINE;
@@ -1343,56 +1351,60 @@ lex (struct dfa *dfa)
goto normal_char;
case '<':
- if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
+ if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
return dfa->lexstate.lasttok = BEGWORD;
goto normal_char;
case '>':
- if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
+ if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
return dfa->lexstate.lasttok = ENDWORD;
goto normal_char;
case 'b':
- if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
+ if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
return dfa->lexstate.lasttok = LIMWORD;
goto normal_char;
case 'B':
- if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
+ if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
return dfa->lexstate.lasttok = NOTLIMWORD;
goto normal_char;
case '?':
- if (syntax_bits & RE_LIMITED_OPS)
+ if (dfa->syntax.syntax_bits & RE_LIMITED_OPS)
goto normal_char;
- if (backslash != ((syntax_bits & RE_BK_PLUS_QM) != 0))
+ if (backslash != ((dfa->syntax.syntax_bits & RE_BK_PLUS_QM) != 0))
goto normal_char;
- if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && dfa->lexstate.laststart)
+ if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)
+ && dfa->lexstate.laststart)
goto normal_char;
return dfa->lexstate.lasttok = QMARK;
case '*':
if (backslash)
goto normal_char;
- if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && dfa->lexstate.laststart)
+ if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)
+ && dfa->lexstate.laststart)
goto normal_char;
return dfa->lexstate.lasttok = STAR;
case '+':
- if (syntax_bits & RE_LIMITED_OPS)
+ if (dfa->syntax.syntax_bits & RE_LIMITED_OPS)
goto normal_char;
- if (backslash != ((syntax_bits & RE_BK_PLUS_QM) != 0))
+ if (backslash != ((dfa->syntax.syntax_bits & RE_BK_PLUS_QM) != 0))
goto normal_char;
- if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && dfa->lexstate.laststart)
+ if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)
+ && dfa->lexstate.laststart)
goto normal_char;
return dfa->lexstate.lasttok = PLUS;
case '{':
- if (!(syntax_bits & RE_INTERVALS))
+ if (!(dfa->syntax.syntax_bits & RE_INTERVALS))
goto normal_char;
- if (backslash != ((syntax_bits & RE_NO_BK_BRACES) == 0))
+ if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_BRACES) == 0))
goto normal_char;
- if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && dfa->lexstate.laststart)
+ if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)
+ && dfa->lexstate.laststart)
goto normal_char;
/* Cases:
@@ -1439,7 +1451,7 @@ lex (struct dfa *dfa)
&& (dfa->lexstate.maxrep < 0
|| dfa->lexstate.minrep <= dfa->lexstate.maxrep)))
{
- if (syntax_bits & RE_INVALID_INTERVAL_ORD)
+ if (dfa->syntax.syntax_bits & RE_INVALID_INTERVAL_ORD)
goto normal_char;
dfaerror (_("invalid content of \\{\\}"));
}
@@ -1452,32 +1464,32 @@ lex (struct dfa *dfa)
return dfa->lexstate.lasttok = REPMN;
case '|':
- if (syntax_bits & RE_LIMITED_OPS)
+ if (dfa->syntax.syntax_bits & RE_LIMITED_OPS)
goto normal_char;
- if (backslash != ((syntax_bits & RE_NO_BK_VBAR) == 0))
+ if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_VBAR) == 0))
goto normal_char;
dfa->lexstate.laststart = true;
return dfa->lexstate.lasttok = OR;
case '\n':
- if (syntax_bits & RE_LIMITED_OPS
- || backslash || !(syntax_bits & RE_NEWLINE_ALT))
+ if (dfa->syntax.syntax_bits & RE_LIMITED_OPS
+ || backslash || !(dfa->syntax.syntax_bits & RE_NEWLINE_ALT))
goto normal_char;
dfa->lexstate.laststart = true;
return dfa->lexstate.lasttok = OR;
case '(':
- if (backslash != ((syntax_bits & RE_NO_BK_PARENS) == 0))
+ if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_PARENS) == 0))
goto normal_char;
++dfa->lexstate.parens;
dfa->lexstate.laststart = true;
return dfa->lexstate.lasttok = LPAREN;
case ')':
- if (backslash != ((syntax_bits & RE_NO_BK_PARENS) == 0))
+ if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_PARENS) == 0))
goto normal_char;
if (dfa->lexstate.parens == 0
- && syntax_bits & RE_UNMATCHED_RIGHT_PAREN_ORD)
+ && dfa->syntax.syntax_bits & RE_UNMATCHED_RIGHT_PAREN_ORD)
goto normal_char;
--dfa->lexstate.parens;
dfa->lexstate.laststart = false;
@@ -1495,16 +1507,16 @@ lex (struct dfa *dfa)
}
zeroset (ccl);
notset (ccl);
- if (!(syntax_bits & RE_DOT_NEWLINE))
+ if (!(dfa->syntax.syntax_bits & RE_DOT_NEWLINE))
clrbit ('\n', ccl);
- if (syntax_bits & RE_DOT_NOT_NULL)
+ if (dfa->syntax.syntax_bits & RE_DOT_NOT_NULL)
clrbit ('\0', ccl);
dfa->lexstate.laststart = false;
return dfa->lexstate.lasttok = CSET + dfa_charclass_index (dfa, ccl);
case 's':
case 'S':
- if (!backslash || (syntax_bits & RE_NO_GNU_OPS))
+ if (!backslash || (dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
goto normal_char;
if (!dfa->multibyte)
{
@@ -1536,7 +1548,7 @@ lex (struct dfa *dfa)
case 'w':
case 'W':
- if (!backslash || (syntax_bits & RE_NO_GNU_OPS))
+ if (!backslash || (dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
goto normal_char;
if (!dfa->multibyte)
@@ -1581,7 +1593,7 @@ lex (struct dfa *dfa)
if (dfa->multibyte)
return dfa->lexstate.lasttok = WCHAR;
- if (case_fold && isalpha (c))
+ if (dfa->syntax.case_fold && isalpha (c))
{
zeroset (ccl);
setbit_case_fold_c (c, ccl);
@@ -1741,9 +1753,9 @@ add_utf8_anychar (struct dfa *dfa)
copyset (utf8_classes[i], c);
if (i == 1)
{
- if (!(syntax_bits & RE_DOT_NEWLINE))
+ if (!(dfa->syntax.syntax_bits & RE_DOT_NEWLINE))
clrbit ('\n', c);
- if (syntax_bits & RE_DOT_NOT_NULL)
+ if (dfa->syntax.syntax_bits & RE_DOT_NOT_NULL)
clrbit ('\0', c);
}
dfa->utf8_anychar_classes[i] = CSET + dfa_charclass_index (dfa, c);
@@ -1815,7 +1827,7 @@ atom (struct dfa *dfa)
{
addtok_wc (dfa, dfa->lexstate.wctok);
- if (case_fold)
+ if (dfa->syntax.case_fold)
{
wchar_t folded[CASE_FOLDED_BUFSIZE];
unsigned int i, n = case_folded_counterparts
(dfa->lexstate.wctok,
@@ -1985,7 +1997,7 @@ dfaparse (char const *s, size_t len, struct dfa *d)
memset (&d->mbs, 0, sizeof d->mbs);
}
- if (!syntax_bits_set)
+ if (!d->syntax.syntax_bits_set)
dfaerror (_("no syntax specified"));
d->parsestate.tok = lex (d);
@@ -2271,19 +2283,19 @@ epsclosure (position_set *s, struct dfa const *d, char
*visited)
character included in C. */
static int
-charclass_context (charclass c)
+charclass_context (struct dfa *dfa, charclass c)
{
int context = 0;
unsigned int j;
- if (tstbit (eolbyte, c))
+ if (tstbit (dfa->syntax.eolbyte, c))
context |= CTX_NEWLINE;
for (j = 0; j < CHARCLASS_WORDS; ++j)
{
- if (c[j] & letters[j])
+ if (c[j] & dfa->syntax.letters[j])
context |= CTX_LETTER;
- if (c[j] & ~(letters[j] | newline[j]))
+ if (c[j] & ~(dfa->syntax.letters[j] | dfa->syntax.newline[j]))
context |= CTX_NONE;
}
@@ -2678,15 +2690,15 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
if (!SUCCEEDS_IN_CONTEXT (pos.constraint,
d->states[s].context, CTX_NEWLINE))
for (j = 0; j < CHARCLASS_WORDS; ++j)
- matches[j] &= ~newline[j];
+ matches[j] &= ~d->syntax.newline[j];
if (!SUCCEEDS_IN_CONTEXT (pos.constraint,
d->states[s].context, CTX_LETTER))
for (j = 0; j < CHARCLASS_WORDS; ++j)
- matches[j] &= ~letters[j];
+ matches[j] &= ~d->syntax.letters[j];
if (!SUCCEEDS_IN_CONTEXT (pos.constraint,
d->states[s].context, CTX_NONE))
for (j = 0; j < CHARCLASS_WORDS; ++j)
- matches[j] &= letters[j] | newline[j];
+ matches[j] &= d->syntax.letters[j] | d->syntax.newline[j];
/* If there are no characters left, there's no point in going on. */
for (j = 0; j < CHARCLASS_WORDS && !matches[j]; ++j)
@@ -2792,7 +2804,7 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
for (i = 0; i < NOTCHAR; ++i)
trans[i] = unibyte_word_constituent (i) ? state_letter : state;
- trans[eolbyte] = state_newline;
+ trans[d->syntax.eolbyte] = state_newline;
}
else
for (i = 0; i < NOTCHAR; ++i)
@@ -2848,7 +2860,7 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
}
/* Find out if the new state will want any context information. */
- possible_contexts = charclass_context (labels[i]);
+ possible_contexts = charclass_context (d, labels[i]);
separate_contexts = state_separate_contexts (&follows);
/* Find the state(s) corresponding to the union of the follows. */
@@ -2895,7 +2907,7 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
{
int c = j * CHARCLASS_WORD_BITS + k;
- if (c == eolbyte)
+ if (c == d->syntax.eolbyte)
trans[c] = state_newline;
else if (unibyte_word_constituent (c))
trans[c] = state_letter;
@@ -3021,8 +3033,8 @@ build_state (state_num s, struct dfa *d)
/* Keep the newline transition in a special place so we can use it as
a sentinel. */
- d->newlines[s] = trans[eolbyte];
- trans[eolbyte] = -1;
+ d->newlines[s] = trans[d->syntax.eolbyte];
+ trans[d->syntax.eolbyte] = -1;
if (ACCEPTING (s, *d))
d->fails[s] = trans;
@@ -3041,7 +3053,7 @@ transit_state_singlebyte (struct dfa *d, state_num s,
unsigned char const **pp)
{
state_num *t;
- if (**pp == eolbyte)
+ if (**pp == d->syntax.eolbyte)
{
/* S is always an initial state in transit_state, so the
transition table for the state must have been built already. */
@@ -3084,7 +3096,7 @@ transit_state (struct dfa *d, state_num s, unsigned char
const **pp,
size_t i, j;
int mbclen = mbs_to_wchar (&wc, (char const *) *pp, end - *pp, d);
- int context = wc == eolbyte ? CTX_NEWLINE : CTX_NONE;
+ int context = wc == d->syntax.eolbyte ? CTX_NEWLINE : CTX_NONE;
bool context_newline = context == CTX_NEWLINE;
/* This state has some operators which can match a multibyte character. */
@@ -3202,7 +3214,7 @@ skip_remains_mb (struct dfa *d, unsigned char const *p,
unsigned char const *mbp, char const *end, wint_t *wcp)
{
wint_t wc = WEOF;
- if (never_trail[*p])
+ if (d->syntax.never_trail[*p])
return p;
while (mbp < p)
mbp += mbs_to_wchar (&wc, (char const *) mbp,
@@ -3240,7 +3252,7 @@ dfaexec_main (struct dfa *d, char const *begin, char
*end, bool allow_nl,
unsigned char const *p, *mbp; /* Current input character. */
state_num **trans, *t; /* Copy of d->trans so it can be optimized
into a register. */
- unsigned char eol = eolbyte; /* Likewise for eolbyte. */
+ unsigned char eol = d->syntax.eolbyte; /* Likewise for eolbyte. */
unsigned char saved_end;
size_t nlcount = 0;
@@ -3307,8 +3319,8 @@ dfaexec_main (struct dfa *d, char const *begin, char
*end, bool allow_nl,
}
if (d->states[s].mbps.nelem == 0 || (*p == eol && !allow_nl)
- || (*p == '\n' && !(syntax_bits & RE_DOT_NEWLINE))
- || (*p == '\0' && (syntax_bits & RE_DOT_NOT_NULL))
+ || (*p == '\n' && !(d->syntax.syntax_bits & RE_DOT_NEWLINE))
+ || (*p == '\0' && (d->syntax.syntax_bits & RE_DOT_NOT_NULL))
|| (char *) p >= end)
{
/* If an input character does not match ANYCHAR, do it
@@ -3371,14 +3383,14 @@ dfaexec_main (struct dfa *d, char const *begin, char
*end, bool allow_nl,
}
else if (d->fails[s])
{
- if (d->success[s] & sbit[*p])
+ if (d->success[s] & d->syntax.sbit[*p])
goto done;
s1 = s;
if (!multibyte || d->states[s].mbps.nelem == 0
|| (*p == eol && !allow_nl)
- || (*p == '\n' && !(syntax_bits & RE_DOT_NEWLINE))
- || (*p == '\0' && (syntax_bits & RE_DOT_NOT_NULL))
+ || (*p == '\n' && !(d->syntax.syntax_bits & RE_DOT_NEWLINE))
+ || (*p == '\0' && (d->syntax.syntax_bits & RE_DOT_NOT_NULL))
|| (char *) p >= end)
{
/* If a input character does not match ANYCHAR, do it
@@ -3480,18 +3492,6 @@ free_mbdata (struct dfa *d)
}
}
-/* Initialize the components of a dfa that the other routines don't
- initialize for themselves. */
-static void
-dfainit (struct dfa *d)
-{
- memset (d, 0, sizeof *d);
- d->multibyte = MB_CUR_MAX > 1;
- d->dfaexec = d->multibyte ? dfaexec_mb : dfaexec_sb;
- d->fast = !d->multibyte;
- d->lexstate.cur_mb_len = 1;
-}
-
/* Return true if every construct in D is supported by this DFA matcher. */
static bool _GL_ATTRIBUTE_PURE
dfa_supported (struct dfa const *d)
@@ -3642,7 +3642,6 @@ dfassbuild (struct dfa *d)
void
dfacomp (char const *s, size_t len, struct dfa *d, bool searchflag)
{
- dfainit (d);
dfaparse (s, len, d);
dfassbuild (d);
@@ -3958,7 +3957,7 @@ dfamust (struct dfa const *d)
bool endline = false;
bool need_begline = false;
bool need_endline = false;
- bool case_fold_unibyte = case_fold && MB_CUR_MAX == 1;
+ bool case_fold_unibyte = d->syntax.case_fold && MB_CUR_MAX == 1;
for (ri = 0; ri < d->tindex; ++ri)
{
@@ -4194,7 +4193,12 @@ dfamustfree (struct dfamust *dm)
struct dfa *
dfaalloc (void)
{
- return xmalloc (sizeof (struct dfa));
+ struct dfa *d = xcalloc (1, sizeof (struct dfa));
+ d->multibyte = MB_CUR_MAX > 1;
+ d->dfaexec = d->multibyte ? dfaexec_mb : dfaexec_sb;
+ d->fast = !d->multibyte;
+ d->lexstate.cur_mb_len = 1;
+ return d;
}
/* vim:set shiftwidth=2: */
diff --git a/src/dfa.h b/src/dfa.h
index 60da0e4..014ae96 100644
--- a/src/dfa.h
+++ b/src/dfa.h
@@ -50,10 +50,10 @@ extern struct dfamust *dfamust (struct dfa const *);
/* Free the storage held by the components of a struct dfamust. */
extern void dfamustfree (struct dfamust *);
-/* dfasyntax() takes three arguments; the first sets the syntax bits described
- earlier in this file, the second sets the case-folding flag, and the
- third specifies the line terminator. */
-extern void dfasyntax (reg_syntax_t, bool, unsigned char);
+/* dfasyntax() takes four arguments; the first is the dfa to operate on, the
+ second sets the syntax bits described earlier in this file, the third sets
+ the case-folding flag, and the fourth specifies the line terminator. */
+extern void dfasyntax (struct dfa *, reg_syntax_t, bool, unsigned char);
/* Compile the given string of the given length into the given struct dfa.
Final argument is a flag specifying whether to build a searching or an
diff --git a/src/dfasearch.c b/src/dfasearch.c
index 222232c..3dbf76b 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -123,10 +123,12 @@ GEAcompile (char const *pattern, size_t size,
reg_syntax_t syntax_bits)
size_t total = size;
char *motif;
+ dfa = dfaalloc ();
+
if (match_icase)
syntax_bits |= RE_ICASE;
re_set_syntax (syntax_bits);
- dfasyntax (syntax_bits, match_icase, eolbyte);
+ dfasyntax (dfa, syntax_bits, match_icase, eolbyte);
/* For GNU regex, pass the patterns separately to detect errors like
"[\nallo\n]\n", where the patterns are "[", "allo" and "]", and
@@ -206,7 +208,6 @@ GEAcompile (char const *pattern, size_t size, reg_syntax_t
syntax_bits)
else
motif = NULL;
- dfa = dfaalloc ();
dfacomp (pattern, size, dfa, 1);
kwsmusts ();
diff --git a/tests/dfa-match-aux.c b/tests/dfa-match-aux.c
index af933ff..25b0535 100644
--- a/tests/dfa-match-aux.c
+++ b/tests/dfa-match-aux.c
@@ -54,8 +54,8 @@ main (int argc, char **argv)
setlocale (LC_ALL, "");
- dfasyntax (RE_SYNTAX_GREP | RE_NO_EMPTY_RANGES, 0, '\n');
dfa = dfaalloc ();
+ dfasyntax (dfa, RE_SYNTAX_GREP | RE_NO_EMPTY_RANGES, 0, '\n');
dfacomp (argv[1], strlen (argv[1]), dfa, 0);
beg = argv[2];
--
2.8.1
- bug#24260: [PATCH 1/6] dfa: thread-safety: remove 'dfa' global in dfa.c, (continued)
- bug#24260: [PATCH 1/6] dfa: thread-safety: remove 'dfa' global in dfa.c, Norihiro Tanaka, 2016/08/19
- bug#24260: [PATCH 1/6] dfa: thread-safety: remove 'dfa' global in dfa.c, Zev Weiss, 2016/08/19
- bug#24260: [PATCH 1/6] dfa: thread-safety: remove 'dfa' global in dfa.c, Norihiro Tanaka, 2016/08/19
- bug#24260: [PATCH 1/6] dfa: thread-safety: remove 'dfa' global in dfa.c, Norihiro Tanaka, 2016/08/19
- bug#24260: [PATCH 1/6] dfa: thread-safety: remove 'dfa' global in dfa.c, Zev Weiss, 2016/08/19
- bug#24260: [PATCH 1/6] dfa: thread-safety: remove 'dfa' global in dfa.c, Norihiro Tanaka, 2016/08/20
- bug#24260: [PATCH 1/6] dfa: thread-safety: remove 'dfa' global in dfa.c, Zev Weiss, 2016/08/20
bug#24259: [PATCH 3/6] dfa: thread-safety: move parser state into struct dfa, Zev Weiss, 2016/08/18
bug#24259: [PATCH 5/6] dfa: thread-safety: eliminate static local variables, Zev Weiss, 2016/08/18
bug#24259: [PATCH 2/6] dfa: thread-safety: move lexer state into struct dfa, Zev Weiss, 2016/08/18
bug#24259: [PATCH 4/6] dfa: thread-safety: move regex syntax configuration into struct dfa,
Zev Weiss <=
bug#24259: [PATCH 6/6] dfa: thread-safety: initialize mbrtowc_cache in dfa_init(), Zev Weiss, 2016/08/18
bug#24259: [PATCH 0/6] dfa: thread safety, Jim Meyering, 2016/08/20
bug#24259: [PATCH 0/6] dfa: thread safety, Paul Eggert, 2016/08/23