[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
bug#24259: [PATCH 5/6] dfa: thread-safety: eliminate static local variables
From: Zev Weiss
Subject: bug#24259: [PATCH 5/6] dfa: thread-safety: eliminate static local variables
Date: Thu, 18 Aug 2016 05:50:18 -0500
* src/dfa.c: Replace utf8 and unibyte_c static local variables with
static globals initialized by a new function dfa_init() which must be
called before any other dfa*() functions.
(dfa_using_utf8): Rename using_utf8() to dfa_using_utf8() for
consistency with other exported functions.
* src/dfa.h (dfa_using_utf8): Rename using_utf8() to dfa_using_utf8();
also add _GL_ATTRIBUTE_PURE.
(dfa_init): New function.
* src/grep.c (main), tests/dfa-match-aux.c (main): Call dfa_init().
* src/dfasearch.c (EGexecute), src/kwsearch.c (Fexecute),
src/pcresearch.c (Pcompile): Replace using_utf8() with
dfa_using_utf8().
---
src/dfa.c | 62 +++++++++++++++++++++++++++------------------------
src/dfa.h | 5 ++++-
src/dfasearch.c | 2 +-
src/grep.c | 2 ++
src/kwsearch.c | 2 +-
src/pcresearch.c | 2 +-
tests/dfa-match-aux.c | 2 ++
7 files changed, 44 insertions(+), 33 deletions(-)
diff --git a/src/dfa.c b/src/dfa.c
index ae1b340..970b51f 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -756,6 +756,16 @@ char_context (struct dfa *dfa, unsigned char c)
return CTX_NONE;
}
+/* UTF-8 encoding allows some optimizations that we can't otherwise
+ assume in a multibyte encoding. */
+static bool using_utf8;
+
+bool
+dfa_using_utf8 (void)
+{
+ return using_utf8;
+}
+
/* Entry point to set syntax options. */
void
dfasyntax (struct dfa *dfa, reg_syntax_t bits, bool fold, unsigned char eol)
@@ -788,7 +798,7 @@ dfasyntax (struct dfa *dfa, reg_syntax_t bits, bool fold,
unsigned char eol)
/* POSIX requires that the five bytes in "\n\r./" (including the
terminating NUL) cannot occur inside a multibyte character. */
- dfa->syntax.never_trail[uc] = (using_utf8 () ? (uc & 0xc0) != 0x80
+ dfa->syntax.never_trail[uc] = (using_utf8 ? (uc & 0xc0) != 0x80
: strchr ("\n\r./", uc) != NULL);
}
}
@@ -821,21 +831,21 @@ setbit_case_fold_c (int b, charclass c)
setbit (i, c);
}
+static void check_utf8 (void)
+{
+ wchar_t wc;
+ mbstate_t mbs = { 0 };
+ using_utf8 = mbrtowc (&wc, "\xc4\x80", 2, &mbs) == 2 && wc == 0x100;
+}
+static bool unibyte_c;
-/* UTF-8 encoding allows some optimizations that we can't otherwise
- assume in a multibyte encoding. */
-bool
-using_utf8 (void)
+static void check_unibyte_c (void)
{
- static int utf8 = -1;
- if (utf8 < 0)
- {
- wchar_t wc;
- mbstate_t mbs = { 0 };
- utf8 = mbrtowc (&wc, "\xc4\x80", 2, &mbs) == 2 && wc == 0x100;
- }
- return utf8;
+ char const *locale = setlocale (LC_ALL, NULL);
+ unibyte_c = (!locale
+ || STREQ (locale, "C")
+ || STREQ (locale, "POSIX"));
}
/* The current locale is known to be a unibyte locale
@@ -862,20 +872,7 @@ using_simple_locale (struct dfa *dfa)
&& '}' == 125 && '~' == 126)
};
- if (! native_c_charset || dfa->multibyte)
- return false;
- else
- {
- static int unibyte_c = -1;
- if (unibyte_c < 0)
- {
- char const *locale = setlocale (LC_ALL, NULL);
- unibyte_c = (!locale
- || STREQ (locale, "C")
- || STREQ (locale, "POSIX"));
- }
- return unibyte_c;
- }
+ return (!native_c_charset || dfa->multibyte) ? false : unibyte_c;
}
/* Fetch the next lexical input character. Set C (of type int) to the
@@ -1842,7 +1839,7 @@ atom (struct dfa *dfa)
dfa->parsestate.tok = lex (dfa);
}
- else if (dfa->parsestate.tok == ANYCHAR && using_utf8 ())
+ else if (dfa->parsestate.tok == ANYCHAR && using_utf8)
{
/* For UTF-8 expand the period to a series of CSETs that define a valid
UTF-8 character. This avoids using the slow multibyte path. I'm
@@ -3523,7 +3520,7 @@ dfaoptimize (struct dfa *d)
size_t i;
bool have_backref = false;
- if (!using_utf8 ())
+ if (!using_utf8)
return;
for (i = 0; i < d->tindex; ++i)
@@ -4201,4 +4198,11 @@ dfaalloc (void)
return d;
}
+void
+dfa_init (void)
+{
+ check_utf8 ();
+ check_unibyte_c ();
+}
+
/* vim:set shiftwidth=2: */
diff --git a/src/dfa.h b/src/dfa.h
index 014ae96..585390a 100644
--- a/src/dfa.h
+++ b/src/dfa.h
@@ -100,4 +100,7 @@ extern void dfawarn (const char *);
The user must supply a dfaerror. */
extern _Noreturn void dfaerror (const char *);
-extern bool using_utf8 (void);
+extern bool dfa_using_utf8 (void) _GL_ATTRIBUTE_PURE;
+
+/* This must be called before calling any of the above dfa*() functions. */
+extern void dfa_init (void);
diff --git a/src/dfasearch.c b/src/dfasearch.c
index 3dbf76b..10c4f51 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -277,7 +277,7 @@ EGexecute (char *buf, size_t size, size_t *match_size,
if (exact_kwset_match)
{
- if (MB_CUR_MAX == 1 || using_utf8 ())
+ if (MB_CUR_MAX == 1 || dfa_using_utf8 ())
goto success;
if (mb_start < beg)
mb_start = beg;
diff --git a/src/grep.c b/src/grep.c
index a82da61..bd1c5cc 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -2351,6 +2351,8 @@ main (int argc, char **argv)
textdomain (PACKAGE);
#endif
+ dfa_init ();
+
atexit (clean_up_stdout);
last_recursive = 0;
diff --git a/src/kwsearch.c b/src/kwsearch.c
index d2afa40..fb77280 100644
--- a/src/kwsearch.c
+++ b/src/kwsearch.c
@@ -93,7 +93,7 @@ Fexecute (char *buf, size_t size, size_t *match_size,
mb_check = longest = false;
else
{
- mb_check = MB_CUR_MAX > 1 && !using_utf8 ();
+ mb_check = MB_CUR_MAX > 1 && !dfa_using_utf8 ();
longest = mb_check || start_ptr || match_words;
}
diff --git a/src/pcresearch.c b/src/pcresearch.c
index f6e72b0..3f76603 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -114,7 +114,7 @@ Pcompile (char const *pattern, size_t size)
if (1 < MB_CUR_MAX)
{
- if (! using_utf8 ())
+ if (! dfa_using_utf8 ())
error (EXIT_TROUBLE, 0,
_("-P supports only unibyte and UTF-8 locales"));
multibyte_locale = true;
diff --git a/tests/dfa-match-aux.c b/tests/dfa-match-aux.c
index 25b0535..e651735 100644
--- a/tests/dfa-match-aux.c
+++ b/tests/dfa-match-aux.c
@@ -54,6 +54,8 @@ main (int argc, char **argv)
setlocale (LC_ALL, "");
+ dfa_init ();
+
dfa = dfaalloc ();
dfasyntax (dfa, RE_SYNTAX_GREP | RE_NO_EMPTY_RANGES, 0, '\n');
dfacomp (argv[1], strlen (argv[1]), dfa, 0);
--
2.8.1
- bug#24260: [PATCH 1/6] dfa: thread-safety: remove 'dfa' global in dfa.c, (continued)
bug#24260: [PATCH 1/6] dfa: thread-safety: remove 'dfa' global in dfa.c, Norihiro Tanaka, 2016/08/19
- bug#24260: [PATCH 1/6] dfa: thread-safety: remove 'dfa' global in dfa.c, Zev Weiss, 2016/08/19
- bug#24260: [PATCH 1/6] dfa: thread-safety: remove 'dfa' global in dfa.c, Norihiro Tanaka, 2016/08/19
- bug#24260: [PATCH 1/6] dfa: thread-safety: remove 'dfa' global in dfa.c, Norihiro Tanaka, 2016/08/19
- bug#24260: [PATCH 1/6] dfa: thread-safety: remove 'dfa' global in dfa.c, Zev Weiss, 2016/08/19
- bug#24260: [PATCH 1/6] dfa: thread-safety: remove 'dfa' global in dfa.c, Norihiro Tanaka, 2016/08/20
- bug#24260: [PATCH 1/6] dfa: thread-safety: remove 'dfa' global in dfa.c, Zev Weiss, 2016/08/20
bug#24259: [PATCH 3/6] dfa: thread-safety: move parser state into struct dfa, Zev Weiss, 2016/08/18
bug#24259: [PATCH 5/6] dfa: thread-safety: eliminate static local variables,
Zev Weiss <=
bug#24259: [PATCH 2/6] dfa: thread-safety: move lexer state into struct dfa, Zev Weiss, 2016/08/18
bug#24259: [PATCH 4/6] dfa: thread-safety: move regex syntax configuration into struct dfa, Zev Weiss, 2016/08/18
bug#24259: [PATCH 6/6] dfa: thread-safety: initialize mbrtowc_cache in dfa_init(), Zev Weiss, 2016/08/18
bug#24259: [PATCH 0/6] dfa: thread safety, Jim Meyering, 2016/08/20
bug#24259: [PATCH 0/6] dfa: thread safety, Paul Eggert, 2016/08/23