grep branch, master, updated. v2.10-68-g8b47c4c

grep-commit
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
grep branch, master, updated. v2.10-68-g8b47c4c

From:	Paolo Bonzini
Subject:	grep branch, master, updated. v2.10-68-g8b47c4c
Date:	Sun, 05 Feb 2012 16:35:32 +0000
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "grep".

The branch, master has been updated
       via  8b47c4cf6556933f59226c234b0fe984f6c77dc7 (commit)
      from  ba63674a816d93ee4a69d0c78a34a7d9b391d33b (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.savannah.gnu.org/cgit/grep.git/commit/?id=8b47c4cf6556933f59226c234b0fe984f6c77dc7


commit 8b47c4cf6556933f59226c234b0fe984f6c77dc7
Author: Paolo Bonzini <address@hidden>
Date:   Tue Jan 3 11:22:09 2012 +0100

    dfa: introduce contexts for the values in d->success
    
    Also initialize all tables in a single place in dfasyntax.
    
    * src/dfa.c (CTX_NONE, CTX_LETTER, CTX_NEWLINE, char_context): New.
    (sbit, letters, newline): New.
    (dfasyntax): Fill them.
    (dfastate): Remove letters, newline, initialized.
    (build_state): Use CTX_* constants.
    (dfaexec): Remove sbit and sbit_init.

diff --git a/src/dfa.c b/src/dfa.c
index ac3fb18..53307ae 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -91,6 +91,13 @@ typedef int charclass[CHARCLASS_INTS];
    errors that the cast doesn't.  */
 static inline unsigned char to_uchar (char ch) { return ch; }
 
+/* Contexts tell us whether a character is a newline or a word constituent.
+   Word-constituent characters are those that satisfy iswalnum(), plus '_'.  */
+
+#define CTX_NONE       1
+#define CTX_LETTER     2
+#define CTX_NEWLINE    4
+
 /* Sometimes characters can only be matched depending on the surrounding
    context.  Such context decisions depend on what the previous character
    was, and the value of the current (lookahead) character.  Context
@@ -107,8 +114,6 @@ static inline unsigned char to_uchar (char ch) { return ch; }
    bit 1 - previous wasn't word-constituent, current is
    bit 0 - neither previous nor current is word-constituent
 
-   Word-constituent characters are those that satisfy isalnum().
-
    The macro SUCCEEDS_IN_CONTEXT determines whether a given constraint
    succeeds in a particular context.  Prevn is true if the previous character
    was a newline, currn is true if the lookahead character is a newline.
@@ -553,14 +558,62 @@ static int case_fold;
 /* End-of-line byte in data.  */
 static unsigned char eolbyte;
 
+/* Cache of char-context values.  */
+static int sbit[NOTCHAR];
+
+/* Set of characters considered letters. */
+static charclass letters;
+
+/* Set of characters that are newline. */
+static charclass newline;
+
+/* Add this to the test for whether a byte is word-constituent, since on
+   BSD-based systems, many values in the 128..255 range are classified as
+   alphabetic, while on glibc-based systems, they are not.  */
+#ifdef __GLIBC__
+# define is_valid_unibyte_character(c) 1
+#else
+# define is_valid_unibyte_character(c) (! (MBS_SUPPORT && btowc (c) == WEOF))
+#endif
+
+/* Return non-zero if C is a 'word-constituent' byte; zero otherwise.  */
+#define IS_WORD_CONSTITUENT(C) \
+  (is_valid_unibyte_character (C) && (isalnum (C) || (C) == '_'))
+
+static int
+char_context (unsigned char c)
+{
+  if (c == eolbyte || c == 0)
+    return CTX_NEWLINE;
+  if (IS_WORD_CONSTITUENT (c))
+    return CTX_LETTER;
+  return CTX_NONE;
+}
+
 /* Entry point to set syntax options. */
 void
 dfasyntax (reg_syntax_t bits, int fold, unsigned char eol)
 {
+  unsigned int i;
+
   syntax_bits_set = 1;
   syntax_bits = bits;
   case_fold = fold;
   eolbyte = eol;
+
+  for (i = 0; i < NOTCHAR; ++i)
+    {
+      sbit[i] = char_context (i);
+      switch (sbit[i])
+        {
+        case CTX_LETTER:
+          setbit (i, letters);
+          break;
+        case CTX_NEWLINE:
+          setbit (i, newline);
+          break;
+        }
+    }
 }
 
 /* Set a bit in the charclass for the given wchar_t.  Do nothing if WC
@@ -1073,19 +1126,6 @@ parse_bracket_exp (void)
   return CSET + charclass_index(ccl);
 }
 
-/* Add this to the test for whether a byte is word-constituent, since on
-   BSD-based systems, many values in the 128..255 range are classified as
-   alphabetic, while on glibc-based systems, they are not.  */
-#ifdef __GLIBC__
-# define is_valid_unibyte_character(c) 1
-#else
-# define is_valid_unibyte_character(c) (! (MBS_SUPPORT && btowc (c) == WEOF))
-#endif
-
-/* Return non-zero if C is a `word-constituent' byte; zero otherwise.  */
-#define IS_WORD_CONSTITUENT(C) \
-  (is_valid_unibyte_character(C) && (isalnum(C) || (C) == '_'))
-
 static token
 lex (void)
 {
@@ -2362,8 +2402,6 @@ dfastate (int s, struct dfa *d, int trans[])
   int intersectf;              /* True if intersect is nonempty. */
   charclass leftovers;         /* Stuff in the label that didn't match. */
   int leftoversf;              /* True if leftovers is nonempty. */
-  static charclass letters;    /* Set of characters considered letters. */
-  static charclass newline;    /* Set of characters that are newline. */
   position_set follows;                /* Union of the follows of some group. */
   position_set tmp;            /* Temporary space for merging sets. */
   int state;                   /* New state. */
@@ -2371,23 +2409,12 @@ dfastate (int s, struct dfa *d, int trans[])
   int state_newline;           /* New state on a newline transition. */
   int wants_letter;            /* New state wants to know letter context. */
   int state_letter;            /* New state on a letter transition. */
-  static int initialized;      /* Flag for static initialization. */
   int next_isnt_1st_byte = 0;  /* Flag if we can't add state0.  */
   int i, j, k;
 
   MALLOC (grps, NOTCHAR);
   MALLOC (labels, NOTCHAR);
 
-  /* Initialize the set of letters, if necessary. */
-  if (! initialized)
-    {
-      initialized = 1;
-      for (i = 0; i < NOTCHAR; ++i)
-        if (IS_WORD_CONSTITUENT(i))
-          setbit(i, letters);
-      setbit(eolbyte, newline);
-    }
-
   zeroset(matches);
 
   for (i = 0; i < d->states[s].elems.nelem; ++i)
@@ -2672,13 +2699,13 @@ build_state (int s, struct dfa *d)
   d->success[s] = 0;
   if (ACCEPTS_IN_CONTEXT(d->states[s].newline, 1, d->states[s].letter, 0,
       s, *d))
-    d->success[s] |= 4;
+    d->success[s] |= CTX_NEWLINE;
   if (ACCEPTS_IN_CONTEXT(d->states[s].newline, 0, d->states[s].letter, 1,
       s, *d))
-    d->success[s] |= 2;
+    d->success[s] |= CTX_LETTER;
   if (ACCEPTS_IN_CONTEXT(d->states[s].newline, 0, d->states[s].letter, 0,
       s, *d))
-    d->success[s] |= 1;
+    d->success[s] |= CTX_NONE;
 
   MALLOC(trans, NOTCHAR);
   dfastate(s, d, trans);
@@ -3226,18 +3253,6 @@ dfaexec (struct dfa *d, char const *begin, char *end,
                                    into a register. */
   unsigned char eol = eolbyte; /* Likewise for eolbyte.  */
   unsigned char saved_end;
-  static int sbit[NOTCHAR];    /* Table for anding with d->success. */
-  static int sbit_init;
-
-  if (! sbit_init)
-    {
-      unsigned int i;
-
-      sbit_init = 1;
-      for (i = 0; i < NOTCHAR; ++i)
-        sbit[i] = (IS_WORD_CONSTITUENT(i)) ? 2 : 1;
-      sbit[eol] = 4;
-    }
 
   if (! d->tralloc)
     build_state_zero(d);

-----------------------------------------------------------------------

Summary of changes:
 src/dfa.c |  101 +++++++++++++++++++++++++++++++++++--------------------------
 1 files changed, 58 insertions(+), 43 deletions(-)


hooks/post-receive
-- 
grep
[Prev in Thread]
Current Thread
[Next in Thread]
grep branch, master, updated. v2.10-68-g8b47c4c, Paolo Bonzini <=
Prev by Date: grep branch, master, updated. v2.10-67-gba63674
Next by Date: grep branch, master, updated. v2.10-69-g0828253
Previous by thread: grep branch, master, updated. v2.10-67-gba63674
Next by thread: grep branch, master, updated. v2.10-69-g0828253
Index(es):
- Date
- Thread