grep-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

grep branch, master, updated. v2.25-78-g3c381d0


From: Paul Eggert
Subject: grep branch, master, updated. v2.25-78-g3c381d0
Date: Fri, 2 Sep 2016 22:29:07 +0000 (UTC)

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "grep".

The branch, master has been updated
       via  3c381d05ed984f756f45d21776670fa74e1687ca (commit)
      from  ad468bbe3df027f29ecb236283084fb60b734f68 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.savannah.gnu.org/cgit/grep.git/commit/?id=3c381d05ed984f756f45d21776670fa74e1687ca


commit 3c381d05ed984f756f45d21776670fa74e1687ca
Author: Paul Eggert <address@hidden>
Date:   Fri Sep 2 15:27:12 2016 -0700

    dfa: new option for anchored searches
    
    This follows up on a suggestion by Norihiro Tanaka (Bug#24262).
    * src/dfa.c (struct regex_syntax): New member 'anchor'.
    (char_context): Use it.
    (dfasyntax): Change signature to specify it, along with the old
    FOLD and EOL args, as a single DFAOPTS arg.  All uses changed.
    * src/dfa.h (DFA_ANCHOR, DFA_CASE_FOLD, DFA_EOL_NUL): New constants
    for dfasyntax new last arg.

diff --git a/src/dfa.c b/src/dfa.c
index 4cbaa75..ff3721c 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -335,6 +335,10 @@ struct regex_syntax
   /* Flag for case-folding letters into sets.  */
   bool case_fold;
 
+  /* True if ^ and $ match only the start and end of data, and do not match
+     end-of-line within data.  */
+  bool anchor;
+
   /* End-of-line byte in data.  */
   unsigned char eolbyte;
 
@@ -754,7 +758,7 @@ unibyte_word_constituent (struct dfa const *dfa, unsigned char c)
 static int
 char_context (struct dfa const *dfa, unsigned char c)
 {
-  if (c == dfa->syntax.eolbyte)
+  if (c == dfa->syntax.eolbyte && !dfa->syntax.anchor)
     return CTX_NEWLINE;
   if (unibyte_word_constituent (dfa, c))
     return CTX_LETTER;
@@ -3987,7 +3991,7 @@ dfaalloc (void)
 /* Initialize DFA.  */
 void
 dfasyntax (struct dfa *dfa, struct localeinfo const *linfo,
-           reg_syntax_t bits, bool fold, unsigned char eol)
+           reg_syntax_t bits, int dfaopts)
 {
   int i;
   memset (dfa, 0, offsetof (struct dfa, dfaexec));
@@ -4000,9 +4004,10 @@ dfasyntax (struct dfa *dfa, struct localeinfo const *linfo,
   dfa->canychar = -1;
   dfa->lex.cur_mb_len = 1;
   dfa->syntax.syntax_bits_set = true;
+  dfa->syntax.case_fold = (dfaopts & DFA_CASE_FOLD) != 0;
+  dfa->syntax.anchor = (dfaopts & DFA_ANCHOR) != 0;
+  dfa->syntax.eolbyte = dfaopts & DFA_EOL_NUL ? '\0' : '\n';
   dfa->syntax.syntax_bits = bits;
-  dfa->syntax.case_fold = fold;
-  dfa->syntax.eolbyte = eol;
 
   for (i = CHAR_MIN; i <= CHAR_MAX; ++i)
     {
diff --git a/src/dfa.h b/src/dfa.h
index 31baf7a..b8c44cc 100644
--- a/src/dfa.h
+++ b/src/dfa.h
@@ -46,15 +46,29 @@ struct dfa;
    calling dfafree() on it. */
 extern struct dfa *dfaalloc (void) _GL_ATTRIBUTE_MALLOC;
 
+/* DFA options that can be ORed together, for dfasyntax's 4th arg.  */
+enum
+  {
+    /* ^ and $ match only the start and end of data, and do not match
+       end-of-line within data.  This is always false for grep, but
+       possibly true for other apps.  */
+    DFA_ANCHOR = 1 << 0,
+
+    /* Ignore case while matching.  */
+    DFA_CASE_FOLD = 1 << 1,
+
+    /* '\0' in data is end-of-line, instead of the traditional '\n'.  */
+    DFA_EOL_NUL = 1 << 2
+  };
+
 /* Initialize or reinitialize a DFA.  This must be called before
    any of the routines below.  The arguments are:
    1. The DFA to operate on.
    2. Information about the current locale.
-   3. The syntax bits described earlier in this file.
-   4. The case-folding flag.
-   5. The line terminator.  */
+   3. Syntax bits described in regex.h.
+   4. Additional DFA options described above.  */
 extern void dfasyntax (struct dfa *, struct localeinfo const *,
-                       reg_syntax_t, bool, unsigned char);
+                       reg_syntax_t, int);
 
 /* Build and return the struct dfamust from the given struct dfa. */
 extern struct dfamust *dfamust (struct dfa const *);
diff --git a/src/dfasearch.c b/src/dfasearch.c
index 0838e1f..96be58f 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -123,7 +123,9 @@ GEAcompile (char const *pattern, size_t size, reg_syntax_t syntax_bits)
   if (match_icase)
     syntax_bits |= RE_ICASE;
   re_set_syntax (syntax_bits);
-  dfasyntax (dfa, &localeinfo, syntax_bits, match_icase, eolbyte);
+  int dfaopts = ((match_icase ? DFA_CASE_FOLD : 0)
+                 | (eolbyte ? 0 : DFA_EOL_NUL));
+  dfasyntax (dfa, &localeinfo, syntax_bits, dfaopts);
 
   /* For GNU regex, pass the patterns separately to detect errors like
      "[\nallo\n]\n", where the patterns are "[", "allo" and "]", and
diff --git a/tests/dfa-match-aux.c b/tests/dfa-match-aux.c
index e001b7d..070089c 100644
--- a/tests/dfa-match-aux.c
+++ b/tests/dfa-match-aux.c
@@ -58,7 +58,7 @@ main (int argc, char **argv)
   init_localeinfo (&localeinfo);
 
   dfa = dfaalloc ();
-  dfasyntax (dfa, &localeinfo, RE_SYNTAX_GREP | RE_NO_EMPTY_RANGES, 0, '\n');
+  dfasyntax (dfa, &localeinfo, RE_SYNTAX_GREP | RE_NO_EMPTY_RANGES, 0);
   dfacomp (argv[1], strlen (argv[1]), dfa, 0);
 
   beg = argv[2];

-----------------------------------------------------------------------

Summary of changes:
 src/dfa.c             |   13 +++++++++----
 src/dfa.h             |   22 ++++++++++++++++++----
 src/dfasearch.c       |    4 +++-
 tests/dfa-match-aux.c |    2 +-
 4 files changed, 31 insertions(+), 10 deletions(-)


hooks/post-receive
-- 
grep



reply via email to

[Prev in Thread] Current Thread [Next in Thread]