grep-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

grep branch, master, updated. v2.18-63-g34e17a7


From: Paul Eggert
Subject: grep branch, master, updated. v2.18-63-g34e17a7
Date: Sat, 12 Apr 2014 01:28:44 +0000

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "grep".

The branch, master has been updated
       via  34e17a7a01ac1d85b330444dbc8979d07b8dd9d0 (commit)
       via  441775e9bbec9359ceac237dd908ef9689f76e61 (commit)
      from  5c92a54ec36f176854f0b0cca83b5ff224f2d8e8 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.savannah.gnu.org/cgit/grep.git/commit/?id=34e17a7a01ac1d85b330444dbc8979d07b8dd9d0


commit 34e17a7a01ac1d85b330444dbc8979d07b8dd9d0
Author: Paul Eggert <address@hidden>
Date:   Wed Apr 9 19:59:12 2014 -0700

    grep: improvements for the open-CSET patch
    
    * src/dfa.c (dfamust): Simplify by removing some duplicate code.
    Optimize patterns like [aaa] even when not case-folding.
    Avoid an unnecessary copy of the charclass.

diff --git a/src/dfa.c b/src/dfa.c
index 6b4169d..2b6c5d6 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -4036,7 +4036,6 @@ dfamust (struct dfa *d)
   size_t ri;
   size_t i;
   bool exact;
-  token t;
   static must must0;
   struct dfamust *dm;
   static char empty_string[] = "";
@@ -4067,11 +4066,18 @@ dfamust (struct dfa *d)
 #endif
   for (ri = 0; ri < d->tindex; ++ri)
     {
-      switch (t = d->tokens[ri])
+      token t = d->tokens[ri];
+      switch (t)
         {
         case LPAREN:
         case RPAREN:
           assert (!"neither LPAREN nor RPAREN may appear here");
+
+        case STAR:
+        case QMARK:
+          assert (musts < mp);
+          --mp;
+          /* Fall through.  */
         case EMPTY:
         case BEGLINE:
         case ENDLINE:
@@ -4080,14 +4086,11 @@ dfamust (struct dfa *d)
         case LIMWORD:
         case NOTLIMWORD:
         case BACKREF:
+        case ANYCHAR:
+        case MBCSET:
           resetmust (mp);
           break;
-        case STAR:
-        case QMARK:
-          assert (musts < mp);
-          --mp;
-          resetmust (mp);
-          break;
+
         case OR:
           assert (&musts[2] <= mp);
           {
@@ -4126,11 +4129,13 @@ dfamust (struct dfa *d)
             lmp->in = new;
           }
           break;
+
         case PLUS:
           assert (musts < mp);
           --mp;
           mp->is[0] = '\0';
           break;
+
         case END:
           assert (mp == &musts[1]);
           for (i = 0; musts[0].in[i] != NULL; ++i)
@@ -4139,6 +4144,7 @@ dfamust (struct dfa *d)
           if (STREQ (result, musts[0].is))
             exact = true;
           goto done;
+
         case CAT:
           assert (&musts[2] <= mp);
           {
@@ -4188,62 +4194,41 @@ dfamust (struct dfa *d)
               lmp->is[0] = '\0';
           }
           break;
+
+        case '\0':
+          /* Not on *my* shift.  */
+          goto done;
+
         default:
-          if (t < END)
-            {
-              assert (!"oops! t >= END");
-            }
-          else if (t == '\0')
-            {
-              /* not on *my* shift */
-              goto done;
-            }
-          else if (t >= CSET)
+          resetmust (mp);
+          if (CSET <= t)
             {
-              charclass ccl;
+              /* If T is a singleton, or if case-folding in a unibyte
+                 locale and T's members all case-fold to the same char,
+                 convert T to one of its members.  Otherwise, do
+                 nothing further with T.  */
+              charclass *ccl = &d->charclasses[t - CSET];
               int j;
-              copyset (d->charclasses[t - CSET], ccl);
-              for (j = 0; j < NOTCHAR; ++j)
-                if (tstbit (j, ccl))
+              for (j = 0; j < NOTCHAR; j++)
+                if (tstbit (j, *ccl))
+                  break;
+              if (! (j < NOTCHAR))
+                break;
+              t = j;
+              while (++j < NOTCHAR)
+                if (tstbit (j, *ccl)
+                    && ! (case_fold && MB_CUR_MAX == 1
+                          && toupper (j) == toupper (t)))
                   break;
               if (j < NOTCHAR)
-                {
-                  int c = (case_fold && MB_CUR_MAX == 1) ? toupper (j) : j;
-                  for (; j < NOTCHAR; j++)
-                    if (tstbit (j, ccl)
-                        && (!(case_fold && MB_CUR_MAX == 1) || c != toupper 
(j)))
-                      break;
-                  if (j < NOTCHAR)
-                    resetmust (mp);
-                  else
-                    {
-                      resetmust (mp);
-                      mp->is[0] = mp->left[0] = mp->right[0] = c;
-                      mp->is[1] = mp->left[1] = mp->right[1] = '\0';
-                      mp->in = enlist (mp->in, mp->is, (size_t) 1);
-                      if (mp->in == NULL)
-                        goto done;
-                    }
-                }
-              else
-                resetmust (mp);
-            }
-          else if (t == ANYCHAR || t == MBCSET)
-            {
-              /* easy enough */
-              resetmust (mp);
-            }
-          else
-            {
-              /* plain character */
-              resetmust (mp);
-              mp->is[0] = mp->left[0] = mp->right[0] =
-                (case_fold && MB_CUR_MAX == 1) ? toupper (t) : t;
-              mp->is[1] = mp->left[1] = mp->right[1] = '\0';
-              mp->in = enlist (mp->in, mp->is, (size_t) 1);
-              if (mp->in == NULL)
-                goto done;
+                break;
             }
+          mp->is[0] = mp->left[0] = mp->right[0]
+            = case_fold && MB_CUR_MAX == 1 ? toupper (t) : t;
+          mp->is[1] = mp->left[1] = mp->right[1] = '\0';
+          mp->in = enlist (mp->in, mp->is, 1);
+          if (mp->in == NULL)
+            goto done;
           break;
         }
 #ifdef DEBUG

http://git.savannah.gnu.org/cgit/grep.git/commit/?id=441775e9bbec9359ceac237dd908ef9689f76e61


commit 34e17a7a01ac1d85b330444dbc8979d07b8dd9d0
Author: Paul Eggert <address@hidden>
Date:   Wed Apr 9 19:59:12 2014 -0700

    grep: improvements for the open-CSET patch
    
    * src/dfa.c (dfamust): Simplify by removing some duplicate code.
    Optimize patterns like [aaa] even when not case-folding.
    Avoid an unnecessary copy of the charclass.

diff --git a/src/dfa.c b/src/dfa.c
index 6b4169d..2b6c5d6 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -4036,7 +4036,6 @@ dfamust (struct dfa *d)
   size_t ri;
   size_t i;
   bool exact;
-  token t;
   static must must0;
   struct dfamust *dm;
   static char empty_string[] = "";
@@ -4067,11 +4066,18 @@ dfamust (struct dfa *d)
 #endif
   for (ri = 0; ri < d->tindex; ++ri)
     {
-      switch (t = d->tokens[ri])
+      token t = d->tokens[ri];
+      switch (t)
         {
         case LPAREN:
         case RPAREN:
           assert (!"neither LPAREN nor RPAREN may appear here");
+
+        case STAR:
+        case QMARK:
+          assert (musts < mp);
+          --mp;
+          /* Fall through.  */
         case EMPTY:
         case BEGLINE:
         case ENDLINE:
@@ -4080,14 +4086,11 @@ dfamust (struct dfa *d)
         case LIMWORD:
         case NOTLIMWORD:
         case BACKREF:
+        case ANYCHAR:
+        case MBCSET:
           resetmust (mp);
           break;
-        case STAR:
-        case QMARK:
-          assert (musts < mp);
-          --mp;
-          resetmust (mp);
-          break;
+
         case OR:
           assert (&musts[2] <= mp);
           {
@@ -4126,11 +4129,13 @@ dfamust (struct dfa *d)
             lmp->in = new;
           }
           break;
+
         case PLUS:
           assert (musts < mp);
           --mp;
           mp->is[0] = '\0';
           break;
+
         case END:
           assert (mp == &musts[1]);
           for (i = 0; musts[0].in[i] != NULL; ++i)
@@ -4139,6 +4144,7 @@ dfamust (struct dfa *d)
           if (STREQ (result, musts[0].is))
             exact = true;
           goto done;
+
         case CAT:
           assert (&musts[2] <= mp);
           {
@@ -4188,62 +4194,41 @@ dfamust (struct dfa *d)
               lmp->is[0] = '\0';
           }
           break;
+
+        case '\0':
+          /* Not on *my* shift.  */
+          goto done;
+
         default:
-          if (t < END)
-            {
-              assert (!"oops! t >= END");
-            }
-          else if (t == '\0')
-            {
-              /* not on *my* shift */
-              goto done;
-            }
-          else if (t >= CSET)
+          resetmust (mp);
+          if (CSET <= t)
             {
-              charclass ccl;
+              /* If T is a singleton, or if case-folding in a unibyte
+                 locale and T's members all case-fold to the same char,
+                 convert T to one of its members.  Otherwise, do
+                 nothing further with T.  */
+              charclass *ccl = &d->charclasses[t - CSET];
               int j;
-              copyset (d->charclasses[t - CSET], ccl);
-              for (j = 0; j < NOTCHAR; ++j)
-                if (tstbit (j, ccl))
+              for (j = 0; j < NOTCHAR; j++)
+                if (tstbit (j, *ccl))
+                  break;
+              if (! (j < NOTCHAR))
+                break;
+              t = j;
+              while (++j < NOTCHAR)
+                if (tstbit (j, *ccl)
+                    && ! (case_fold && MB_CUR_MAX == 1
+                          && toupper (j) == toupper (t)))
                   break;
               if (j < NOTCHAR)
-                {
-                  int c = (case_fold && MB_CUR_MAX == 1) ? toupper (j) : j;
-                  for (; j < NOTCHAR; j++)
-                    if (tstbit (j, ccl)
-                        && (!(case_fold && MB_CUR_MAX == 1) || c != toupper 
(j)))
-                      break;
-                  if (j < NOTCHAR)
-                    resetmust (mp);
-                  else
-                    {
-                      resetmust (mp);
-                      mp->is[0] = mp->left[0] = mp->right[0] = c;
-                      mp->is[1] = mp->left[1] = mp->right[1] = '\0';
-                      mp->in = enlist (mp->in, mp->is, (size_t) 1);
-                      if (mp->in == NULL)
-                        goto done;
-                    }
-                }
-              else
-                resetmust (mp);
-            }
-          else if (t == ANYCHAR || t == MBCSET)
-            {
-              /* easy enough */
-              resetmust (mp);
-            }
-          else
-            {
-              /* plain character */
-              resetmust (mp);
-              mp->is[0] = mp->left[0] = mp->right[0] =
-                (case_fold && MB_CUR_MAX == 1) ? toupper (t) : t;
-              mp->is[1] = mp->left[1] = mp->right[1] = '\0';
-              mp->in = enlist (mp->in, mp->is, (size_t) 1);
-              if (mp->in == NULL)
-                goto done;
+                break;
             }
+          mp->is[0] = mp->left[0] = mp->right[0]
+            = case_fold && MB_CUR_MAX == 1 ? toupper (t) : t;
+          mp->is[1] = mp->left[1] = mp->right[1] = '\0';
+          mp->in = enlist (mp->in, mp->is, 1);
+          if (mp->in == NULL)
+            goto done;
           break;
         }
 #ifdef DEBUG

-----------------------------------------------------------------------

Summary of changes:
 src/dfa.c |   76 +++++++++++++++++++++++++++++++++++++------------------------
 1 files changed, 46 insertions(+), 30 deletions(-)


hooks/post-receive
-- 
grep



reply via email to

[Prev in Thread] Current Thread [Next in Thread]