grep-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

grep branch, master, updated. v2.18-89-gc7ea5ae


From: Paul Eggert
Subject: grep branch, master, updated. v2.18-89-gc7ea5ae
Date: Wed, 23 Apr 2014 04:09:35 +0000

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "grep".

The branch, master has been updated
       via  c7ea5aea911b950b2398454ca89cce23cabd3a40 (commit)
       via  e2f3093e35da98b4cc1357ae63b5e0da26796128 (commit)
      from  ca7868cc27db3d9deafaa2e0ac5a2bb0aa8ef373 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.savannah.gnu.org/cgit/grep.git/commit/?id=c7ea5aea911b950b2398454ca89cce23cabd3a40


commit c7ea5aea911b950b2398454ca89cce23cabd3a40
Author: Paul Eggert <address@hidden>
Date:   Mon Apr 21 17:15:56 2014 -0700

    kwset: simplify Boyer-Moore with unibyte -i
    
    This change doesn't significantly affect performance on my platform,
    and should make the code easier to maintain.
    * src/kwset.c (BM_DELTA2_SEARCH, LAST_SHIFT, TRANS):
    Remove these macros, in favor of ...
    (tr, bm_delta2_search): New functions.  All uses changed.
    The latter function is inline because this improves code size and
    runtime CPU slightly on x86-64 with gcc -O2 (GCC 4.9.0).
    (bmexec): Prefer tr when that's simpler.

diff --git a/src/kwset.c b/src/kwset.c
index d212d59..69b0a55 100644
--- a/src/kwset.c
+++ b/src/kwset.c
@@ -464,75 +464,66 @@ kwsprep (kwset_t kwset)
       kwset->delta[i] = delta[U(trans[i])];
 }
 
-#define BM_DELTA2_SEARCH                               \
-  if (TRANS(tp[-2]) == gc2)                            \
-    {                                                  \
-      for (i = 3; i <= len; ++i)                       \
-        if (TRANS(tp[-i]) != TRANS(sp[-i]))            \
-          break;                                       \
-      if (i > len)                                     \
-        return tp - len - text;                                \
-      d = kwset->shift[i - 2]; tp += d;                        \
-      if (tp > ep)                                     \
-        break;                                         \
-      if (TRANS(tp[-1]) != gc1)                                \
-        {                                              \
-          d = d1[U(tp[-1])]; LAST_SHIFT;               \
-          continue;                                    \
-        }                                              \
-      skip = i - 1;                                    \
-    }                                                  \
-  else                                                 \
-    {                                                  \
-      d = kwset->shift[0]; tp += d;                    \
-      if (tp > ep)                                     \
-        break;                                         \
-      if (TRANS(tp[-1]) != gc1)                                \
-        {                                              \
-          d = d1[U(tp[-1])]; LAST_SHIFT;               \
-          continue;                                    \
-        }                                              \
-      skip = 1;                                                \
-    }                                                  \
-  while (true)                                         \
-    {                                                  \
-      if (TRANS(tp[-2]) == gc2)                                \
-        {                                              \
-          for (i = 3; i <= d; ++i)                     \
-            if (TRANS(tp[-i]) != TRANS(sp[-i]))                \
-              break;                                   \
-          if (i > d)                                   \
-            {                                          \
-              for (i = d + skip + 1; i <= len; ++i)    \
-                if (TRANS(tp[-i]) != TRANS(sp[-i]))    \
-                  break;                               \
-              if (i > len)                             \
-                return tp - len - text;                        \
-            }                                          \
-          d = kwset->shift[i - 2]; tp += d;            \
-          if (tp > ep)                                 \
-            break;                                     \
-          if (TRANS(tp[-1]) != gc1)                    \
-            {                                          \
-              d = d1[U(tp[-1])]; LAST_SHIFT;           \
-              break;                                   \
-            }                                          \
-          skip = i - 1;                                        \
-        }                                              \
-      else                                             \
-        {                                              \
-          d = kwset->shift[0]; tp += d;                        \
-          if (tp > ep)                                 \
-            break;                                     \
-          if (TRANS(tp[-1]) != gc1)                    \
-            {                                          \
-              d = d1[U(tp[-1])]; LAST_SHIFT;           \
-              break;                                   \
-            }                                          \
-          skip = 1;                                    \
-        }                                              \
+/* Use TRANS to transliterate C.  A null TRANS does no transliteration.  */
+static char
+tr (char const *trans, char c)
+{
+  return trans ? trans[U(c)] : c;
+}
+
+/* Delta2 portion of a Boyer-Moore search.  *TP is the string text
+   pointer; it is updated in place.  EP is the end of the string text,
+   and SP the end of the pattern.  LEN is the pattern length; it must
+   be at least 2.  TRANS, if nonnull, is the input translation table.
+   GC1 and GC2 are the last and second-from last bytes of the pattern,
+   transliterated by TRANS; the caller precomputes them for
+   efficiency.  If D1 is nonnull, it is a delta1 table for shifting *TP
+   when failing.  KWSET->shift says how much to shift.  */
+static inline bool
+bm_delta2_search (char const **tpp, char const *ep, char const *sp, int len,
+                  char const *trans, char gc1, char gc2,
+                  unsigned char const *d1, kwset_t kwset)
+{
+  char const *tp = *tpp;
+  int d = len, skip = 0;
+
+  while (true)
+    {
+      int i = 2;
+      if (tr (trans, tp[-2]) == gc2)
+        {
+          while (++i <= d)
+            if (tr (trans, tp[-i]) != tr (trans, sp[-i]))
+              break;
+          if (i > d)
+            {
+              for (i = d + skip + 1; i <= len; ++i)
+                if (tr (trans, tp[-i]) != tr (trans, sp[-i]))
+                  break;
+              if (i > len)
+                {
+                  *tpp = tp - len;
+                  return true;
+                }
+            }
+        }
+
+      tp += d = kwset->shift[i - 2];
+      if (tp > ep)
+        break;
+      if (tr (trans, tp[-1]) != gc1)
+        {
+          if (d1)
+            tp += d1[U(tp[-1])];
+          break;
+        }
+      skip = i - 1;
     }
 
+  *tpp = tp;
+  return false;
+}
+
 
 /* Fast boyer-moore search. */
 static size_t _GL_ATTRIBUTE_PURE
@@ -540,8 +531,7 @@ bmexec (kwset_t kwset, char const *text, size_t size)
 {
   unsigned char const *d1;
   char const *ep, *sp, *tp;
-  int d, i, skip;
-  char gc1, gc2;
+  int d;
   int len = kwset->mind;
   char const *trans = kwset->trans;
 
@@ -568,17 +558,8 @@ bmexec (kwset_t kwset, char const *text, size_t size)
   d1 = kwset->delta;
   sp = kwset->target + len;
   tp = text + len;
-  if (trans)
-    {
-      gc1 = trans[U(sp[-1])];
-      gc2 = trans[U(sp[-2])];
-    }
-  else
-    {
-      gc1 = sp[-1];
-      gc2 = sp[-2];
-    }
-  skip = 0;
+  char gc1 = tr (trans, sp[-1]);
+  char gc2 = tr (trans, sp[-2]);
 
   /* Significance of 12: 1 (initial offset) + 10 (skip loop) + 1 (md2). */
   if (size > 12 * len)
@@ -606,20 +587,8 @@ bmexec (kwset_t kwset, char const *text, size_t size)
           }
         break;
       found:
-#undef LAST_SHIFT
-#define LAST_SHIFT tp += d
-        if (trans)
-          {
-#undef TRANS
-#define TRANS(ch) trans[U(ch)]
-            BM_DELTA2_SEARCH;
-          }
-        else
-          {
-#undef TRANS
-#define TRANS(ch) ch
-            BM_DELTA2_SEARCH;
-          }
+        if (bm_delta2_search (&tp, ep, sp, len, trans, gc1, gc2, d1, kwset))
+          return tp - text;
       }
 
   /* Now we have only a few characters left to search.  We
@@ -631,20 +600,8 @@ bmexec (kwset_t kwset, char const *text, size_t size)
       d = d1[U((tp += d)[-1])];
       if (d != 0)
         continue;
-#undef LAST_SHIFT
-#define LAST_SHIFT
-      if (trans)
-        {
-#undef TRANS
-#define TRANS(ch) trans[U(ch)]
-          BM_DELTA2_SEARCH;
-        }
-      else
-        {
-#undef TRANS
-#define TRANS(ch) ch
-          BM_DELTA2_SEARCH;
-        }
+      if (bm_delta2_search (&tp, ep, sp, len, trans, gc1, gc2, NULL, kwset))
+        return tp - text;
     }
 
   return -1;

http://git.savannah.gnu.org/cgit/grep.git/commit/?id=e2f3093e35da98b4cc1357ae63b5e0da26796128


commit c7ea5aea911b950b2398454ca89cce23cabd3a40
Author: Paul Eggert <address@hidden>
Date:   Mon Apr 21 17:15:56 2014 -0700

    kwset: simplify Boyer-Moore with unibyte -i
    
    This change doesn't significantly affect performance on my platform,
    and should make the code easier to maintain.
    * src/kwset.c (BM_DELTA2_SEARCH, LAST_SHIFT, TRANS):
    Remove these macros, in favor of ...
    (tr, bm_delta2_search): New functions.  All uses changed.
    The latter function is inline because this improves code size and
    runtime CPU slightly on x86-64 with gcc -O2 (GCC 4.9.0).
    (bmexec): Prefer tr when that's simpler.

diff --git a/src/kwset.c b/src/kwset.c
index d212d59..69b0a55 100644
--- a/src/kwset.c
+++ b/src/kwset.c
@@ -464,75 +464,66 @@ kwsprep (kwset_t kwset)
       kwset->delta[i] = delta[U(trans[i])];
 }
 
-#define BM_DELTA2_SEARCH                               \
-  if (TRANS(tp[-2]) == gc2)                            \
-    {                                                  \
-      for (i = 3; i <= len; ++i)                       \
-        if (TRANS(tp[-i]) != TRANS(sp[-i]))            \
-          break;                                       \
-      if (i > len)                                     \
-        return tp - len - text;                                \
-      d = kwset->shift[i - 2]; tp += d;                        \
-      if (tp > ep)                                     \
-        break;                                         \
-      if (TRANS(tp[-1]) != gc1)                                \
-        {                                              \
-          d = d1[U(tp[-1])]; LAST_SHIFT;               \
-          continue;                                    \
-        }                                              \
-      skip = i - 1;                                    \
-    }                                                  \
-  else                                                 \
-    {                                                  \
-      d = kwset->shift[0]; tp += d;                    \
-      if (tp > ep)                                     \
-        break;                                         \
-      if (TRANS(tp[-1]) != gc1)                                \
-        {                                              \
-          d = d1[U(tp[-1])]; LAST_SHIFT;               \
-          continue;                                    \
-        }                                              \
-      skip = 1;                                                \
-    }                                                  \
-  while (true)                                         \
-    {                                                  \
-      if (TRANS(tp[-2]) == gc2)                                \
-        {                                              \
-          for (i = 3; i <= d; ++i)                     \
-            if (TRANS(tp[-i]) != TRANS(sp[-i]))                \
-              break;                                   \
-          if (i > d)                                   \
-            {                                          \
-              for (i = d + skip + 1; i <= len; ++i)    \
-                if (TRANS(tp[-i]) != TRANS(sp[-i]))    \
-                  break;                               \
-              if (i > len)                             \
-                return tp - len - text;                        \
-            }                                          \
-          d = kwset->shift[i - 2]; tp += d;            \
-          if (tp > ep)                                 \
-            break;                                     \
-          if (TRANS(tp[-1]) != gc1)                    \
-            {                                          \
-              d = d1[U(tp[-1])]; LAST_SHIFT;           \
-              break;                                   \
-            }                                          \
-          skip = i - 1;                                        \
-        }                                              \
-      else                                             \
-        {                                              \
-          d = kwset->shift[0]; tp += d;                        \
-          if (tp > ep)                                 \
-            break;                                     \
-          if (TRANS(tp[-1]) != gc1)                    \
-            {                                          \
-              d = d1[U(tp[-1])]; LAST_SHIFT;           \
-              break;                                   \
-            }                                          \
-          skip = 1;                                    \
-        }                                              \
+/* Use TRANS to transliterate C.  A null TRANS does no transliteration.  */
+static char
+tr (char const *trans, char c)
+{
+  return trans ? trans[U(c)] : c;
+}
+
+/* Delta2 portion of a Boyer-Moore search.  *TP is the string text
+   pointer; it is updated in place.  EP is the end of the string text,
+   and SP the end of the pattern.  LEN is the pattern length; it must
+   be at least 2.  TRANS, if nonnull, is the input translation table.
+   GC1 and GC2 are the last and second-from last bytes of the pattern,
+   transliterated by TRANS; the caller precomputes them for
+   efficiency.  If D1 is nonnull, it is a delta1 table for shifting *TP
+   when failing.  KWSET->shift says how much to shift.  */
+static inline bool
+bm_delta2_search (char const **tpp, char const *ep, char const *sp, int len,
+                  char const *trans, char gc1, char gc2,
+                  unsigned char const *d1, kwset_t kwset)
+{
+  char const *tp = *tpp;
+  int d = len, skip = 0;
+
+  while (true)
+    {
+      int i = 2;
+      if (tr (trans, tp[-2]) == gc2)
+        {
+          while (++i <= d)
+            if (tr (trans, tp[-i]) != tr (trans, sp[-i]))
+              break;
+          if (i > d)
+            {
+              for (i = d + skip + 1; i <= len; ++i)
+                if (tr (trans, tp[-i]) != tr (trans, sp[-i]))
+                  break;
+              if (i > len)
+                {
+                  *tpp = tp - len;
+                  return true;
+                }
+            }
+        }
+
+      tp += d = kwset->shift[i - 2];
+      if (tp > ep)
+        break;
+      if (tr (trans, tp[-1]) != gc1)
+        {
+          if (d1)
+            tp += d1[U(tp[-1])];
+          break;
+        }
+      skip = i - 1;
     }
 
+  *tpp = tp;
+  return false;
+}
+
 
 /* Fast boyer-moore search. */
 static size_t _GL_ATTRIBUTE_PURE
@@ -540,8 +531,7 @@ bmexec (kwset_t kwset, char const *text, size_t size)
 {
   unsigned char const *d1;
   char const *ep, *sp, *tp;
-  int d, i, skip;
-  char gc1, gc2;
+  int d;
   int len = kwset->mind;
   char const *trans = kwset->trans;
 
@@ -568,17 +558,8 @@ bmexec (kwset_t kwset, char const *text, size_t size)
   d1 = kwset->delta;
   sp = kwset->target + len;
   tp = text + len;
-  if (trans)
-    {
-      gc1 = trans[U(sp[-1])];
-      gc2 = trans[U(sp[-2])];
-    }
-  else
-    {
-      gc1 = sp[-1];
-      gc2 = sp[-2];
-    }
-  skip = 0;
+  char gc1 = tr (trans, sp[-1]);
+  char gc2 = tr (trans, sp[-2]);
 
   /* Significance of 12: 1 (initial offset) + 10 (skip loop) + 1 (md2). */
   if (size > 12 * len)
@@ -606,20 +587,8 @@ bmexec (kwset_t kwset, char const *text, size_t size)
           }
         break;
       found:
-#undef LAST_SHIFT
-#define LAST_SHIFT tp += d
-        if (trans)
-          {
-#undef TRANS
-#define TRANS(ch) trans[U(ch)]
-            BM_DELTA2_SEARCH;
-          }
-        else
-          {
-#undef TRANS
-#define TRANS(ch) ch
-            BM_DELTA2_SEARCH;
-          }
+        if (bm_delta2_search (&tp, ep, sp, len, trans, gc1, gc2, d1, kwset))
+          return tp - text;
       }
 
   /* Now we have only a few characters left to search.  We
@@ -631,20 +600,8 @@ bmexec (kwset_t kwset, char const *text, size_t size)
       d = d1[U((tp += d)[-1])];
       if (d != 0)
         continue;
-#undef LAST_SHIFT
-#define LAST_SHIFT
-      if (trans)
-        {
-#undef TRANS
-#define TRANS(ch) trans[U(ch)]
-          BM_DELTA2_SEARCH;
-        }
-      else
-        {
-#undef TRANS
-#define TRANS(ch) ch
-          BM_DELTA2_SEARCH;
-        }
+      if (bm_delta2_search (&tp, ep, sp, len, trans, gc1, gc2, NULL, kwset))
+        return tp - text;
     }
 
   return -1;

-----------------------------------------------------------------------

Summary of changes:
 src/kwset.c |  146 +++++++++++++++++++++++++++++++++-------------------------
 1 files changed, 83 insertions(+), 63 deletions(-)


hooks/post-receive
-- 
grep



reply via email to

[Prev in Thread] Current Thread [Next in Thread]