grep-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

grep branch, master, updated. v2.25-49-g487a7fc


From: Paul Eggert
Subject: grep branch, master, updated. v2.25-49-g487a7fc
Date: Thu, 18 Aug 2016 01:47:33 +0000 (UTC)

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "grep".

The branch, master has been updated
       via  487a7fcfd8dfd6d6e8b3e95020d2ed3d852d317f (commit)
       via  ed85edf0553f08295c0408e353edf81f7eb2f22a (commit)
      from  f3f8153fcd71f1d2a7a43c23c00e6e6ce9c3c41d (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.savannah.gnu.org/cgit/grep.git/commit/?id=487a7fcfd8dfd6d6e8b3e95020d2ed3d852d317f


commit 487a7fcfd8dfd6d6e8b3e95020d2ed3d852d317f
Author: Paul Eggert <address@hidden>
Date:   Wed Aug 17 18:41:16 2016 -0700

    dfa: use 64-bit when ulong is at least that wide
    
    * src/dfa.c (charclass_word): Now unsigned long instead of unsigned.
    (CHARCLASS_WORD_BITS): Now 64 on 64-bit platforms.
    (CHARCLASS_PAIR, CHARCLASS_INIT): New macros.
    (CHARCLASS_WORD_MASK): Now a static const, since it no longer
    needs to be a macro.
    (equal): Open-code rather than calling memcmp.
    (add_utf8_anychar): Use CHARCLASS_INIT.

diff --git a/src/dfa.c b/src/dfa.c
index be62a18..d337bb6 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -63,15 +63,30 @@ enum { NOTCHAR = 1 << CHAR_BIT };
 
 /* This represents part of a character class.  It must be unsigned and
    at least CHARCLASS_WORD_BITS wide.  Any excess bits are zero.  */
-typedef unsigned int charclass_word;
+typedef unsigned long int charclass_word;
 
-/* The number of bits used in a charclass word.  utf8_classes assumes
-   this is exactly 32.  */
+/* CHARCLASS_WORD_BITS is the number of bits used in a charclass word.
+   CHARCLASS_PAIR (LO, HI) is part of a charclass initializer, and
+   represents 64 bits' worth of a charclass, where LO and HI are the
+   low and high-order 32 bits of the 64-bit quantity.  */
+#if ULONG_MAX >> 31 >> 31 < 3
 enum { CHARCLASS_WORD_BITS = 32 };
+# define CHARCLASS_PAIR(lo, hi) lo, hi
+#else
+enum { CHARCLASS_WORD_BITS = 64 };
+# define CHARCLASS_PAIR(lo, hi) (((charclass_word) (hi) << 32) + (lo))
+#endif
+
+/* An initializer for a charclass whose 32-bit words are A through H.  */
+#define CHARCLASS_INIT(a, b, c, d, e, f, g, h)         \
+    {                                                  \
+      CHARCLASS_PAIR (a, b), CHARCLASS_PAIR (c, d),    \
+      CHARCLASS_PAIR (e, f), CHARCLASS_PAIR (g, h)     \
+    }
 
 /* The maximum useful value of a charclass_word; all used bits are 1.  */
-#define CHARCLASS_WORD_MASK \
-  (((charclass_word) 1 << (CHARCLASS_WORD_BITS - 1) << 1) - 1)
+static charclass_word const CHARCLASS_WORD_MASK
+  = ((charclass_word) 1 << (CHARCLASS_WORD_BITS - 1) << 1) - 1;
 
 /* Number of words required to hold a bit for every character.  */
 enum
@@ -605,7 +620,11 @@ notset (charclass s)
 static bool
 equal (charclass const s1, charclass const s2)
 {
-  return memcmp (s1, s2, sizeof (charclass)) == 0;
+  charclass_word w = 0;
+  int i;
+  for (i = 0; i < CHARCLASS_WORDS; i++)
+    w |= s1[i] ^ s2[i];
+  return w == 0;
 }
 
 static bool
@@ -1675,20 +1694,19 @@ add_utf8_anychar (void)
 {
   static charclass const utf8_classes[5] = {
     /* 80-bf: non-leading bytes.  */
-    {0, 0, 0, 0, CHARCLASS_WORD_MASK, CHARCLASS_WORD_MASK, 0, 0},
+    CHARCLASS_INIT (0, 0, 0, 0, 0xffffffff, 0xffffffff, 0, 0),
 
     /* 00-7f: 1-byte sequence.  */
-    {CHARCLASS_WORD_MASK, CHARCLASS_WORD_MASK, CHARCLASS_WORD_MASK,
-     CHARCLASS_WORD_MASK, 0, 0, 0, 0},
+    CHARCLASS_INIT (0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0, 0, 0, 
0),
 
     /* c2-df: 2-byte sequence.  */
-    {0, 0, 0, 0, 0, 0, ~3 & CHARCLASS_WORD_MASK, 0},
+    CHARCLASS_INIT (0, 0, 0, 0, 0, 0, 0xfffffffc, 0),
 
     /* e0-ef: 3-byte sequence.  */
-    {0, 0, 0, 0, 0, 0, 0, 0xffff},
+    CHARCLASS_INIT (0, 0, 0, 0, 0, 0, 0, 0xffff),
 
     /* f0-f7: 4-byte sequence.  */
-    {0, 0, 0, 0, 0, 0, 0, 0xff0000}
+    CHARCLASS_INIT (0, 0, 0, 0, 0, 0, 0, 0xff0000)
   };
   const unsigned int n = sizeof (utf8_classes) / sizeof (utf8_classes[0]);
   unsigned int i;

http://git.savannah.gnu.org/cgit/grep.git/commit/?id=ed85edf0553f08295c0408e353edf81f7eb2f22a


commit 487a7fcfd8dfd6d6e8b3e95020d2ed3d852d317f
Author: Paul Eggert <address@hidden>
Date:   Wed Aug 17 18:41:16 2016 -0700

    dfa: use 64-bit when ulong is at least that wide
    
    * src/dfa.c (charclass_word): Now unsigned long instead of unsigned.
    (CHARCLASS_WORD_BITS): Now 64 on 64-bit platforms.
    (CHARCLASS_PAIR, CHARCLASS_INIT): New macros.
    (CHARCLASS_WORD_MASK): Now a static const, since it no longer
    needs to be a macro.
    (equal): Open-code rather than calling memcmp.
    (add_utf8_anychar): Use CHARCLASS_INIT.

diff --git a/src/dfa.c b/src/dfa.c
index be62a18..d337bb6 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -63,15 +63,30 @@ enum { NOTCHAR = 1 << CHAR_BIT };
 
 /* This represents part of a character class.  It must be unsigned and
    at least CHARCLASS_WORD_BITS wide.  Any excess bits are zero.  */
-typedef unsigned int charclass_word;
+typedef unsigned long int charclass_word;
 
-/* The number of bits used in a charclass word.  utf8_classes assumes
-   this is exactly 32.  */
+/* CHARCLASS_WORD_BITS is the number of bits used in a charclass word.
+   CHARCLASS_PAIR (LO, HI) is part of a charclass initializer, and
+   represents 64 bits' worth of a charclass, where LO and HI are the
+   low and high-order 32 bits of the 64-bit quantity.  */
+#if ULONG_MAX >> 31 >> 31 < 3
 enum { CHARCLASS_WORD_BITS = 32 };
+# define CHARCLASS_PAIR(lo, hi) lo, hi
+#else
+enum { CHARCLASS_WORD_BITS = 64 };
+# define CHARCLASS_PAIR(lo, hi) (((charclass_word) (hi) << 32) + (lo))
+#endif
+
+/* An initializer for a charclass whose 32-bit words are A through H.  */
+#define CHARCLASS_INIT(a, b, c, d, e, f, g, h)         \
+    {                                                  \
+      CHARCLASS_PAIR (a, b), CHARCLASS_PAIR (c, d),    \
+      CHARCLASS_PAIR (e, f), CHARCLASS_PAIR (g, h)     \
+    }
 
 /* The maximum useful value of a charclass_word; all used bits are 1.  */
-#define CHARCLASS_WORD_MASK \
-  (((charclass_word) 1 << (CHARCLASS_WORD_BITS - 1) << 1) - 1)
+static charclass_word const CHARCLASS_WORD_MASK
+  = ((charclass_word) 1 << (CHARCLASS_WORD_BITS - 1) << 1) - 1;
 
 /* Number of words required to hold a bit for every character.  */
 enum
@@ -605,7 +620,11 @@ notset (charclass s)
 static bool
 equal (charclass const s1, charclass const s2)
 {
-  return memcmp (s1, s2, sizeof (charclass)) == 0;
+  charclass_word w = 0;
+  int i;
+  for (i = 0; i < CHARCLASS_WORDS; i++)
+    w |= s1[i] ^ s2[i];
+  return w == 0;
 }
 
 static bool
@@ -1675,20 +1694,19 @@ add_utf8_anychar (void)
 {
   static charclass const utf8_classes[5] = {
     /* 80-bf: non-leading bytes.  */
-    {0, 0, 0, 0, CHARCLASS_WORD_MASK, CHARCLASS_WORD_MASK, 0, 0},
+    CHARCLASS_INIT (0, 0, 0, 0, 0xffffffff, 0xffffffff, 0, 0),
 
     /* 00-7f: 1-byte sequence.  */
-    {CHARCLASS_WORD_MASK, CHARCLASS_WORD_MASK, CHARCLASS_WORD_MASK,
-     CHARCLASS_WORD_MASK, 0, 0, 0, 0},
+    CHARCLASS_INIT (0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0, 0, 0, 
0),
 
     /* c2-df: 2-byte sequence.  */
-    {0, 0, 0, 0, 0, 0, ~3 & CHARCLASS_WORD_MASK, 0},
+    CHARCLASS_INIT (0, 0, 0, 0, 0, 0, 0xfffffffc, 0),
 
     /* e0-ef: 3-byte sequence.  */
-    {0, 0, 0, 0, 0, 0, 0, 0xffff},
+    CHARCLASS_INIT (0, 0, 0, 0, 0, 0, 0, 0xffff),
 
     /* f0-f7: 4-byte sequence.  */
-    {0, 0, 0, 0, 0, 0, 0, 0xff0000}
+    CHARCLASS_INIT (0, 0, 0, 0, 0, 0, 0, 0xff0000)
   };
   const unsigned int n = sizeof (utf8_classes) / sizeof (utf8_classes[0]);
   unsigned int i;

-----------------------------------------------------------------------

Summary of changes:
 src/dfa.c       |   55 +++++++++++++++++++++++++++++++++++++++++--------------
 src/dfasearch.c |    8 +++-----
 2 files changed, 44 insertions(+), 19 deletions(-)


hooks/post-receive
-- 
grep



reply via email to

[Prev in Thread] Current Thread [Next in Thread]