bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v2 0/5] Speed up uNN_chr and uNN_strchr with Boyer-Moore algo


From: Bruno Haible
Subject: Re: [PATCH v2 0/5] Speed up uNN_chr and uNN_strchr with Boyer-Moore algorithm
Date: Sat, 31 Jul 2010 21:35:41 +0200
User-agent: KMail/1.9.9

Hi Paolo,

> I pushed it now, thanks for the review!

After I added some more unit tests for u8_strchr, I see crashes and assertion
failures:

1) 
        mem = (UNIT *) (page_boundary - 2 * sizeof (UNIT));
        mem[0] = 0x50;
        mem[1] = 0;
        ASSERT (u8_strchr (mem, 0x123) == NULL); /* crashes */

2)
        mem = (UNIT *) (page_boundary - 3 * sizeof (UNIT));
        mem[0] = 0x50;
        mem[1] = 0x50;
        mem[2] = 0;
        ASSERT (u8_strchr (mem, 0x123) == NULL); /* assertion failed */
3)
        mem = (UNIT *) (page_boundary - 3 * sizeof (UNIT));
        mem[0] = 0x50;
        mem[1] = 0x50;
        mem[2] = 0;
        ASSERT (u8_strchr (mem, 0x3456) == NULL); /* crashes */
4)
        mem = (UNIT *) (page_boundary - 4 * sizeof (UNIT));
        mem[0] = 0x50;
        mem[1] = 0x50;
        mem[2] = 0x50;
        mem[3] = 0;
        ASSERT (u8_strchr (mem, 0x23456) == NULL); /* crashes */

I'm applying the two attached patches: Bug fixes, then comments and tiny
optimizations.


2010-07-31  Bruno Haible  <address@hidden>

        unistr/u8-strchr: Fix several bugs.
        * lib/unistr/u8-strchr.c (u8_strchr): Don't search beyond the end of
        the string. When not found, return NULL, not a pointer near the end.

--- lib/unistr/u8-strchr.c.orig Sat Jul 31 21:26:45 2010
+++ lib/unistr/u8-strchr.c      Sat Jul 31 21:22:34 2010
@@ -62,7 +62,7 @@
     switch (u8_uctomb_aux (c, uc, 6))
       {
       case 2:
-        if (*s == 0)
+        if (*s == 0 || s[1] == 0)
           break;
         {
           uint8_t c0 = c[0];
@@ -96,11 +96,11 @@
               if (s[1] == 0)
                 break;
             }
-          return (uint8_t *) s;
         }
+        break;
 
       case 3:
-        if (*s == 0 || s[1] == 0)
+        if (*s == 0 || s[1] == 0 || s[2] == 0)
           break;
         {
           uint8_t c0 = c[0];
@@ -147,7 +147,7 @@
         }
 
       case 4:
-        if (*s == 0 || s[1] == 0 || s[2] == 0)
+        if (*s == 0 || s[1] == 0 || s[2] == 0 || s[3] == 0)
           break;
         {
           uint8_t c0 = c[0];


2010-07-31  Bruno Haible  <address@hidden>

        unistr/u8-chr, unistr/u8-strchr: Optimize and add comments.
        * lib/unistr/u8-chr.c (u8_chr): Add comments. Remove a useless test at
        the beginning of the loop.
        * lib/unistr/u8-strchr.c (u8_strchr): Add comments. Don't fall through
        cases in 'switch' statement.

--- lib/unistr/u8-chr.c.orig    Sat Jul 31 21:29:46 2010
+++ lib/unistr/u8-chr.c Sat Jul 31 21:29:14 2010
@@ -70,14 +70,17 @@
           uint8_t c1 = c[1];
           const uint8_t *end = s + n - 1;
 
-          while (s < end)
+          do
             {
+              /* Here s < end.
+                 Test whether s[0..1] == { c0, c1 }.  */
               uint8_t s1 = s[1];
               if (s1 == c1)
                 {
                   if (*s == c0)
                     return (uint8_t *) s;
                   else
+                    /* Skip the search at s + 1, because s[1] = c1 < c0.  */
                     s += 2;
                 }
               else
@@ -85,9 +88,11 @@
                   if (s1 == c0)
                     s++;
                   else
+                    /* Skip the search at s + 1, because s[1] != c0.  */
                     s += 2;
                 }
             }
+          while (s < end);
           break;
         }
 
@@ -104,14 +109,19 @@
           else
             skip = 3;
 
-          while (s < end)
+          do
             {
+              /* Here s < end.
+                 Test whether s[0..2] == { c0, c1, c2 }.  */
               uint8_t s2 = s[2];
               if (s2 == c2)
                 {
                   if (s[1] == c1 && *s == c0)
                     return (uint8_t *) s;
                   else
+                    /* If c2 != c1:
+                         Skip the search at s + 1, because s[2] == c2 != c1.
+                       Skip the search at s + 2, because s[2] == c2 < c0.  */
                     s += skip;
                 }
               else
@@ -119,11 +129,15 @@
                   if (s2 == c1)
                     s++;
                   else if (s2 == c0)
+                    /* Skip the search at s + 1, because s[2] != c1.  */
                     s += 2;
                   else
+                    /* Skip the search at s + 1, because s[2] != c1.
+                       Skip the search at s + 2, because s[2] != c0.  */
                     s += 3;
                 }
             }
+          while (s < end);
           break;
         }
 
@@ -143,14 +157,21 @@
           else
             skip = 4;
 
-          while (s < end)
+          do
             {
+              /* Here s < end.
+                 Test whether s[0..3] == { c0, c1, c2, c3 }.  */
               uint8_t s3 = s[3];
               if (s3 == c3)
                 {
                   if (s[2] == c2 && s[1] == c1 && *s == c0)
                     return (uint8_t *) s;
                   else
+                    /* If c3 != c2:
+                         Skip the search at s + 1, because s[3] == c3 != c2.
+                       If c3 != c1:
+                         Skip the search at s + 2, because s[3] == c3 != c1.
+                       Skip the search at s + 3, because s[3] == c3 < c0.  */
                     s += skip;
                 }
               else
@@ -158,13 +179,20 @@
                   if (s3 == c2)
                     s++;
                   else if (s3 == c1)
+                    /* Skip the search at s + 1, because s[3] != c2.  */
                     s += 2;
                   else if (s3 == c0)
+                    /* Skip the search at s + 1, because s[3] != c2.
+                       Skip the search at s + 2, because s[3] != c1.  */
                     s += 3;
                   else
+                    /* Skip the search at s + 1, because s[3] != c2.
+                       Skip the search at s + 2, because s[3] != c1.
+                       Skip the search at s + 3, because s[3] != c0.  */
                     s += 4;
                 }
             }
+          while (s < end);
           break;
         }
       }
--- lib/unistr/u8-strchr.c.orig Sat Jul 31 21:29:46 2010
+++ lib/unistr/u8-strchr.c      Sat Jul 31 21:29:14 2010
@@ -67,15 +67,19 @@
         {
           uint8_t c0 = c[0];
           uint8_t c1 = c[1];
+          /* Search for { c0, c1 }.  */
           uint8_t s1 = s[1];
 
           for (;;)
             {
+              /* Here s[0] != 0, s[1] != 0.
+                 Test whether s[0..1] == { c0, c1 }.  */
               if (s1 == c1)
                 {
                   if (*s == c0)
                     return (uint8_t *) s;
                   else
+                    /* Skip the search at s + 1, because s[1] = c1 < c0.  */
                     goto case2_skip2;
                 }
               else
@@ -83,6 +87,7 @@
                   if (s1 == c0)
                     goto case2_skip1;
                   else
+                    /* Skip the search at s + 1, because s[1] != c0.  */
                     goto case2_skip2;
                 }
              case2_skip2:
@@ -106,26 +111,36 @@
           uint8_t c0 = c[0];
           uint8_t c1 = c[1];
           uint8_t c2 = c[2];
+          /* Search for { c0, c1, c2 }.  */
           uint8_t s2 = s[2];
 
           for (;;)
             {
+              /* Here s[0] != 0, s[1] != 0, s[2] != 0.
+                 Test whether s[0..2] == { c0, c1, c2 }.  */
               if (s2 == c2)
                 {
                   if (s[1] == c1 && *s == c0)
                     return (uint8_t *) s;
-                  else if (c2 == c1)
-                    goto case3_skip1;
                   else
-                    goto case3_skip3;
+                    /* If c2 != c1:
+                         Skip the search at s + 1, because s[2] == c2 != c1.
+                       Skip the search at s + 2, because s[2] == c2 < c0.  */
+                    if (c2 == c1)
+                      goto case3_skip1;
+                    else
+                      goto case3_skip3;
                 }
               else
                 {
                   if (s2 == c1)
                     goto case3_skip1;
                   else if (s2 == c0)
+                    /* Skip the search at s + 1, because s[2] != c1.  */
                     goto case3_skip2;
                   else
+                    /* Skip the search at s + 1, because s[2] != c1.
+                       Skip the search at s + 2, because s[2] != c0.  */
                     goto case3_skip3;
                 }
              case3_skip3:
@@ -145,6 +160,7 @@
                 break;
             }
         }
+        break;
 
       case 4:
         if (*s == 0 || s[1] == 0 || s[2] == 0 || s[3] == 0)
@@ -154,30 +170,45 @@
           uint8_t c1 = c[1];
           uint8_t c2 = c[2];
           uint8_t c3 = c[3];
+          /* Search for { c0, c1, c2, c3 }.  */
           uint8_t s3 = s[3];
 
           for (;;)
             {
+              /* Here s[0] != 0, s[1] != 0, s[2] != 0, s[3] != 0.
+                 Test whether s[0..3] == { c0, c1, c2, c3 }.  */
               if (s3 == c3)
                 {
                   if (s[2] == c2 && s[1] == c1 && *s == c0)
                     return (uint8_t *) s;
-                  else if (c3 == c2)
-                    goto case4_skip1;
-                  else if (c3 == c1)
-                    goto case4_skip2;
                   else
-                    goto case4_skip4;
+                    /* If c3 != c2:
+                         Skip the search at s + 1, because s[3] == c3 != c2.
+                       If c3 != c1:
+                         Skip the search at s + 2, because s[3] == c3 != c1.
+                       Skip the search at s + 3, because s[3] == c3 < c0.  */
+                    if (c3 == c2)
+                      goto case4_skip1;
+                    else if (c3 == c1)
+                      goto case4_skip2;
+                    else
+                      goto case4_skip4;
                 }
               else
                 {
                   if (s3 == c2)
                     goto case4_skip1;
                   else if (s3 == c1)
+                    /* Skip the search at s + 1, because s[3] != c2.  */
                     goto case4_skip2;
                   else if (s3 == c0)
+                    /* Skip the search at s + 1, because s[3] != c2.
+                       Skip the search at s + 2, because s[3] != c1.  */
                     goto case4_skip3;
                   else
+                    /* Skip the search at s + 1, because s[3] != c2.
+                       Skip the search at s + 2, because s[3] != c1.
+                       Skip the search at s + 3, because s[3] != c0.  */
                     goto case4_skip4;
                 }
              case4_skip4:
@@ -202,6 +233,7 @@
                 break;
             }
         }
+        break;
       }
 
   return NULL;



reply via email to

[Prev in Thread] Current Thread [Next in Thread]