bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: strchrnul speed


From: Eric Blake
Subject: Re: strchrnul speed
Date: Mon, 28 Apr 2008 23:20:05 +0000 (UTC)
User-agent: Loom/3.14 (http://gmane.org/)

Eric Blake <ebb9 <at> byu.net> writes:

> Meanwhile, adding rawmemchr would speed up the (corner) case of
> strchrnul(s,0), but I haven't tackled that yet.

Here it is, plus another potential C99 read-beyond-end violation.  I'll follow 
Bruno's advice and add .valgrind files for strchrnul and rawmemchr next, if you 
don't beat me to it.


>From 2b430e392c3090c47279cd50946b456d9c4477cc Mon Sep 17 00:00:00 2001
From: Eric Blake <address@hidden>
Date: Mon, 28 Apr 2008 17:08:43 -0600
Subject: [PATCH] Add rawmemchr module, matching glibc.

* modules/string (Makefile.am): New indicator.
* m4/string_h.m4 (gl_HEADER_STRING_H_DEFAULTS): Set it.
* lib/string.in.h (rawmemchr): Declare when appropriate.
* modules/rawmemchr: New file.
* m4/rawmemchr.m4: Likewise.
* lib/rawmemchr.c: Likewise.
* modules/rawmemchr-tests: Likewise.
* tests/test-rawmemchr.c: Likewise.
* doc/glibc-functions/rawmemchr.texi (rawmemchr): Document
module.
* modules/strchrnul (Depends-on): Add rawmemchr.
* lib/strchrnul.c (strchrnul): Optimize a corner case.

Signed-off-by: Eric Blake <address@hidden>
---
 ChangeLog                          |   14 ++++
 doc/glibc-functions/rawmemchr.texi |    8 +-
 lib/rawmemchr.c                    |  136 ++++++++++++++++++++++++++++++++++++
 lib/strchrnul.c                    |    2 +
 lib/string.in.h                    |   16 ++++
 m4/rawmemchr.m4                    |   21 ++++++
 m4/string_h.m4                     |    2 +
 modules/rawmemchr                  |   25 +++++++
 modules/rawmemchr-tests            |   10 +++
 modules/strchrnul                  |    1 +
 modules/string                     |    2 +
 tests/test-rawmemchr.c             |   84 ++++++++++++++++++++++
 12 files changed, 317 insertions(+), 4 deletions(-)
 create mode 100644 lib/rawmemchr.c
 create mode 100644 m4/rawmemchr.m4
 create mode 100644 modules/rawmemchr
 create mode 100644 modules/rawmemchr-tests
 create mode 100644 tests/test-rawmemchr.c

diff --git a/ChangeLog b/ChangeLog
index 07baa5e..ea8676c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,19 @@
 2008-04-28  Eric Blake  <address@hidden>
 
+       Add rawmemchr module, matching glibc.
+       * modules/string (Makefile.am): New indicator.
+       * m4/string_h.m4 (gl_HEADER_STRING_H_DEFAULTS): Set it.
+       * lib/string.in.h (rawmemchr): Declare when appropriate.
+       * modules/rawmemchr: New file.
+       * m4/rawmemchr.m4: Likewise.
+       * lib/rawmemchr.c: Likewise.
+       * modules/rawmemchr-tests: Likewise.
+       * tests/test-rawmemchr.c: Likewise.
+       * doc/glibc-functions/rawmemchr.texi (rawmemchr): Document
+       module.
+       * modules/strchrnul (Depends-on): Add rawmemchr.
+       * lib/strchrnul.c (strchrnul): Optimize a corner case.
+
        Whitespace cleanup.
        * tests/test-strchrnul.c: Reindent.
        * lib/strchrnul.c: Likewise.
diff --git a/doc/glibc-functions/rawmemchr.texi b/doc/glibc-
functions/rawmemchr.texi
index 1cbeeb3..478231f 100644
--- a/doc/glibc-functions/rawmemchr.texi
+++ b/doc/glibc-functions/rawmemchr.texi
@@ -2,15 +2,15 @@
 @subsection @code{rawmemchr}
 @findex rawmemchr
 
-Gnulib module: ---
+Gnulib module: rawmemchr
 
 Portability problems fixed by Gnulib:
 @itemize
address@hidden
+This function is missing on all non-glibc platforms:
+MacOS X 10.3, FreeBSD 6.0, NetBSD 3.0, OpenBSD 3.8, AIX 5.1, HP-UX 11, IRIX 
6.5, OSF/1 5.1, Solaris 10, Cygwin, mingw, Interix 3.5, BeOS.
 @end itemize
 
 Portability problems not fixed by Gnulib:
 @itemize
address@hidden
-This function is missing on all non-glibc platforms:
-MacOS X 10.3, FreeBSD 6.0, NetBSD 3.0, OpenBSD 3.8, AIX 5.1, HP-UX 11, IRIX 
6.5, OSF/1 5.1, Solaris 10, Cygwin, mingw, Interix 3.5, BeOS.
 @end itemize
diff --git a/lib/rawmemchr.c b/lib/rawmemchr.c
new file mode 100644
index 0000000..d7cdc6e
--- /dev/null
+++ b/lib/rawmemchr.c
@@ -0,0 +1,136 @@
+/* Searching in a string.
+   Copyright (C) 2008 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+/* Specification.  */
+#include <string.h>
+
+/* Find the first occurrence of C in S.  */
+void *
+rawmemchr (const void *s, int c_in)
+{
+  /* On 32-bit hardware, choosing longword to be a 32-bit unsigned
+     long instead of a 64-bit uintmax_t tends to give better
+     performance.  On 64-bit hardware, unsigned long is generally 64
+     bits already.  Change this typedef to experiment with
+     performance.  */
+  typedef unsigned long int longword;
+
+  const unsigned char *char_ptr;
+  const longword *longword_ptr;
+  longword repeated_one;
+  longword repeated_c;
+  unsigned char c;
+
+  c = (unsigned char) c_in;
+
+  /* Handle the first few bytes by reading one byte at a time.
+     Do this until CHAR_PTR is aligned on a longword boundary.  */
+  for (char_ptr = (const unsigned char *) s;
+       (size_t) char_ptr % sizeof (longword) != 0;
+       ++char_ptr)
+    if (*char_ptr == c)
+      return (void *) char_ptr;
+
+  longword_ptr = (const longword *) char_ptr;
+
+  /* All these elucidatory comments refer to 4-byte longwords,
+     but the theory applies equally well to any size longwords.  */
+
+  /* Compute auxiliary longword values:
+     repeated_one is a value which has a 1 in every byte.
+     repeated_c has c in every byte.  */
+  repeated_one = 0x01010101;
+  repeated_c = c | (c << 8);
+  repeated_c |= repeated_c << 16;
+  if (0xffffffffU < (longword) -1)
+    {
+      repeated_one |= repeated_one << 31 << 1;
+      repeated_c |= repeated_c << 31 << 1;
+      if (8 < sizeof (longword))
+        {
+          size_t i;
+
+          for (i = 64; i < sizeof (longword) * 8; i *= 2)
+            {
+              repeated_one |= repeated_one << i;
+              repeated_c |= repeated_c << i;
+            }
+        }
+    }
+
+  /* Instead of the traditional loop which tests each byte, we will
+     test a longword at a time.  The tricky part is testing if *any of
+     the four* bytes in the longword in question are equal to NUL or
+     c.  We first use an xor with repeated_c.  This reduces the task
+     to testing whether *any of the four* bytes in longword1 is zero.
+
+     We compute tmp =
+       ((longword1 - repeated_one) & ~longword1) & (repeated_one << 7).
+     That is, we perform the following operations:
+       1. Subtract repeated_one.
+       2. & ~longword1.
+       3. & a mask consisting of 0x80 in every byte.
+     Consider what happens in each byte:
+       - If a byte of longword1 is zero, step 1 and 2 transform it into 0xff,
+         and step 3 transforms it into 0x80.  A carry can also be propagated
+         to more significant bytes.
+       - If a byte of longword1 is nonzero, let its lowest 1 bit be at
+         position k (0 <= k <= 7); so the lowest k bits are 0.  After step 1,
+         the byte ends in a single bit of value 0 and k bits of value 1.
+         After step 2, the result is just k bits of value 1: 2^k - 1.  After
+         step 3, the result is 0.  And no carry is produced.
+     So, if longword1 has only non-zero bytes, tmp is zero.
+     Whereas if longword1 has a zero byte, call j the position of the least
+     significant zero byte.  Then the result has a zero at positions 0, ...,
+     j-1 and a 0x80 at position j.  We cannot predict the result at the more
+     significant bytes (positions j+1..3), but it does not matter since we
+     already have a non-zero bit at position 8*j+7.
+
+     The test whether any byte in longword1 is zero is equivalent
+     to testing whether tmp is nonzero.
+
+     This test can read beyond the end of a string, depending on where
+     C_IN is encountered.  However, this is considered safe since the
+     initialization phase ensured that the read will be aligned,
+     therefore, the read will not cross page boundaries and will not
+     cause a fault.  */
+
+  while (1)
+    {
+      longword longword1 = *longword_ptr ^ repeated_c;
+
+      if ((((longword1 - repeated_one) & ~longword1)
+           & (repeated_one << 7)) != 0)
+        break;
+      longword_ptr++;
+    }
+
+  char_ptr = (const unsigned char *) longword_ptr;
+
+  /* At this point, we know that one of the sizeof (longword) bytes
+     starting at char_ptr is == c.  On little-endian machines, we
+     could determine the first such byte without any further memory
+     accesses, just by looking at the tmp result from the last loop
+     iteration.  But this does not work on big-endian machines.
+     Choose code that works in both cases.  */
+
+  char_ptr = (unsigned char *) longword_ptr;
+  while (*char_ptr != c)
+    char_ptr++;
+  return (void *) char_ptr;
+}
diff --git a/lib/strchrnul.c b/lib/strchrnul.c
index 0902c18..5ed237c 100644
--- a/lib/strchrnul.c
+++ b/lib/strchrnul.c
@@ -37,6 +37,8 @@ strchrnul (const char *s, int c_in)
   unsigned char c;
 
   c = (unsigned char) c_in;
+  if (!c)
+    return rawmemchr (s, 0);
 
   /* Handle the first few bytes by reading one byte at a time.
      Do this until CHAR_PTR is aligned on a longword boundary.  */
diff --git a/lib/string.in.h b/lib/string.in.h
index d88e8a7..49ea1d8 100644
--- a/lib/string.in.h
+++ b/lib/string.in.h
@@ -93,6 +93,22 @@ extern void *memrchr (void const *, int, size_t)
      memrchr (a, b, c))
 #endif
 
+/* Find the first occurrence of C in S.  More efficient than
+   memchr(S,C,N), at the expense of undefined behavior if C does not
+   occur within N bytes.  */
+#if @GNULIB_RAWMEMCHR@
+# if ! @HAVE_RAWMEMCHR@
+extern void *rawmemchr (void const *__s, int __c_in)
+  __attribute__ ((__pure__));
+# endif
+#elif defined GNULIB_POSIXCHECK
+# undef rawmemchr
+# define rawmemchr(a,b) \
+    (GL_LINK_WARNING ("rawmemchr is unportable - " \
+                      "use gnulib module rawmemchr for portability"), \
+     rawmemchr (a, b))
+#endif
+
 /* Copy SRC to DST, returning the address of the terminating '\0' in DST.  */
 #if @GNULIB_STPCPY@
 # if ! @HAVE_STPCPY@
diff --git a/m4/rawmemchr.m4 b/m4/rawmemchr.m4
new file mode 100644
index 0000000..1ac7b74
--- /dev/null
+++ b/m4/rawmemchr.m4
@@ -0,0 +1,21 @@
+# rawmemchr.m4 serial 1
+dnl Copyright (C) 2003, 2007, 2008 Free Software Foundation, Inc.
+dnl This file is free software; the Free Software Foundation
+dnl gives unlimited permission to copy and/or distribute it,
+dnl with or without modifications, as long as this notice is preserved.
+
+AC_DEFUN([gl_FUNC_RAWMEMCHR],
+[
+  dnl Persuade glibc <string.h> to declare rawmemchr().
+  AC_REQUIRE([AC_USE_SYSTEM_EXTENSIONS])
+
+  AC_REQUIRE([gl_HEADER_STRING_H_DEFAULTS])
+  AC_REPLACE_FUNCS([rawmemchr])
+  if test $ac_cv_func_rawmemchr = no; then
+    HAVE_RAWMEMCHR=0
+    gl_PREREQ_RAWMEMCHR
+  fi
+])
+
+# Prerequisites of lib/strchrnul.c.
+AC_DEFUN([gl_PREREQ_RAWMEMCHR], [:])
diff --git a/m4/string_h.m4 b/m4/string_h.m4
index 766d7e9..7143690 100644
--- a/m4/string_h.m4
+++ b/m4/string_h.m4
@@ -35,6 +35,7 @@ AC_DEFUN([gl_HEADER_STRING_H_DEFAULTS],
   GNULIB_MEMMEM=0;      AC_SUBST([GNULIB_MEMMEM])
   GNULIB_MEMPCPY=0;     AC_SUBST([GNULIB_MEMPCPY])
   GNULIB_MEMRCHR=0;     AC_SUBST([GNULIB_MEMRCHR])
+  GNULIB_RAWMEMCHR=0;   AC_SUBST([GNULIB_RAWMEMCHR])
   GNULIB_STPCPY=0;      AC_SUBST([GNULIB_STPCPY])
   GNULIB_STPNCPY=0;     AC_SUBST([GNULIB_STPNCPY])
   GNULIB_STRCHRNUL=0;   AC_SUBST([GNULIB_STRCHRNUL])
@@ -66,6 +67,7 @@ AC_DEFUN([gl_HEADER_STRING_H_DEFAULTS],
   HAVE_DECL_MEMMEM=1;          AC_SUBST([HAVE_DECL_MEMMEM])
   HAVE_MEMPCPY=1;              AC_SUBST([HAVE_MEMPCPY])
   HAVE_DECL_MEMRCHR=1;         AC_SUBST([HAVE_DECL_MEMRCHR])
+  HAVE_RAWMEMCHR=1;            AC_SUBST([HAVE_RAWMEMCHR])
   HAVE_STPCPY=1;               AC_SUBST([HAVE_STPCPY])
   HAVE_STPNCPY=1;              AC_SUBST([HAVE_STPNCPY])
   HAVE_STRCHRNUL=1;            AC_SUBST([HAVE_STRCHRNUL])
diff --git a/modules/rawmemchr b/modules/rawmemchr
new file mode 100644
index 0000000..655e392
--- /dev/null
+++ b/modules/rawmemchr
@@ -0,0 +1,25 @@
+Description:
+rawmemchr() function: Find the first occurrence of C in S.
+
+Files:
+lib/rawmemchr.c
+m4/rawmemchr.m4
+
+Depends-on:
+extensions
+string
+
+configure.ac:
+gl_FUNC_RAWMEMCHR
+gl_STRING_MODULE_INDICATOR([rawmemchr])
+
+Makefile.am:
+
+Include:
+<string.h>
+
+License:
+LGPL
+
+Maintainer:
+FSF
diff --git a/modules/rawmemchr-tests b/modules/rawmemchr-tests
new file mode 100644
index 0000000..dd1a6e6
--- /dev/null
+++ b/modules/rawmemchr-tests
@@ -0,0 +1,10 @@
+Files:
+tests/test-rawmemchr.c
+
+Depends-on:
+
+configure.ac:
+
+Makefile.am:
+TESTS += test-rawmemchr
+check_PROGRAMS += test-rawmemchr
diff --git a/modules/strchrnul b/modules/strchrnul
index f1daf7e..d431dba 100644
--- a/modules/strchrnul
+++ b/modules/strchrnul
@@ -9,6 +9,7 @@ m4/strchrnul.m4
 Depends-on:
 extensions
 string
+rawmemchr
 
 configure.ac:
 gl_FUNC_STRCHRNUL
diff --git a/modules/string b/modules/string
index 52af959..6ca5c23 100644
--- a/modules/string
+++ b/modules/string
@@ -40,6 +40,7 @@ string.h: string.in.h
              -e 's|@''GNULIB_MEMMEM''@|$(GNULIB_MEMMEM)|g' \
              -e 's|@''GNULIB_MEMPCPY''@|$(GNULIB_MEMPCPY)|g' \
              -e 's|@''GNULIB_MEMRCHR''@|$(GNULIB_MEMRCHR)|g' \
+             -e 's|@''GNULIB_RAWMEMCHR''@|$(GNULIB_RAWMEMCHR)|g' \
              -e 's|@''GNULIB_STPCPY''@|$(GNULIB_STPCPY)|g' \
              -e 's|@''GNULIB_STPNCPY''@|$(GNULIB_STPNCPY)|g' \
              -e 's|@''GNULIB_STRCHRNUL''@|$(GNULIB_STRCHRNUL)|g' \
@@ -56,6 +57,7 @@ string.h: string.in.h
              -e 's|@''HAVE_DECL_MEMMEM''@|$(HAVE_DECL_MEMMEM)|g' \
              -e 's|@''HAVE_MEMPCPY''@|$(HAVE_MEMPCPY)|g' \
              -e 's|@''HAVE_DECL_MEMRCHR''@|$(HAVE_DECL_MEMRCHR)|g' \
+             -e 's|@''HAVE_RAWMEMCHR''@|$(HAVE_RAWMEMCHR)|g' \
              -e 's|@''HAVE_STPCPY''@|$(HAVE_STPCPY)|g' \
              -e 's|@''HAVE_STPNCPY''@|$(HAVE_STPNCPY)|g' \
              -e 's|@''HAVE_STRCHRNUL''@|$(HAVE_STRCHRNUL)|g' \
diff --git a/tests/test-rawmemchr.c b/tests/test-rawmemchr.c
new file mode 100644
index 0000000..34e70ef
--- /dev/null
+++ b/tests/test-rawmemchr.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2008 Free Software Foundation
+ * Written by Eric Blake and Bruno Haible
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+#include <string.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#define ASSERT(expr) \
+  do                                                                        \
+    {                                                                       \
+      if (!(expr))                                                          \
+       {                                                                    \
+         fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
+         fflush (stderr);                                                   \
+         abort ();                                                          \
+       }                                                                    \
+    }                                                                       \
+  while (0)
+
+/* Calculating void * + int is not portable, so this wrapper converts
+   to char * to make the tests easier to write.  */
+#define RAWMEMCHR (char *) rawmemchr
+
+int
+main ()
+{
+  size_t n = 0x100000;
+  char *input = malloc (n + 1);
+  ASSERT (input);
+
+  input[0] = 'a';
+  input[1] = 'b';
+  memset (input + 2, 'c', 1024);
+  memset (input + 1026, 'd', n - 1028);
+  input[n - 2] = 'e';
+  input[n - 1] = 'a';
+  input[n] = '\0';
+
+  /* Basic behavior tests.  */
+  ASSERT (RAWMEMCHR (input, 'a') == input);
+  ASSERT (RAWMEMCHR (input, 'b') == input + 1);
+  ASSERT (RAWMEMCHR (input, 'c') == input + 2);
+  ASSERT (RAWMEMCHR (input, 'd') == input + 1026);
+
+  ASSERT (RAWMEMCHR (input + 1, 'a') == input + n - 1);
+  ASSERT (RAWMEMCHR (input + 1, 'e') == input + n - 2);
+
+  ASSERT (RAWMEMCHR (input, '\0') == input + n);
+
+  /* Alignment tests.  */
+  {
+    int i, j;
+    for (i = 0; i < 32; i++)
+      {
+        for (j = 0; j < 256; j++)
+          input[i + j] = j;
+        for (j = 0; j < 256; j++)
+          {
+            ASSERT (RAWMEMCHR (input + i, j) == input + i + j);
+          }
+      }
+  }
+
+  free (input);
+
+  return 0;
+}
-- 
1.5.5.1







reply via email to

[Prev in Thread] Current Thread [Next in Thread]