bug-diffutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [bug-diffutils] Multithreading support?


From: Paul Eggert
Subject: Re: [bug-diffutils] Multithreading support?
Date: Thu, 22 Aug 2013 15:59:27 -0700
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:17.0) Gecko/20130625 Thunderbird/17.0.7

On 08/13/13 08:48, David Balažic wrote:
> I'll try to benchmark it on my ARM based router, when time permits.

Thanks.  While you're at it, could you also benchmark the following
patch, which I just now pushed to the diffutils git master on
Savannah?

>From 6749fe1ddd6805828b526c73b2f1a579eb0d9f63 Mon Sep 17 00:00:00 2001
From: Paul Eggert <address@hidden>
Date: Thu, 22 Aug 2013 15:45:56 -0700
Subject: [PATCH] cmp, diff, sdiff: tune by using rawmemchr

On my platform (AMD Phenom II X4 910e, Fedora 17 x86-64), this sped up
'cmp -n 8GiB /dev/full /dev/zero' by a factor of 3.8, and
'cmp -sn 8GiB /dev/full /dev/zero' by a factor of 1.8.
* bootstrap.conf (gnulib_modules): Add rawmemchr.
* src/cmp.c (cmp): Optimize the common case where buffers are the same,
by using count_newlines rather than block_compare_and_count.
(block_compare_and_count): Remove.
(count_newlines): New function.
* src/cmp.c (count_newlines):
* src/io.c (prepare_text):
* src/sdiff.c (lf_copy, lf_skip, lf_snarf):
Use rawmemchr instead of memchr, for speed.
---
 bootstrap.conf |  1 +
 src/cmp.c      | 88 +++++++++++++++++++---------------------------------------
 src/io.c       | 24 ++++++++++------
 src/sdiff.c    | 10 +++----
 4 files changed, 51 insertions(+), 72 deletions(-)

diff --git a/bootstrap.conf b/bootstrap.conf
index 240754b..ac85adf 100644
--- a/bootstrap.conf
+++ b/bootstrap.conf
@@ -56,6 +56,7 @@ mkstemp
 mktime
 progname
 propername
+rawmemchr
 readme-release
 regex
 sh-quote
diff --git a/src/cmp.c b/src/cmp.c
index 97473c9..9e35b07 100644
--- a/src/cmp.c
+++ b/src/cmp.c
@@ -52,7 +52,7 @@
 static int cmp (void);
 static off_t file_position (int);
 static size_t block_compare (word const *, word const *) _GL_ATTRIBUTE_PURE;
-static size_t block_compare_and_count (word const *, word const *, off_t *);
+static size_t count_newlines (char *, size_t);
 static void sprintc (char *, unsigned char);
 
 /* Filenames of the compared files.  */
@@ -448,20 +448,23 @@ cmp (void)
       if (read1 == SIZE_MAX)
        error (EXIT_TROUBLE, errno, "%s", file[1]);
 
-      /* Insert sentinels for the block compare.  */
+      smaller = MIN (read0, read1);
 
-      buf0[read0] = ~buf1[read0];
-      buf1[read1] = ~buf0[read1];
+      /* Optimize the common case where the buffers are the same.  */
+      if (memcmp (buf0, buf1, smaller) == 0)
+       first_diff = smaller;
+      else
+       {
+         /* Insert sentinels for the block compare.  */
+         buf0[read0] = ~buf1[read0];
+         buf1[read1] = ~buf0[read1];
 
-      /* If the line number should be written for differing files,
-        compare the blocks and count the number of newlines
-        simultaneously.  */
-      first_diff = (comparison_type == type_first_diff
-                   ? block_compare_and_count (buffer0, buffer1, &line_number)
-                   : block_compare (buffer0, buffer1));
+         first_diff = block_compare (buffer0, buffer1);
+       }
 
       byte_number += first_diff;
-      smaller = MIN (read0, read1);
+      if (comparison_type == type_first_diff)
+       line_number += count_newlines (buf0, first_diff);
 
       if (first_diff < smaller)
        {
@@ -567,54 +570,6 @@ cmp (void)
   return differing == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
 }
 
-/* Compare two blocks of memory P0 and P1 until they differ,
-   and count the number of '\n' occurrences in the common
-   part of P0 and P1.
-   If the blocks are not guaranteed to be different, put sentinels at the ends
-   of the blocks before calling this function.
-
-   Return the offset of the first byte that differs.
-   Increment *COUNT by the count of '\n' occurrences.  */
-
-static size_t
-block_compare_and_count (word const *p0, word const *p1, off_t *count)
-{
-  word l;              /* One word from first buffer. */
-  word const *l0, *l1; /* Pointers into each buffer. */
-  char const *c0, *c1; /* Pointers for finding exact address. */
-  size_t cnt = 0;      /* Number of '\n' occurrences. */
-  word nnnn;           /* Newline, sizeof (word) times.  */
-  int i;
-
-  nnnn = 0;
-  for (i = 0; i < sizeof nnnn; i++)
-    nnnn = (nnnn << CHAR_BIT) | '\n';
-
-  /* Find the rough position of the first difference by reading words,
-     not bytes.  */
-
-  for (l0 = p0, l1 = p1;  (l = *l0) == *l1;  l0++, l1++)
-    {
-      l ^= nnnn;
-      for (i = 0; i < sizeof l; i++)
-       {
-         unsigned char uc = l;
-         cnt += ! uc;
-         l >>= CHAR_BIT;
-       }
-    }
-
-  /* Find the exact differing position (endianness independent).  */
-
-  for (c0 = (char const *) l0, c1 = (char const *) l1;
-       *c0 == *c1;
-       c0++, c1++)
-    cnt += *c0 == '\n';
-
-  *count += cnt;
-  return c0 - (char const *) p0;
-}
-
 /* Compare two blocks of memory P0 and P1 until they differ.
    If the blocks are not guaranteed to be different, put sentinels at the ends
    of the blocks before calling this function.
@@ -643,6 +598,21 @@ block_compare (word const *p0, word const *p1)
   return c0 - (char const *) p0;
 }
 
+/* Return the number of newlines in BUF, of size BUFSIZE,
+   where BUF[NBYTES] is available for use as a sentinel.  */
+
+static size_t
+count_newlines (char *buf, size_t bufsize)
+{
+  size_t count = 0;
+  char *p;
+  char *lim = buf + bufsize;
+  *lim = '\n';
+  for (p = buf; (p = rawmemchr (p, '\n')) != lim; p++)
+    count++;
+  return count;
+}
+
 /* Put into BUF the unsigned char C, making unprintable bytes
    visible by quoting like cat -t does.  */
 
diff --git a/src/io.c b/src/io.c
index 463ee35..05a898c 100644
--- a/src/io.c
+++ b/src/io.c
@@ -474,7 +474,6 @@ prepare_text (struct file_data *current)
 {
   size_t buffered = current->buffered;
   char *p = FILE_BUFFER (current);
-  char *dst;
 
   if (buffered == 0 || p[buffered - 1] == '\n')
     current->missing_newline = false;
@@ -490,16 +489,25 @@ prepare_text (struct file_data *current)
   /* Don't use uninitialized storage when planting or using sentinels.  */
   memset (p + buffered, 0, sizeof (word));
 
-  if (strip_trailing_cr && (dst = memchr (p, '\r', buffered)))
+  if (strip_trailing_cr)
     {
-      char const *src = dst;
-      char const *srclim = p + buffered;
+      char *dst;
+      char *srclim = p + buffered;
+      *srclim = '\r';
+      dst = rawmemchr (p, '\r');
 
-      do
-       dst += ! ((*dst = *src++) == '\r' && *src == '\n');
-      while (src < srclim);
+      if (dst != srclim)
+       {
+         char const *src = dst;
+         do
+           {
+             *dst = *src++;
+             dst += ! (*dst == '\r' && *src == '\n');
+           }
+         while (src < srclim);
 
-      buffered -= src - dst;
+         buffered -= src - dst;
+       }
     }
 
   current->buffered = buffered;
diff --git a/src/sdiff.c b/src/sdiff.c
index b7f9f6a..e7bc657 100644
--- a/src/sdiff.c
+++ b/src/sdiff.c
@@ -379,8 +379,8 @@ lf_copy (struct line_filter *lf, lin lines, FILE *outfile)
 
   while (lines)
     {
-      lf->bufpos = (char *) memchr (lf->bufpos, '\n', lf->buflim - lf->bufpos);
-      if (! lf->bufpos)
+      lf->bufpos = rawmemchr (lf->bufpos, '\n');
+      if (lf->bufpos == lf->buflim)
        {
          ck_fwrite (start, lf->buflim - start, outfile);
          if (! lf_refill (lf))
@@ -403,8 +403,8 @@ lf_skip (struct line_filter *lf, lin lines)
 {
   while (lines)
     {
-      lf->bufpos = (char *) memchr (lf->bufpos, '\n', lf->buflim - lf->bufpos);
-      if (! lf->bufpos)
+      lf->bufpos = rawmemchr (lf->bufpos, '\n');
+      if (lf->bufpos == lf->buflim)
        {
          if (! lf_refill (lf))
            break;
@@ -424,7 +424,7 @@ lf_snarf (struct line_filter *lf, char *buffer, size_t 
bufsize)
   for (;;)
     {
       char *start = lf->bufpos;
-      char *next = (char *) memchr (start, '\n', lf->buflim + 1 - start);
+      char *next = rawmemchr (start, '\n');
       size_t s = next - start;
       if (bufsize <= s)
        return 0;
-- 
1.7.11.7





reply via email to

[Prev in Thread] Current Thread [Next in Thread]