--- Begin Message ---
Subject: |
Re: [bug-diffutils] Multithreading support? |
Date: |
Sun, 1 Sep 2013 22:57:42 +0200 |
Hi!
I finally tested the patches on my Netgear WNDR3700v2 router running
OpenWRT 12.09 "Attitude Ajdustment".
I tested the "builtin" cmp (part of busybox), and versions of cmp
compiled by me:
- unmodified diffutils-3.3 ("./cmp")
- patch # define word size_t ("./cmpp1")
- additionally patch tune by using rawmemchr ("./cmpp2")
I did two tests:
- test file with random contents:
dd if=/dev/urandom of=test bs=1M count=17
time cmp test test # busybox cmp reads the inputs even if they are the same file
1.22 seconds
time cat test | ./cmp test -
real 0m 0.90s
time cat test | ./cmpp1 test -
real 0m 0.80s
time cat test | ./cmpp2 test -
real 0m 0.60s
(average of multiple runs)
- test with /dev/zero
# dd if=/dev/zero bs=1M count=64 | time ./cmp - /dev/zero
64+0 records in
64+0 records out
cmp: EOF on -
Command exited with non-zero status 1
real 0m 2.79s
user 0m 1.12s
sys 0m 0.94s
# dd if=/dev/zero bs=1M count=64 | time ./cmpp1 - /dev/zero
64+0 records in
64+0 records out
cmp: EOF on -
Command exited with non-zero status 1
real 0m 2.53s
user 0m 1.13s
sys 0m 0.80s
# dd if=/dev/zero bs=1M count=64 | time ./cmpp2 - /dev/zero
64+0 records in
64+0 records out
cmp: EOF on -
Command exited with non-zero status 1
real 0m 1.80s
user 0m 0.32s
sys 0m 0.69s
So both patches seem to improve speed on this machine, especially the second.
Regards,
David
On 23 August 2013 00:59, Paul Eggert <address@hidden> wrote:
> On 08/13/13 08:48, David Balažic wrote:
>> I'll try to benchmark it on my ARM based router, when time permits.
>
> Thanks. While you're at it, could you also benchmark the following
> patch, which I just now pushed to the diffutils git master on
> Savannah?
>
> From 6749fe1ddd6805828b526c73b2f1a579eb0d9f63 Mon Sep 17 00:00:00 2001
> From: Paul Eggert <address@hidden>
> Date: Thu, 22 Aug 2013 15:45:56 -0700
> Subject: [PATCH] cmp, diff, sdiff: tune by using rawmemchr
>
> On my platform (AMD Phenom II X4 910e, Fedora 17 x86-64), this sped up
> 'cmp -n 8GiB /dev/full /dev/zero' by a factor of 3.8, and
> 'cmp -sn 8GiB /dev/full /dev/zero' by a factor of 1.8.
> * bootstrap.conf (gnulib_modules): Add rawmemchr.
> * src/cmp.c (cmp): Optimize the common case where buffers are the same,
> by using count_newlines rather than block_compare_and_count.
> (block_compare_and_count): Remove.
> (count_newlines): New function.
> * src/cmp.c (count_newlines):
> * src/io.c (prepare_text):
> * src/sdiff.c (lf_copy, lf_skip, lf_snarf):
> Use rawmemchr instead of memchr, for speed.
> ---
> bootstrap.conf | 1 +
> src/cmp.c | 88
> +++++++++++++++++++---------------------------------------
> src/io.c | 24 ++++++++++------
> src/sdiff.c | 10 +++----
> 4 files changed, 51 insertions(+), 72 deletions(-)
>
> diff --git a/bootstrap.conf b/bootstrap.conf
> index 240754b..ac85adf 100644
> --- a/bootstrap.conf
> +++ b/bootstrap.conf
> @@ -56,6 +56,7 @@ mkstemp
> mktime
> progname
> propername
> +rawmemchr
> readme-release
> regex
> sh-quote
> diff --git a/src/cmp.c b/src/cmp.c
> index 97473c9..9e35b07 100644
> --- a/src/cmp.c
> +++ b/src/cmp.c
> @@ -52,7 +52,7 @@
> static int cmp (void);
> static off_t file_position (int);
> static size_t block_compare (word const *, word const *) _GL_ATTRIBUTE_PURE;
> -static size_t block_compare_and_count (word const *, word const *, off_t *);
> +static size_t count_newlines (char *, size_t);
> static void sprintc (char *, unsigned char);
>
> /* Filenames of the compared files. */
> @@ -448,20 +448,23 @@ cmp (void)
> if (read1 == SIZE_MAX)
> error (EXIT_TROUBLE, errno, "%s", file[1]);
>
> - /* Insert sentinels for the block compare. */
> + smaller = MIN (read0, read1);
>
> - buf0[read0] = ~buf1[read0];
> - buf1[read1] = ~buf0[read1];
> + /* Optimize the common case where the buffers are the same. */
> + if (memcmp (buf0, buf1, smaller) == 0)
> + first_diff = smaller;
> + else
> + {
> + /* Insert sentinels for the block compare. */
> + buf0[read0] = ~buf1[read0];
> + buf1[read1] = ~buf0[read1];
>
> - /* If the line number should be written for differing files,
> - compare the blocks and count the number of newlines
> - simultaneously. */
> - first_diff = (comparison_type == type_first_diff
> - ? block_compare_and_count (buffer0, buffer1, &line_number)
> - : block_compare (buffer0, buffer1));
> + first_diff = block_compare (buffer0, buffer1);
> + }
>
> byte_number += first_diff;
> - smaller = MIN (read0, read1);
> + if (comparison_type == type_first_diff)
> + line_number += count_newlines (buf0, first_diff);
>
> if (first_diff < smaller)
> {
> @@ -567,54 +570,6 @@ cmp (void)
> return differing == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
> }
>
> -/* Compare two blocks of memory P0 and P1 until they differ,
> - and count the number of '\n' occurrences in the common
> - part of P0 and P1.
> - If the blocks are not guaranteed to be different, put sentinels at the
> ends
> - of the blocks before calling this function.
> -
> - Return the offset of the first byte that differs.
> - Increment *COUNT by the count of '\n' occurrences. */
> -
> -static size_t
> -block_compare_and_count (word const *p0, word const *p1, off_t *count)
> -{
> - word l; /* One word from first buffer. */
> - word const *l0, *l1; /* Pointers into each buffer. */
> - char const *c0, *c1; /* Pointers for finding exact address. */
> - size_t cnt = 0; /* Number of '\n' occurrences. */
> - word nnnn; /* Newline, sizeof (word) times. */
> - int i;
> -
> - nnnn = 0;
> - for (i = 0; i < sizeof nnnn; i++)
> - nnnn = (nnnn << CHAR_BIT) | '\n';
> -
> - /* Find the rough position of the first difference by reading words,
> - not bytes. */
> -
> - for (l0 = p0, l1 = p1; (l = *l0) == *l1; l0++, l1++)
> - {
> - l ^= nnnn;
> - for (i = 0; i < sizeof l; i++)
> - {
> - unsigned char uc = l;
> - cnt += ! uc;
> - l >>= CHAR_BIT;
> - }
> - }
> -
> - /* Find the exact differing position (endianness independent). */
> -
> - for (c0 = (char const *) l0, c1 = (char const *) l1;
> - *c0 == *c1;
> - c0++, c1++)
> - cnt += *c0 == '\n';
> -
> - *count += cnt;
> - return c0 - (char const *) p0;
> -}
> -
> /* Compare two blocks of memory P0 and P1 until they differ.
> If the blocks are not guaranteed to be different, put sentinels at the
> ends
> of the blocks before calling this function.
> @@ -643,6 +598,21 @@ block_compare (word const *p0, word const *p1)
> return c0 - (char const *) p0;
> }
>
> +/* Return the number of newlines in BUF, of size BUFSIZE,
> + where BUF[NBYTES] is available for use as a sentinel. */
> +
> +static size_t
> +count_newlines (char *buf, size_t bufsize)
> +{
> + size_t count = 0;
> + char *p;
> + char *lim = buf + bufsize;
> + *lim = '\n';
> + for (p = buf; (p = rawmemchr (p, '\n')) != lim; p++)
> + count++;
> + return count;
> +}
> +
> /* Put into BUF the unsigned char C, making unprintable bytes
> visible by quoting like cat -t does. */
>
> diff --git a/src/io.c b/src/io.c
> index 463ee35..05a898c 100644
> --- a/src/io.c
> +++ b/src/io.c
> @@ -474,7 +474,6 @@ prepare_text (struct file_data *current)
> {
> size_t buffered = current->buffered;
> char *p = FILE_BUFFER (current);
> - char *dst;
>
> if (buffered == 0 || p[buffered - 1] == '\n')
> current->missing_newline = false;
> @@ -490,16 +489,25 @@ prepare_text (struct file_data *current)
> /* Don't use uninitialized storage when planting or using sentinels. */
> memset (p + buffered, 0, sizeof (word));
>
> - if (strip_trailing_cr && (dst = memchr (p, '\r', buffered)))
> + if (strip_trailing_cr)
> {
> - char const *src = dst;
> - char const *srclim = p + buffered;
> + char *dst;
> + char *srclim = p + buffered;
> + *srclim = '\r';
> + dst = rawmemchr (p, '\r');
>
> - do
> - dst += ! ((*dst = *src++) == '\r' && *src == '\n');
> - while (src < srclim);
> + if (dst != srclim)
> + {
> + char const *src = dst;
> + do
> + {
> + *dst = *src++;
> + dst += ! (*dst == '\r' && *src == '\n');
> + }
> + while (src < srclim);
>
> - buffered -= src - dst;
> + buffered -= src - dst;
> + }
> }
>
> current->buffered = buffered;
> diff --git a/src/sdiff.c b/src/sdiff.c
> index b7f9f6a..e7bc657 100644
> --- a/src/sdiff.c
> +++ b/src/sdiff.c
> @@ -379,8 +379,8 @@ lf_copy (struct line_filter *lf, lin lines, FILE *outfile)
>
> while (lines)
> {
> - lf->bufpos = (char *) memchr (lf->bufpos, '\n', lf->buflim -
> lf->bufpos);
> - if (! lf->bufpos)
> + lf->bufpos = rawmemchr (lf->bufpos, '\n');
> + if (lf->bufpos == lf->buflim)
> {
> ck_fwrite (start, lf->buflim - start, outfile);
> if (! lf_refill (lf))
> @@ -403,8 +403,8 @@ lf_skip (struct line_filter *lf, lin lines)
> {
> while (lines)
> {
> - lf->bufpos = (char *) memchr (lf->bufpos, '\n', lf->buflim -
> lf->bufpos);
> - if (! lf->bufpos)
> + lf->bufpos = rawmemchr (lf->bufpos, '\n');
> + if (lf->bufpos == lf->buflim)
> {
> if (! lf_refill (lf))
> break;
> @@ -424,7 +424,7 @@ lf_snarf (struct line_filter *lf, char *buffer, size_t
> bufsize)
> for (;;)
> {
> char *start = lf->bufpos;
> - char *next = (char *) memchr (start, '\n', lf->buflim + 1 - start);
> + char *next = rawmemchr (start, '\n');
> size_t s = next - start;
> if (bufsize <= s)
> return 0;
> --
> 1.7.11.7
>
>
--- End Message ---