[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
bug#6669: [PATCH] sort: -R no longer disables multithreading
From: |
Paul Eggert |
Subject: |
bug#6669: [PATCH] sort: -R no longer disables multithreading |
Date: |
Mon, 19 Jul 2010 11:04:29 -0700 |
User-agent: |
Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.1.10) Gecko/20100527 Thunderbird/3.0.5 |
While looking into the random-number issues I mentioned last week I
discovered that sort's -R flag disables the new multithreading stuff.
(Actually, I knew that, but I'd forgotten.) This is pretty easy to
fix, albeit at the theoretical price of not being random when there
are MD5 collisions. I don't think MD5 collisions are a problem in
practice with sort -R, but if they do turn out to be a problem we
should simply use a better checksum algorithm. So I took the liberty
of installing the following, which simplifies the code. (I don't see
a reasonable way to test this sort of patch.)
>From ce379d4449f30ff33daa272978869eb517a62467 Mon Sep 17 00:00:00 2001
From: Paul R. Eggert <address@hidden>
Date: Mon, 19 Jul 2010 10:46:41 -0700
Subject: [PATCH] sort: -R no longer disables multithreading
* src/sort.c (random_md5_state): New static var.
(random_md5_state_init): New function, to initialize random_md5_state.
(random_state, randread_source): Remove.
(cmp_hashes): Use random_md5_state rather than random_state.
Break ties using memcmp, not by getting more randomness.
If MD5 collisions turn into a problem in practice, we should
simply use a better checksum.
(main): If -R is given, call random_md5_state_init rather than
going single-threaded.
---
src/sort.c | 80 ++++++++++++++++++++---------------------------------------
1 files changed, 27 insertions(+), 53 deletions(-)
diff --git a/src/sort.c b/src/sort.c
index 960df74..afa45a8 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -2028,42 +2028,23 @@ getmonth (char const *month, size_t len, char const
**ea)
return 0;
}
-/* A source of random data. */
-static struct randread_source *randread_source;
+/* A randomly chosen MD5 state, used for random comparison. */
+static struct md5_ctx random_md5_state;
-/* Return the Ith randomly-generated state. The caller must invoke
- random_state (H) for all H less than I before invoking random_state
- (I). */
+/* Initialize the randomly chosen MD5 state. */
-static struct md5_ctx
-random_state (size_t i)
+static void
+random_md5_state_init (char const *random_source)
{
- /* An array of states resulting from the random data, and counts of
- its used and allocated members. */
- static struct md5_ctx *state;
- static size_t used;
- static size_t allocated;
-
- struct md5_ctx *s = &state[i];
-
- if (used <= i)
- {
- unsigned char buf[MD5_DIGEST_SIZE];
-
- used++;
-
- if (allocated <= i)
- {
- state = X2NREALLOC (state, &allocated);
- s = &state[i];
- }
-
- randread (randread_source, buf, sizeof buf);
- md5_init_ctx (s);
- md5_process_bytes (buf, sizeof buf, s);
- }
-
- return *s;
+ unsigned char buf[MD5_DIGEST_SIZE];
+ struct randread_source *r = randread_new (random_source, sizeof buf);
+ if (! r)
+ die (_("open failed"), random_source);
+ randread (r, buf, sizeof buf);
+ if (randread_free (r) != 0)
+ die (_("close failed"), random_source);
+ md5_init_ctx (&random_md5_state);
+ md5_process_bytes (buf, sizeof buf, &random_md5_state);
}
/* Compare the hashes of TEXTA with length LENGTHA to those of TEXTB
@@ -2078,19 +2059,19 @@ cmp_hashes (char const *texta, size_t lena,
first pair of random hashes agree, check whether the keys are
identical and if so report no difference. */
int diff;
- size_t i;
- for (i = 0; ; i++)
+ uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)];
+ struct md5_ctx s[2];
+ s[0] = s[1] = random_md5_state;
+ md5_process_bytes (texta, lena, &s[0]); md5_finish_ctx (&s[0], dig[0]);
+ md5_process_bytes (textb, lenb, &s[1]); md5_finish_ctx (&s[1], dig[1]);
+ diff = memcmp (dig[0], dig[1], sizeof dig[0]);
+ if (! diff)
{
- uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)];
- struct md5_ctx s[2];
- s[0] = s[1] = random_state (i);
- md5_process_bytes (texta, lena, &s[0]); md5_finish_ctx (&s[0], dig[0]);
- md5_process_bytes (textb, lenb, &s[1]); md5_finish_ctx (&s[1], dig[1]);
- diff = memcmp (dig[0], dig[1], sizeof dig[0]);
- if (diff != 0)
- break;
- if (i == 0 && lena == lenb && memcmp (texta, textb, lena) == 0)
- break;
+ /* Break ties with memcmp. This is good enough given the
+ astronomically low probability of an MD5 collision. */
+ diff = memcmp (texta, textb, MIN (lena, lenb));
+ if (! diff)
+ diff = lena < lenb ? -1 : lena != lenb;
}
return diff;
@@ -4485,14 +4466,7 @@ main (int argc, char **argv)
reverse = gkey.reverse;
if (need_random)
- {
- /* Threading does not lock the randread_source structure, so
- downgrade to one thread to avoid race conditions. */
- nthreads = 1;
- randread_source = randread_new (random_source, MD5_DIGEST_SIZE);
- if (! randread_source)
- die (_("open failed"), random_source);
- }
+ random_md5_state_init (random_source);
if (temp_dir_count == 0)
{
--
1.7.1
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- bug#6669: [PATCH] sort: -R no longer disables multithreading,
Paul Eggert <=