NOTE(review): the header names on the two bare "#include" lines in the first
hunk were already missing when this patch was reviewed; restore them from the
original commit.  Hunk sizes below account for the review fixes, but the
post-image blob id on the "index" line has not been recomputed.

diff --git a/gl/lib/randperm.c b/gl/lib/randperm.c
index 97c8d9a..9b0f03d 100644
--- a/gl/lib/randperm.c
+++ b/gl/lib/randperm.c
@@ -22,6 +22,7 @@
 #include "randperm.h"
 
 #include
+#include
 
 #include "xalloc.h"
 
@@ -57,6 +58,93 @@ randperm_bound (size_t h, size_t n)
   return bound;
 }
 
+#include "hash.h"
+
+/* Use this to suppress gcc's `...may be used before initialized' warnings. */
+#ifdef lint
+# define IF_LINT(Code) Code
+#else
+# define IF_LINT(Code) /* empty */
+#endif
+
+/* An entry of the sparse map: the value VAL currently held at position
+   INDEX of the set of numbers being shuffled.  */
+struct val_ent
+{
+  size_t index;
+  size_t val;
+};
+
+/* Hash the entry X by its index, reduced modulo TABLE_SIZE.  */
+static size_t
+val_hash (void const *x, size_t table_size)
+{
+  struct val_ent const *ent = x;
+  return ent->index % table_size;
+}
+
+/* Return true if the entries X and Y have the same index.  */
+static bool
+val_cmp (void const *x, void const *y)
+{
+  struct val_ent const *ent1 = x;
+  struct val_ent const *ent2 = y;
+  return ent1->index == ent2->index;
+}
+
+/* Swap the values held for indices I and J in the sparse map V.  An
+   index with no entry in V holds its own value.  Record the value that
+   ends up at index I in RV[I].  */
+static void
+sparse_swap (Hash_table *v, size_t *rv, size_t i, size_t j)
+{
+  /* Swapping an index with itself changes nothing, so just report the
+     value already held there.  The general code below would delete the
+     entry for I, find none for J, and build a second entry for the same
+     index, after which RV[I] would receive I instead of the stored
+     value.  */
+  if (i == j)
+    {
+      struct val_ent const *ent = hash_lookup (v, &(struct val_ent) {i,0});
+      rv[i] = ent ? ent->val : i;
+      return;
+    }
+
+  struct val_ent *v1 = hash_delete (v, &(struct val_ent) {i,0});
+  struct val_ent *v2 = hash_delete (v, &(struct val_ent) {j,0});
+  if (!v1)
+    {
+      v1 = xmalloc (sizeof *v1);
+      v1->index = v1->val = i;
+    }
+  if (!v2)
+    {
+      v2 = xmalloc (sizeof *v2);
+      v2->index = v2->val = j;
+    }
+
+  size_t t = v1->val;
+  v1->val = v2->val;
+  v2->val = t;
+  if (!hash_insert (v, v1))
+    xalloc_die ();
+  if (!hash_insert (v, v2))
+    xalloc_die ();
+
+  /* As an optimization, keep the array of values we're returning,
+     updated here.  */
+  rv[i] = v1->val;
+}
+
+/* Exchange elements I and J of the array V.  */
+static void
+swap (size_t *v, size_t i, size_t j)
+{
+  size_t t = v[i];
+  v[i] = v[j];
+  v[j] = t;
+}
+
 /* From R, allocate and return a malloc'd array of the first H elements
    of a random permutation of N elements.  H must not exceed N.
    Return NULL if H is zero.  */
@@ -79,21 +167,66 @@ randperm_new (struct randint_source *r, size_t h, size_t n)
 
     default:
       {
+        /* The algorithm is essentially the same in both
+           the sparse and non-sparse case.  In the sparse case we use
+           a hash to implement sparse storage for the set of n numbers
+           we're shuffling.  When to use the sparse method was
+           determined with the help of this script:
+
+           #!/bin/sh
+           for n in $(seq 2 32); do
+             for h in $(seq 2 32); do
+               test $h -gt $n && continue
+               for s in o n; do
+                 test $s = o && shuf=shuf || shuf=./shuf
+                 num=$(env time -f "$s:${h},${n} = %e,%M" \
+                       $shuf -i0-$((2**$n-2)) -n$((2**$h-2)) | wc -l)
+                 test $num = $((2**$h-2)) || echo "$s:${h},${n} = failed" >&2
+               done
+             done
+           done
+
+           This showed that if sparseness = n/h, then:
+
+           sparseness = 128 => .125 mem used, and about same speed
+           sparseness = 64 => .25 mem used, but 1.5 times slower
+           sparseness = 32 => .5 mem used, but 2 times slower
+
+           Also the memory usage was only significant when n > 128Ki
+        */
+        bool sparse = (n >= (128 * 1024)) && (n / h >= 32);
+
         size_t i;
+        Hash_table *sparse_v;
 
-        v = xnmalloc (n, sizeof *v);
-        for (i = 0; i < n; i++)
-          v[i] = i;
+        if (sparse)
+          {
+            sparse_v = hash_initialize (h, NULL, val_hash, val_cmp, free);
+            if (!sparse_v)
+              xalloc_die ();
+            v = xnmalloc (h, sizeof *v);
+          }
+        else
+          {
+            IF_LINT (sparse_v = NULL);
+            v = xnmalloc (n, sizeof *v);
+            for (i = 0; i < n; i++)
+              v[i] = i;
+          }
 
         for (i = 0; i < h; i++)
           {
             size_t j = i + randint_choose (r, n - i);
-            size_t t = v[i];
-            v[i] = v[j];
-            v[j] = t;
+            if (sparse)
+              sparse_swap (sparse_v, v, i, j);
+            else
+              swap (v, i, j);
           }
 
-        v = xnrealloc (v, h, sizeof *v);
+        if (sparse)
+          hash_free (sparse_v);
+        else
+          v = xnrealloc (v, h, sizeof *v);
       }
       break;
     }