diff --git a/gl/lib/randperm.c b/gl/lib/randperm.c
index 97c8d9a..9b0f03d 100644
--- a/gl/lib/randperm.c
+++ b/gl/lib/randperm.c
@@ -22,6 +22,7 @@
 #include "randperm.h"
 
 #include <limits.h>
+#include <stdlib.h>
 
 #include "xalloc.h"
 
@@ -57,6 +58,73 @@ randperm_bound (size_t h, size_t n)
   return bound;
 }
 
+#include "hash.h"
+
+/* Use this to suppress gcc's "...may be used uninitialized" warnings.  */
+#ifdef lint
+# define IF_LINT(Code) Code  /* lint defined: expand to Code unchanged */
+#else
+# define IF_LINT(Code) /* lint undefined: expand to nothing */
+#endif
+
+struct val_ent
+{
+  size_t index;  /* Key: a position in the array being shuffled.  */
+  size_t val;    /* The number currently stored at that position.  */
+};
+
+/* Hash callback: the bucket for entry X is its index mod TABLE_SIZE.  */
+static size_t
+val_hash (void const *x, size_t table_size)
+{
+  return ((struct val_ent const *) x)->index % table_size;
+}
+
+/* Hash comparator: X and Y match iff their indices are equal.  */
+static bool
+val_cmp (void const *x, void const *y)
+{
+  struct val_ent const *a = x, *b = y;
+  return a->index == b->index;
+}
+
+/* Swap the values at indices I and J of sparse array V (an absent entry
+   means the index holds itself), and store I's new value in RV[I].  */
+static void
+sparse_swap (Hash_table *v, size_t *rv, size_t i, size_t j)
+{
+  struct val_ent *v1 = hash_delete (v, &(struct val_ent) {i,0});
+  /* If I == J, share one entry: V cannot hold two with the same index,
+     so a separate V2 would be leaked and V1 would lose its value.  */
+  struct val_ent *v2 = (i == j ? v1
+                        : hash_delete (v, &(struct val_ent) {j,0}));
+  if (!v1)
+    {
+      v1 = xmalloc (sizeof *v1);
+      v1->index = v1->val = i;
+    }
+  if (!v2)
+    {
+      v2 = i == j ? v1 : xmalloc (sizeof *v2);
+      v2->index = v2->val = j;
+    }
+
+  size_t t = v1->val;
+  v1->val = v2->val;
+  v2->val = t;
+  if (!hash_insert (v, v1) || (v1 != v2 && !hash_insert (v, v2)))
+    xalloc_die ();
+  rv[i] = v1->val;  /* Keep the result array current as we go.  */
+}
+
+static void
+swap (size_t *v, size_t i, size_t j)
+{
+  size_t const vi = v[i], vj = v[j];  /* Exchange V[I] and V[J].  */
+  v[i] = vj;
+  v[j] = vi;
+}
+
 /* From R, allocate and return a malloc'd array of the first H elements
    of a random permutation of N elements.  H must not exceed N.
    Return NULL if H is zero.  */
@@ -79,21 +147,64 @@ randperm_new (struct randint_source *r, size_t h, size_t n)
 
     default:
       {
+        /* The algorithm is essentially the same in both
+           the sparse and non-sparse cases.  In the sparse case we use
+           a hash table to implement sparse storage for the set of n numbers
+           we're shuffling.  When to use the sparse method was
+           determined with the help of this script:
+
+           #!/bin/sh
+           for n in $(seq 2 32); do
+             for h in $(seq 2 32); do
+               test $h -gt $n && continue
+               for s in o n; do
+                 test $s = o && shuf=shuf || shuf=./shuf
+                 num=$(env time -f "$s:${h},${n} = %e,%M" \
+                       $shuf -i0-$((2**$n-2)) -n$((2**$h-2)) | wc -l)
+                 test $num = $((2**$h-2)) || echo "$s:${h},${n} = failed" >&2
+               done
+             done
+           done
+
+           This showed that if sparseness = n/h, then:
+
+           sparseness = 128 => .125 mem used, and about same speed
+           sparseness =  64 => .25  mem used, but 1.5 times slower
+           sparseness =  32 => .5   mem used, but 2 times slower
+
+           Also the memory usage was only significant when n > 128Ki
+        */
+        bool sparse = (n >= (128 * 1024)) && (n / h >= 32);
+
         size_t i;
+        Hash_table *sparse_v;
 
-        v = xnmalloc (n, sizeof *v);
-        for (i = 0; i < n; i++)
-          v[i] = i;
+        if (sparse)
+          {
+            sparse_v = hash_initialize (h, NULL, val_hash, val_cmp, free);
+            v = xnmalloc (h, sizeof *v);
+          }
+        else
+          {
+            IF_LINT (sparse_v = NULL);
+            v = xnmalloc (n, sizeof *v);
+            for (i = 0; i < n; i++)
+              v[i] = i;
+          }
 
         for (i = 0; i < h; i++)
           {
             size_t j = i + randint_choose (r, n - i);
-            size_t t = v[i];
-            v[i] = v[j];
-            v[j] = t;
+            if (sparse)
+              sparse_swap (sparse_v, v, i, j);
+            else
+              swap (v, i, j);
           }
 
-        v = xnrealloc (v, h, sizeof *v);
+        if (sparse)
+          hash_free (sparse_v);
+        else
+          v = xnrealloc (v, h, sizeof *v);
       }
       break;
     }