emacs-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: Binary Search Tree and Treap Functions bst-assq and treap-put


From: Andy Sonnenburg
Subject: Re: Binary Search Tree and Treap Functions bst-assq and treap-put
Date: Sun, 4 Dec 2016 12:39:53 -0500

I've attached the patch file and reproduced the diff below (I'm not sure which form is preferred).

diff --git src/fns.c src/fns.c
index dfc7842..5f3cacc 100644
--- src/fns.c
+++ src/fns.c
@@ -21,6 +21,7 @@ along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
 #include <config.h>
 
 #include <stdlib.h>
+#include <string.h>
 #include <unistd.h>
 #include <filevercmp.h>
 #include <intprops.h>
@@ -4751,7 +4752,181 @@ returns nil, then (funcall TEST x1 x2) also returns nil.  */)
   return Fput (name, Qhash_table_test, list2 (test, hash));
 }
 
+#define EMACS_CAT_I(a, b) a ## b
+#define EMACS_CAT(a, b) EMACS_CAT_I(a, b)
+#define EMACS_LT(x, y) (XLI (x) < XLI (y))
+#define EMACS_BST_VECTOR_SIZE 4
+#define EMACS_FNV_PRIME 1099511628211
+#define EMACS_FNV_OFFSET_BASIS 14695981039346656037
+
+DEFUN ("bst-assq", Fbst_assq, Sbst_assq,
+       2, 2, 0,
+       doc: /* Return the value whose key is `eq' to KEY in BST.
+BST is either a vector whose first element is a key, second element is a value,
+and third and fourth elements are left and right children; or a non-vector.  The
+left child's key must be less than BST's key.  The right child's key must be
+greater than BST's key.  Non-vector descendants of BST are ignored.  */)
+  (Lisp_Object new_key, Lisp_Object bst)
+{
+  const struct Lisp_Vector *vector;
+  const Lisp_Object *contents;
+  Lisp_Object key;
+  for (;;)
+    {
+      if (!VECTORP (bst))
+        {
+          return Qnil;
+        }
+      vector = XVECTOR (bst);
+      if (vector->header.size != EMACS_BST_VECTOR_SIZE)
+        {
+          args_out_of_range_3  (bst,
+                                make_number (0),
+                                make_number (EMACS_BST_VECTOR_SIZE));
+        }
+      contents = vector->contents;
+      key = contents[0];
+      if (EQ (key, new_key))
+        {
+          return contents[1];
+        }
+      bst = contents[2 + EMACS_LT (key, new_key)];
+    }
+}
+
+static uint64_t
+fnv_1a_hash (const void *first, const void *last)
+{
+  const unsigned char *i;
+  uint64_t hash;
+  hash = (uint64_t) EMACS_CAT(EMACS_FNV_OFFSET_BASIS, ull);
+  for (i = (const unsigned char *) first; i != last; ++i)
+    {
+      hash ^= *i;
+      hash *= (uint64_t) EMACS_CAT(EMACS_FNV_PRIME, ull);
+    }
+  return hash;
+}
+
+static Lisp_Object
+treap_singleton (Lisp_Object key, Lisp_Object value, Lisp_Object nil)
+{
+  Lisp_Object result, *contents;
+  result = make_uninit_vector (EMACS_BST_VECTOR_SIZE);
+  contents = XVECTOR (result)->contents;
+  contents[0] = key;
+  contents[1] = value;
+  contents[2] = nil;
+  contents[3] = nil;
+  return result;
+}
+
+static void
+rotate (Lisp_Object *a, Lisp_Object *b, Lisp_Object *c)
+{
+  Lisp_Object d;
+  d = *a;
+  *a = *b;
+  *b = *c;
+  *c = d;
+}
+
+static void
+tree_rotate (Lisp_Object *root, int rotation_index, int opposite_index)
+{
+  Lisp_Object *opposite, *rotation;
+  opposite = XVECTOR (*root)->contents + opposite_index;
+  rotation = XVECTOR (*opposite)->contents + rotation_index;
+  rotate (opposite, rotation, root);
+}
+
+static Lisp_Object
+make_lisp_vector (struct Lisp_Vector *vector)
+{
+  return XIL (TAG_PTR (Lisp_Vectorlike, vector));
+}
+
+static uint64_t
+treap_hash (Lisp_Object object)
+{
+  EMACS_INT i;
+  i = XLI (object);
+  return fnv_1a_hash (&i, &i + 1);
+}
+
+static Lisp_Object
+treap_put (Lisp_Object, uint64_t, Lisp_Object, Lisp_Object);
+
+static Lisp_Object
+treap_vector_put (Lisp_Object new_key,
+                  uint64_t new_hash,
+                  Lisp_Object new_value,
+                  struct Lisp_Vector *vector)
+{
+  const Lisp_Object *contents;
+  Lisp_Object key, value, new_treap, *new_contents;
+  int i, j;
+  if (vector->header.size != EMACS_BST_VECTOR_SIZE)
+    {
+      args_out_of_range_3 (make_lisp_vector (vector),
+                           make_number (0),
+                           make_number (EMACS_BST_VECTOR_SIZE));
+    }
+  contents = vector->contents;
+  key = contents[0];
+  value = contents[1];
+  new_treap = make_uninit_vector (EMACS_BST_VECTOR_SIZE);
+  new_contents = XVECTOR (new_treap)->contents;
+  new_contents[0] = key;
+  if (EQ (key, new_key))
+    {
+      new_contents[1] = new_value;
+      memcpy (new_contents + 2, contents + 2, 2 * sizeof *new_contents);
+      return new_treap;
+    }
+  new_contents[1] = value;
+  i = 2 + EMACS_LT (key, new_key);
+  j = EMACS_BST_VECTOR_SIZE + 1 - i;
+  new_contents[i] = treap_put (new_key, new_hash, new_value, contents[i]);
+  new_contents[j] = contents[j];
+  if (treap_hash (key) < new_hash)
+    {
+      tree_rotate (&new_treap, j, i);
+    }
+  return new_treap;
+}
+
+static Lisp_Object
+treap_put (Lisp_Object key, uint64_t hash, Lisp_Object value, Lisp_Object treap)
+{
+  if (VECTORP (treap))
+    {
+      return treap_vector_put (key, hash, value, XVECTOR (treap));
+    }
+  return treap_singleton (key, value, treap);
+}
+
+DEFUN ("treap-put", Ftreap_put, Streap_put,
+       3, 3, 0,
+       doc: /* Associate KEY with VALUE in a treap derived from TREAP.
+If KEY is already present in TREAP, return a treap with VALUE replacing the
+existing value.  TREAP will not be modified, though sharing of structure between
+the result treap and TREAP may occur.  */)
+  (Lisp_Object key, Lisp_Object value, Lisp_Object treap)
+{
+  if (VECTORP (treap))
+    {
+      return treap_vector_put (key, treap_hash (key), value, XVECTOR (treap));
+    }
+  return treap_singleton (key, value, treap);
+}
 
+#undef EMACS_FNV_OFFSET_BASIS
+#undef EMACS_FNV_PRIME
+#undef EMACS_BST_VECTOR_SIZE
+#undef EMACS_LT
+#undef EMACS_CAT
+#undef EMACS_CAT_I
 
 /************************************************************************
  MD5, SHA-1, and SHA-2
@@ -5232,4 +5407,6 @@ this variable.  */);
   defsubr (&Ssecure_hash);
   defsubr (&Sbuffer_hash);
   defsubr (&Slocale_info);
+  defsubr (&Sbst_assq);
+  defsubr (&Streap_put);
 }
diff --git test/src/fns-tests.el test/src/fns-tests.el
index c533bad..1ef393e 100644
--- test/src/fns-tests.el
+++ test/src/fns-tests.el
@@ -245,3 +245,11 @@
   (let ((data '((foo) (bar))))
     (should (equal (mapcan #'identity data) '(foo bar)))
     (should (equal data                     '((foo bar) (bar))))))
+
+(ert-deftest fns-tests-treap-put ()
+  (let ((n 64)
+        (treap nil))
+    (dotimes (i n)
+      (dotimes (j i) (should (equal (bst-assq j treap) j)))
+      (dotimes (j (- n i)) (should (equal (bst-assq (+ i j) treap) nil)))
+      (setq treap (treap-put i i treap)))))


On Sun, Dec 4, 2016 at 12:13 PM, Andy Sonnenburg <address@hidden> wrote:

It is written in C.  The only real reason C was used was performance concerns, real or imagined.  I can post a diff of the changes - it isn't that many lines.


On Dec 4, 2016 12:04 PM, "Stefan Monnier" <address@hidden> wrote:
> That's too bad (I mean, its good for performance, but unfortunate one of
> the use cases doesn't exist).  However, the treap functions may still be of
> general use.  Let me know if there is any interest.  They are documented
> and tested.  They fill a gap between alists (persistent, linear lookup) and
> hash tables (ephemeral, constant lookup) by being persistent while
> providing average case logarithmic lookup.

Is it written in C or Elisp?  If it's Elisp, then we definitely would
welcome it into GNU ELPA (there is already an avl-tree implementation in
Emacs itself at lisp/emacs-lisp/avl-tree.el, but the more the merrier).
If it's written C, I'll let others decide whether we want to include it.


        Stefan

Attachment: treap.patch
Description: Text Data


reply via email to

[Prev in Thread] Current Thread [Next in Thread]