[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
scratch/hash-table-perf 8ec0e030b66 35/35: Combine hash and next vector
From: |
Mattias Engdegård |
Subject: |
scratch/hash-table-perf 8ec0e030b66 35/35: Combine hash and next vector into a single array |
Date: |
Fri, 12 Jan 2024 10:53:27 -0500 (EST) |
branch: scratch/hash-table-perf
commit 8ec0e030b661ae50d1aef4723b4e83a03645e4ee
Author: Mattias Engdegård <mattiase@acm.org>
Commit: Mattias Engdegård <mattiase@acm.org>
Combine hash and next vector into a single array
This shrinks the hash object by one word and should give better
locality since these values are often accessed at the same time.
---
src/alloc.c | 16 +++++-----------
src/fns.c | 52 ++++++++++++++++++----------------------------------
src/lisp.h | 22 ++++++++++------------
src/pdumper.c | 3 +--
4 files changed, 34 insertions(+), 59 deletions(-)
diff --git a/src/alloc.c b/src/alloc.c
index 180b9995993..7f7e04415b4 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -3473,11 +3473,9 @@ cleanup_vector (struct Lisp_Vector *vector)
eassert (h->index_size > 1);
xfree (h->index);
xfree (h->key_and_value);
- xfree (h->next);
- xfree (h->hash);
+ xfree (h->hash_next);
ptrdiff_t bytes = (h->table_size * (2 * sizeof *h->key_and_value
- + sizeof *h->hash
- + sizeof *h->next)
+ + sizeof *h->hash_next)
+ h->index_size * sizeof *h->index);
hash_table_allocated_bytes -= bytes;
}
@@ -5974,13 +5972,9 @@ purecopy_hash_table (struct Lisp_Hash_Table *table)
if (table->table_size > 0)
{
- ptrdiff_t hash_bytes = table->table_size * sizeof *table->hash;
- pure->hash = pure_alloc (hash_bytes, -(int)sizeof *table->hash);
- memcpy (pure->hash, table->hash, hash_bytes);
-
- ptrdiff_t next_bytes = table->table_size * sizeof *table->next;
- pure->next = pure_alloc (next_bytes, -(int)sizeof *table->next);
- memcpy (pure->next, table->next, next_bytes);
+ ptrdiff_t hn_bytes = table->table_size * sizeof *table->hash_next;
+ pure->hash_next = pure_alloc (hn_bytes, -(int)sizeof *table->hash_next);
+ memcpy (pure->hash_next, table->hash_next, hn_bytes);
ptrdiff_t nvalues = table->table_size * 2;
ptrdiff_t kv_bytes = nvalues * sizeof *table->key_and_value;
diff --git a/src/fns.c b/src/fns.c
index 557853d42fb..a96ddad1683 100644
--- a/src/fns.c
+++ b/src/fns.c
@@ -4280,13 +4280,13 @@ static void
set_hash_next_slot (struct Lisp_Hash_Table *h, ptrdiff_t idx, ptrdiff_t val)
{
eassert (idx >= 0 && idx < h->table_size);
- h->next[idx] = val;
+ h->hash_next[idx].next = val;
}
static void
set_hash_hash_slot (struct Lisp_Hash_Table *h, ptrdiff_t idx, hash_hash_t val)
{
eassert (idx >= 0 && idx < h->table_size);
- h->hash[idx] = val;
+ h->hash_next[idx].hash = val;
}
static void
set_hash_index_slot (struct Lisp_Hash_Table *h, ptrdiff_t idx, ptrdiff_t val)
@@ -4383,7 +4383,7 @@ static ptrdiff_t
HASH_NEXT (struct Lisp_Hash_Table *h, ptrdiff_t idx)
{
eassert (idx >= 0 && idx < h->table_size);
- return h->next[idx];
+ return h->hash_next[idx].next;
}
/* Return the index of the element in hash table H that is the start
@@ -4571,8 +4571,7 @@ make_hash_table (const struct hash_table_test *test,
EMACS_INT size,
if (size == 0)
{
h->key_and_value = NULL;
- h->hash = NULL;
- h->next = NULL;
+ h->hash_next = NULL;
eassert (index_size == 1);
h->index = (hash_idx_t *)empty_hash_index_vector;
h->next_free = -1;
@@ -4584,12 +4583,10 @@ make_hash_table (const struct hash_table_test *test,
EMACS_INT size,
for (ptrdiff_t i = 0; i < 2 * size; i++)
h->key_and_value[i] = HASH_UNUSED_ENTRY_KEY;
- h->hash = hash_table_alloc_bytes (size * sizeof *h->hash);
-
- h->next = hash_table_alloc_bytes (size * sizeof *h->next);
+ h->hash_next = hash_table_alloc_bytes (size * sizeof *h->hash_next);
for (ptrdiff_t i = 0; i < size - 1; i++)
- h->next[i] = i + 1;
- h->next[size - 1] = -1;
+ h->hash_next[i].next = i + 1;
+ h->hash_next[size - 1].next = -1;
h->index = hash_table_alloc_bytes (index_size * sizeof *h->index);
for (ptrdiff_t i = 0; i < index_size; i++)
@@ -4630,13 +4627,9 @@ copy_hash_table (struct Lisp_Hash_Table *h1)
h2->key_and_value = hash_table_alloc_bytes (kv_bytes);
memcpy (h2->key_and_value, h1->key_and_value, kv_bytes);
- ptrdiff_t hash_bytes = h1->table_size * sizeof *h1->hash;
- h2->hash = hash_table_alloc_bytes (hash_bytes);
- memcpy (h2->hash, h1->hash, hash_bytes);
-
- ptrdiff_t next_bytes = h1->table_size * sizeof *h1->next;
- h2->next = hash_table_alloc_bytes (next_bytes);
- memcpy (h2->next, h1->next, next_bytes);
+ ptrdiff_t hn_bytes = h1->table_size * sizeof *h1->hash_next;
+ h2->hash_next = hash_table_alloc_bytes (hn_bytes);
+ memcpy (h2->hash_next, h1->hash_next, hn_bytes);
ptrdiff_t index_bytes = h1->index_size * sizeof *h1->index;
h2->index = hash_table_alloc_bytes (index_bytes);
@@ -4674,11 +4667,12 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h)
/* Allocate all the new vectors before updating *H, to
avoid problems if memory is exhausted. */
- hash_idx_t *next = hash_table_alloc_bytes (new_size * sizeof *next);
- memcpy (next, h->next, old_size * sizeof *next);
+ struct hash_next *hash_next
+ = hash_table_alloc_bytes (new_size * sizeof *hash_next);
+ memcpy (hash_next, h->hash_next, old_size * sizeof *hash_next);
for (ptrdiff_t i = old_size; i < new_size - 1; i++)
- next[i] = i + 1;
- next[new_size - 1] = -1;
+ hash_next[i].next = i + 1;
+ hash_next[new_size - 1].next = -1;
Lisp_Object *key_and_value
= hash_table_alloc_bytes (2 * new_size * sizeof *key_and_value);
@@ -4687,9 +4681,6 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h)
for (ptrdiff_t i = 2 * old_size; i < 2 * new_size; i++)
key_and_value[i] = HASH_UNUSED_ENTRY_KEY;
- hash_hash_t *hash = hash_table_alloc_bytes (new_size * sizeof *hash);
- memcpy (hash, h->hash, old_size * sizeof *hash);
-
ptrdiff_t old_index_size = h->index_size;
ptrdiff_t index_size = hash_index_size (new_size);
hash_idx_t *index = hash_table_alloc_bytes (index_size * sizeof *index);
@@ -4708,11 +4699,8 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h)
2 * old_size * sizeof *h->key_and_value);
h->key_and_value = key_and_value;
- hash_table_free_bytes (h->hash, old_size * sizeof *h->hash);
- h->hash = hash;
-
- hash_table_free_bytes (h->next, old_size * sizeof *h->next);
- h->next = next;
+ hash_table_free_bytes (h->hash_next, old_size * sizeof *h->hash_next);
+ h->hash_next = hash_next;
h->key_and_value = key_and_value;
@@ -4760,11 +4748,7 @@ hash_table_thaw (Lisp_Object hash_table)
h->index_size = index_size;
h->next_free = -1;
- h->hash = hash_table_alloc_bytes (size * sizeof *h->hash);
-
- h->next = hash_table_alloc_bytes (size * sizeof *h->next);
- for (ptrdiff_t i = 0; i < size; i++)
- h->next[i] = -1;
+ h->hash_next = hash_table_alloc_bytes (size * sizeof *h->hash_next);
h->index = hash_table_alloc_bytes (index_size * sizeof *h->index);
for (ptrdiff_t i = 0; i < index_size; i++)
diff --git a/src/lisp.h b/src/lisp.h
index 64e8e0c11c3..30e452ffbb0 100644
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -2429,6 +2429,11 @@ typedef enum {
(hash) indices. It's signed and a subtype of ptrdiff_t. */
typedef int32_t hash_idx_t;
+struct hash_next {
+ hash_hash_t hash; /* Hash value for entry. Undefined for unused entries.
*/
+ hash_idx_t next; /* Next index in bucket or free list, or -1 if last. */
+};
+
struct Lisp_Hash_Table
{
union vectorlike_header header;
@@ -2455,11 +2460,11 @@ struct Lisp_Hash_Table
|
next_free
- The table is physically split into three vectors (hash, next,
+ The table is physically split into two vectors (hash_next,
key_and_value) which may or may not be beneficial. */
hash_idx_t index_size; /* Size of the index vector. */
- hash_idx_t table_size; /* Size of the next and hash vectors. */
+ hash_idx_t table_size; /* Size of hash_next and key_and_value arrays. */
/* Bucket vector. An entry of -1 indicates no item is present,
and a nonnegative entry is the index of the first item in
@@ -2470,9 +2475,9 @@ struct Lisp_Hash_Table
Otherwise it is heap-allocated. */
hash_idx_t *index;
- /* Vector of hash codes. Unused entries have undefined values.
+ /* Vector of hash values and next indices.
This vector is table_size entries long. */
- hash_hash_t *hash;
+ struct hash_next *hash_next;
/* Vector of keys and values. The key of item I is found at index
2 * I, the value is found at index 2 * I + 1.
@@ -2484,13 +2489,6 @@ struct Lisp_Hash_Table
/* The comparison and hash functions. */
const struct hash_table_test *test;
- /* Vector used to chain entries. If entry I is free, next[I] is the
- entry number of the next free item. If entry I is non-free,
- next[I] is the index of the next entry in the collision chain,
- or -1 if there is no such entry.
- This vector is table_size entries long. */
- hash_idx_t *next;
-
/* Number of key/value entries in the table. */
hash_idx_t count;
@@ -2565,7 +2563,7 @@ INLINE hash_hash_t
HASH_HASH (const struct Lisp_Hash_Table *h, ptrdiff_t idx)
{
eassert (idx >= 0 && idx < h->table_size);
- return h->hash[idx];
+ return h->hash_next[idx].hash;
}
/* Value is the size of hash table H. */
diff --git a/src/pdumper.c b/src/pdumper.c
index 0bc3a9bd958..83406343b98 100644
--- a/src/pdumper.c
+++ b/src/pdumper.c
@@ -2717,8 +2717,7 @@ static void
hash_table_freeze (struct Lisp_Hash_Table *h)
{
h->key_and_value = hash_table_contents (h);
- h->next = NULL;
- h->hash = NULL;
+ h->hash_next = NULL;
h->index = NULL;
h->table_size = 0;
h->index_size = 0;
- scratch/hash-table-perf 9b39cee78e3 12/35: * src/print.c (print_object): Don't print hash table test if `eql`., (continued)
- scratch/hash-table-perf 9b39cee78e3 12/35: * src/print.c (print_object): Don't print hash table test if `eql`., Mattias Engdegård, 2024/01/12
- scratch/hash-table-perf 08acca67739 14/35: Don't print or read the hash table size parameter, Mattias Engdegård, 2024/01/12
- scratch/hash-table-perf b1218be258a 18/35: Allow zero hash table size, Mattias Engdegård, 2024/01/12
- scratch/hash-table-perf 8f608cb4a1c 28/35: Use key Qunbound instead of hash value hash_unused for free entries, Mattias Engdegård, 2024/01/12
- scratch/hash-table-perf 93d6326e6c0 32/35: Hash-table documentation updates, Mattias Engdegård, 2024/01/12
- scratch/hash-table-perf 1462fca6dce 16/35: Remove rehash-threshold and rehash-size struct members, Mattias Engdegård, 2024/01/12
- scratch/hash-table-perf 3b3fea97ecc 22/35: Use hash_idx_t for storing hash indices, Mattias Engdegård, 2024/01/12
- scratch/hash-table-perf 58559a83827 29/35: * src/lisp.h (hash_hash_t): Change to uint32_t., Mattias Engdegård, 2024/01/12
- scratch/hash-table-perf 441c4d53cf6 31/35: Don't pretend that hash-table-size is useful, Mattias Engdegård, 2024/01/12
- scratch/hash-table-perf a0560e90d3b 26/35: Change hash_idx_t to int32_t on all platforms, Mattias Engdegård, 2024/01/12
- scratch/hash-table-perf 8ec0e030b66 35/35: Combine hash and next vector into a single array,
Mattias Engdegård <=
- scratch/hash-table-perf b9a0b88aea8 05/35: ; * src/fns.c (collect_interval): Move misplaced function., Mattias Engdegård, 2024/01/12
- scratch/hash-table-perf ed4ce0af9a9 06/35: Refactor: extract hash and index computations to functions, Mattias Engdegård, 2024/01/12
- scratch/hash-table-perf e55fcdafa07 09/35: Abstract predicate and constant for unused hash keys, Mattias Engdegård, 2024/01/12
- scratch/hash-table-perf 594152bf667 01/35: Add internal hash-table debug functions, Mattias Engdegård, 2024/01/12
- scratch/hash-table-perf 422c91a822a 02/35: ; * src/pdumper.c (dump_hash_table): Remove unused argument., Mattias Engdegård, 2024/01/12
- scratch/hash-table-perf f2d6e2713c0 07/35: Refactor hash table vector reallocation, Mattias Engdegård, 2024/01/12
- scratch/hash-table-perf dd4ee2c634b 15/35: Represent hash table weakness as an enum internally, Mattias Engdegård, 2024/01/12
- scratch/hash-table-perf 615d3e4cdc6 17/35: Leaner hash table dumping and thawing, Mattias Engdegård, 2024/01/12
- scratch/hash-table-perf d003b84c484 19/35: Use non-Lisp allocation for internal hash-table vectors, Mattias Engdegård, 2024/01/12
- scratch/hash-table-perf afd2185bfdb 20/35: Store hash values as integers instead of Lisp_Object, Mattias Engdegård, 2024/01/12