[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 11/15] Eliminate the trie->next field
From: |
Nick Cleaton |
Subject: |
[PATCH 11/15] Eliminate the trie->next field |
Date: |
Sat, 2 Oct 2010 07:07:44 +0100 |
Don't use trie->next, instead iterate over the nodes in node vector
order. This requires a change to the fail and shift computations,
since the traversal is no-longer breadth first.
---
src/kwset.c | 57 ++++++++++++++++++++++++++++++++++-----------------------
1 files changed, 34 insertions(+), 23 deletions(-)
diff --git a/src/kwset.c b/src/kwset.c
index c30e951..27a47c3 100644
--- a/src/kwset.c
+++ b/src/kwset.c
@@ -73,7 +73,6 @@ struct trie
unsigned char is_lastkid; /* True for the last of a group of siblings. */
unsigned int accepting; /* Word index of accepted word, or zero. */
struct trie *links; /* Tree of edges leaving this node. */
- struct trie *next; /* List of all trie nodes in level order. */
int shift; /* Shift function for search failures. */
int maxshift; /* Max shift of self and descendents. */
};
@@ -103,6 +102,7 @@ struct kwset
int kwbuf_capacity; /* Capacity of the buffer. */
struct trie **prev_kw_tries; /* The trie nodes of the last keyword added. */
struct trie *node_storage; /* Vector of trie nodes. */
+ size_t node_count; /* The total number of trie nodes. */
struct trie *next_free_node; /* Next unallocated trie in node_storage. */
struct broodnote *bn_cursor; /* A cursor for walking the broodnotes. */
struct trie *next_prealloced; /* Linked list of pre-allocated trie nodes. */
@@ -130,6 +130,7 @@ kwsalloc (char const *trans)
kwset->kwbuf_capacity = 0;
kwset->prev_kw_tries = NULL;
kwset->node_storage = NULL;
+ kwset->node_count = 0;
kwset->next_free_node = NULL;
kwset->bn_cursor = NULL;
kwset->next_prealloced = NULL;
@@ -349,7 +350,6 @@ install_suffix (kwset_t kws, struct cpsort_md *md, char
const *text,
{
link = allocate_and_plumb_trie_node (kwset, index, ++trie_depth, trie);
link->accepting = 0;
- link->next = NULL;
link->lf.fail = NULL;
link->rd.depth = trie_depth;
link->shift = 0;
@@ -377,7 +377,8 @@ build_trie_from_sorted_keywords (struct kwset *kwset)
return err;
/* Allocate a single vector to hold all of the trie nodes. */
- MALLOC (kwset->node_storage, 1 + cpsdump.tot_suf_len);
+ kwset->node_count = 1 + cpsdump.tot_suf_len;
+ MALLOC (kwset->node_storage, kwset->node_count);
if (!kwset->node_storage)
return _("memory exhausted");
kwset->next_free_node = kwset->node_storage;
@@ -386,7 +387,6 @@ build_trie_from_sorted_keywords (struct kwset *kwset)
kwset->trie = kwset->next_free_node++;
kwset->trie->accepting = 0;
kwset->trie->links = NULL;
- kwset->trie->next = NULL;
kwset->trie->lf.fail = NULL;
kwset->trie->rd.depth = 0;
kwset->trie->shift = 0;
@@ -421,8 +421,9 @@ build_trie_from_sorted_keywords (struct kwset *kwset)
/* Compute the Aho-Corasick failure function for the given trie node,
given the failure function for its parent as well as a last resort
- failure node. */
-static void
+ failure node.
+ Return the parent of the installed fail node. */
+static struct trie const *
triefail (struct trie *trie, struct trie const *fail, struct trie *recourse)
{
struct trie *link;
@@ -438,7 +439,7 @@ triefail (struct trie *trie, struct trie const *fail,
struct trie *recourse)
if (trie->label == link->label)
{
trie->lf.fail = link;
- return;
+ return fail->lf.fail;
}
}
while (!link++->is_lastkid);
@@ -447,6 +448,7 @@ triefail (struct trie *trie, struct trie const *fail,
struct trie *recourse)
}
trie->lf.fail = recourse;
+ return NULL;
}
/* Return true if A has every label in B. */
@@ -513,7 +515,7 @@ kwsprep (kwset_t kws)
else
{
struct trie *fail;
- struct trie *last, *next[NCHAR], *child;
+ struct trie *next[NCHAR], *child, *nodes_end;
if ((err = build_trie_from_sorted_keywords (kwset)))
return err;
@@ -521,27 +523,36 @@ kwsprep (kwset_t kws)
cpsort_free (kwset->cps);
kwset->cps = NULL;
- /* Traverse the nodes of the trie in level order, simultaneously
- computing the delta table, failure function, and shift function. */
- for (curr = last = kwset->trie; curr; curr = curr->next)
+ /* Traverse the nodes of the trie, simultaneously computing the delta
+ table, failure function, and shift function. */
+ nodes_end = kwset->trie + kwset->node_count;
+ kwset->trie->shift = kwset->trie->maxshift = kwset->mind;
+ for (curr = kwset->trie; curr < nodes_end; ++curr)
{
- curr->shift = kwset->mind;
- curr->maxshift = kwset->mind;
-
/* Loop over the immediate descendents of this node. */
if ((child = curr->links))
{
do
{
- /* Enqueue the descendent in the level order queue. */
- last = last->next = child;
-
/* Update the delta table. */
if (curr->rd.depth < delta[child->label])
delta[child->label] = curr->rd.depth;
- /* Compute the failure function for this node. */
- triefail (child, curr->lf.fail, kwset->trie);
+ /* Initialize shift and maxshift, and compute the failure
+ function for this node. Since we are not traversing
+ the trie in level order, we may need to recurse ahead
+ to compute the fail of the fail and so on. */
+ {
+ struct trie *target = child;
+ struct trie const *pfail = curr->lf.fail;
+
+ while (target->lf.fail == NULL && target != kwset->trie)
+ {
+ target->shift = target->maxshift = kwset->mind;
+ pfail = triefail (target, pfail, kwset->trie);
+ target = target->lf.fail;
+ }
+ }
}
while (!child++->is_lastkid);
}
@@ -565,10 +576,10 @@ kwsprep (kwset_t kws)
}
}
- /* Traverse the trie in level order again, fixing up all nodes whose
- shift exceeds their inherited maxshift and plumbing in the rlink
- and llink fields. */
- for (curr = kwset->trie; curr; curr = curr->next)
+ /* Traverse the trie again, fixing up all nodes whose shift exceeds
+ their inherited maxshift and plumbing in the rlink and llink
+ fields. */
+ for (curr = kwset->trie; curr < nodes_end; ++curr)
{
if (!(child = curr->links))
continue;
--
1.5.6.3
- [PATCH 01/15] Merge trie and tree into a single struct, (continued)
- [PATCH 01/15] Merge trie and tree into a single struct, Nick Cleaton, 2010/10/02
- [PATCH 02/15] Sort the keywords before building the trie, Nick Cleaton, 2010/10/02
- [PATCH 03/15] Lay out the trie more systematically, Nick Cleaton, 2010/10/02
- [PATCH 04/15] Lay out the trees more systematically, Nick Cleaton, 2010/10/02
- [PATCH 05/15] Avoid using llink and rlink during prep, Nick Cleaton, 2010/10/02
- [PATCH 06/15] Defer llink/rlink plumbing to the end of prep, Nick Cleaton, 2010/10/02
- [PATCH 07/15] Share storage between llink and fail, Nick Cleaton, 2010/10/02
- [PATCH 08/15] Avoid using trie->depth during matching, Nick Cleaton, 2010/10/02
- [PATCH 09/15] Share storage between rlink and depth, Nick Cleaton, 2010/10/02
- [PATCH 10/15] Eliminate the trie->parent field, Nick Cleaton, 2010/10/02
- [PATCH 11/15] Eliminate the trie->next field,
Nick Cleaton <=
- [PATCH 12/15] Eliminate the trie->maxshift field, Nick Cleaton, 2010/10/02
- [PATCH 13/15] Reduce trie->accepting to a flag, Nick Cleaton, 2010/10/02
- [PATCH 14/15] Merge flags into a trie->flags field, Nick Cleaton, 2010/10/02
- [PATCH 15/15] Make trie->shift an unsigned short, Nick Cleaton, 2010/10/02
- Re: Proof of concept: kwset running 3 times smaller, Nick Cleaton, 2010/10/30