[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 12/15] Eliminate the trie->maxshift field
From: |
Nick Cleaton |
Subject: |
[PATCH 12/15] Eliminate the trie->maxshift field |
Date: |
Sat, 2 Oct 2010 07:07:44 +0100 |
Move the maxshift storage to an external vector of ints, laid out in
the same order as the trie node vector. Because maxshift isn't touched
when matching, this reduces the size of the working set of the matching
process.
---
src/kwset.c | 41 ++++++++++++++++++++++++++++-------------
1 files changed, 28 insertions(+), 13 deletions(-)
diff --git a/src/kwset.c b/src/kwset.c
index 27a47c3..e23cbca 100644
--- a/src/kwset.c
+++ b/src/kwset.c
@@ -74,7 +74,6 @@ struct trie
unsigned int accepting; /* Word index of accepted word, or zero. */
struct trie *links; /* Tree of edges leaving this node. */
int shift; /* Shift function for search failures. */
- int maxshift; /* Max shift of self and descendents. */
};
/* A record of a trie node that has one or more siblings. */
@@ -516,17 +515,24 @@ kwsprep (kwset_t kws)
{
struct trie *fail;
struct trie *next[NCHAR], *child, *nodes_end;
+ int *maxshift;
if ((err = build_trie_from_sorted_keywords (kwset)))
return err;
+ MALLOC (maxshift, kwset->node_count);
+ if (!maxshift)
+ return _("memory exhausted");
+ for (i = 0; i < kwset->node_count; ++i)
+ maxshift[i] = kwset->mind;
+
cpsort_free (kwset->cps);
kwset->cps = NULL;
/* Traverse the nodes of the trie, simultaneously computing the delta
table, failure function, and shift function. */
nodes_end = kwset->trie + kwset->node_count;
- kwset->trie->shift = kwset->trie->maxshift = kwset->mind;
+ kwset->trie->shift = kwset->mind;
for (curr = kwset->trie; curr < nodes_end; ++curr)
{
/* Loop over the immediate descendents of this node. */
@@ -538,17 +544,17 @@ kwsprep (kwset_t kws)
if (curr->rd.depth < delta[child->label])
delta[child->label] = curr->rd.depth;
- /* Initialize shift and maxshift, and compute the failure
- function for this node. Since we are not traversing
- the trie in level order, we may need to recurse ahead
- to compute the fail of the fail and so on. */
+ /* Initialize shift, and compute the failure function for
+ this node. Since we are not traversing the trie in
+ level order, we may need to recurse ahead to compute
+ the fail of the fail and so on. */
{
struct trie *target = child;
struct trie const *pfail = curr->lf.fail;
while (target->lf.fail == NULL && target != kwset->trie)
{
- target->shift = target->maxshift = kwset->mind;
+ target->shift = kwset->mind;
pfail = triefail (target, pfail, kwset->trie);
target = target->lf.fail;
}
@@ -571,8 +577,11 @@ kwsprep (kwset_t kws)
/* If the current node is accepting then the shift at the
fail and its descendents should be no larger than the
difference of their depths. */
- if (curr->accepting && fail->maxshift > curr->rd.depth -
fail->rd.depth)
- fail->maxshift = curr->rd.depth - fail->rd.depth;
+ if (curr->accepting
+ && maxshift[fail - kwset->trie] >
+ curr->rd.depth - fail->rd.depth)
+ maxshift[fail - kwset->trie] =
+ curr->rd.depth - fail->rd.depth;
}
}
@@ -581,15 +590,20 @@ kwsprep (kwset_t kws)
fields. */
for (curr = kwset->trie; curr < nodes_end; ++curr)
{
+ int parent_maxshift, *child_maxshift_ptr;
+
if (!(child = curr->links))
continue;
+ parent_maxshift = maxshift[curr - kwset->trie];
+ child_maxshift_ptr = maxshift + (child - kwset->trie);
do
{
- if (curr->maxshift < child->maxshift)
- child->maxshift = curr->maxshift;
- if (child->maxshift < child->shift)
- child->shift = child->maxshift;
+ if (parent_maxshift < *child_maxshift_ptr)
+ *child_maxshift_ptr = parent_maxshift;
+ if (*child_maxshift_ptr < child->shift)
+ child->shift = *child_maxshift_ptr;
+ child_maxshift_ptr++;
}
while (!child++->is_lastkid);
@@ -598,6 +612,7 @@ kwsprep (kwset_t kws)
plumb_siblings (&links, child - links);
}
}
+ free (maxshift);
/* Create a vector, indexed by character code, of the outgoing links
from the root node. */
--
1.5.6.3
- [PATCH 02/15] Sort the keywords before building the trie, (continued)
- [PATCH 02/15] Sort the keywords before building the trie, Nick Cleaton, 2010/10/02
- [PATCH 03/15] Lay out the trie more systematically, Nick Cleaton, 2010/10/02
- [PATCH 04/15] Lay out the trees more systematically, Nick Cleaton, 2010/10/02
- [PATCH 05/15] Avoid using llink and rlink during prep, Nick Cleaton, 2010/10/02
- [PATCH 06/15] Defer llink/rlink plumbing to the end of prep, Nick Cleaton, 2010/10/02
- [PATCH 07/15] Share storage between llink and fail, Nick Cleaton, 2010/10/02
- [PATCH 08/15] Avoid using trie->depth during matching, Nick Cleaton, 2010/10/02
- [PATCH 09/15] Share storage between rlink and depth, Nick Cleaton, 2010/10/02
- [PATCH 10/15] Eliminate the trie->parent field, Nick Cleaton, 2010/10/02
- [PATCH 11/15] Eliminate the trie->next field, Nick Cleaton, 2010/10/02
- [PATCH 12/15] Eliminate the trie->maxshift field,
Nick Cleaton <=
- [PATCH 13/15] Reduce trie->accepting to a flag, Nick Cleaton, 2010/10/02
- [PATCH 14/15] Merge flags into a trie->flags field, Nick Cleaton, 2010/10/02
- [PATCH 15/15] Make trie->shift an unsigned short, Nick Cleaton, 2010/10/02
- Re: Proof of concept: kwset running 3 times smaller, Nick Cleaton, 2010/10/30