From d7ff453a2aac557fb133100b94e2fc7c007fbe26 Mon Sep 17 00:00:00 2001 From: Norihiro Tanaka Date: Tue, 20 May 2014 10:18:42 +0900 Subject: [PATCH] dfa: speed-up for a pattern that many atoms are catenated When many atoms are catenated, dfamust() is very slow in order that pushing a string into `in' list is slow. This change fixes it. * src/dfa.c (istrstr): Use memchr() in order to find a potential match. (enlist): If there is already something in the list, don't allocate memory. (dfamust): If don't have to check in the list, enlist a new entry directly. --- src/dfa.c | 57 +++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 20 deletions(-) diff --git a/src/dfa.c b/src/dfa.c index 48a83cd..9f6d588 100644 --- a/src/dfa.c +++ b/src/dfa.c @@ -3753,15 +3753,24 @@ icatalloc (char *old, char const *new) } static char *_GL_ATTRIBUTE_PURE -istrstr (char const *lookin, char const *lookfor) +istrstr (char const *lookin, char const *lookfor, size_t const lenfor) { char const *cp; - size_t len; + size_t lenin; - len = strlen (lookfor); - for (cp = lookin; *cp != '\0'; ++cp) - if (strncmp (cp, lookfor, len) == 0) - return (char *) cp; + lenin = strlen (lookin); + if (lenfor == 1) + return memchr (lookin, *lookfor, lenin); + + cp = lookin; + for (cp = lookin; cp < lookin + lenin - lenfor; ++cp) + { + memchr (cp, *lookfor, lenin - (cp - lookin)); + if (!cp) + return NULL; + if (strncmp (cp + 1, lookfor + 1, lenfor - 1) == 0) + return (char *) cp; + } return NULL; } @@ -3776,19 +3785,16 @@ static char ** enlist (char **cpp, char *new, size_t len) { size_t i, j; - new = memcpy (xmalloc (len + 1), new, len); - new[len] = '\0'; /* Is there already something in the list that's new (or longer)? */ for (i = 0; cpp[i] != NULL; ++i) - if (istrstr (cpp[i], new) != NULL) - { - free (new); - return cpp; - } + if (istrstr (cpp[i], new, len) != NULL) + return cpp; + new = memcpy (xmalloc (len + 1), new, len); + new[len] = '\0'; /* Eliminate any obsoleted strings. */ j = 0; while (cpp[j] != NULL) - if (istrstr (new, cpp[j]) == NULL) + if (istrstr (new, cpp[j], strlen (cpp[j])) == NULL) ++j; else { @@ -4023,17 +4029,28 @@ dfamust (struct dfa *d) /* In. Everything in left, plus everything in right, plus concatenation of left's right and right's left. */ - lmp->in = addlists (lmp->in, rmp->in); if (lmp->right[0] != '\0' && rmp->left[0] != '\0') { size_t lrlen = strlen (lmp->right); size_t rllen = strlen (rmp->left); - char *tp = xmalloc (lrlen + rllen); - memcpy (tp, lmp->right, lrlen); - memcpy (tp + lrlen, rmp->left, rllen); - lmp->in = enlist (lmp->in, tp, lrlen + rllen); - free (tp); + if (!lmp->in[1] && !rmp->in[1]) + { + lmp->in[0] = xnrealloc (lmp->in[0], lrlen + rllen + 1, + sizeof *lmp->in[0]); + memcpy (lmp->in[0] + lrlen, rmp->in[0], rllen + 1); + } + else + { + char *tp = xmalloc (lrlen + rllen); + memcpy (tp, lmp->right, lrlen); + memcpy (tp + lrlen, rmp->left, rllen); + lmp->in = addlists (lmp->in, rmp->in); + lmp->in = enlist (lmp->in, tp, lrlen + rllen); + free (tp); + } } + else + lmp->in = addlists (lmp->in, rmp->in); /* Left-hand */ if (lmp->is[0] != '\0') lmp->left = icatalloc (lmp->left, rmp->left); -- 2.0.0