[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[GNUnet-SVN] r27315 - gnunet/src/regex
From: |
gnunet |
Subject: |
[GNUnet-SVN] r27315 - gnunet/src/regex |
Date: |
Tue, 28 May 2013 01:35:17 +0200 |
Author: bartpolot
Date: 2013-05-28 01:35:17 +0200 (Tue, 28 May 2013)
New Revision: 27315
Modified:
gnunet/src/regex/regex_test_lib.c
Log:
Fix regexes with accepting common prefix, optimize regex prefix lengths
Modified: gnunet/src/regex/regex_test_lib.c
===================================================================
--- gnunet/src/regex/regex_test_lib.c 2013-05-27 17:28:55 UTC (rev 27314)
+++ gnunet/src/regex/regex_test_lib.c 2013-05-27 23:35:17 UTC (rev 27315)
@@ -60,29 +60,30 @@
char *s;
};
-// static void
-// space (int n)
-// {
-// int i;
-// for (i = 0; i < n; i++)
-// printf (" ");
-// }
-//
-// static void
-// debugctx (struct RegexCombineCtx *ctx, int level)
-// {
-// struct RegexCombineCtx *p;
-// space (level);
-// if (NULL != ctx->s)
-// printf ("'%s'\n", ctx->s);
-// else
-// printf ("NULL\n");
-// for (p = ctx->head; NULL != p; p = p->next)
-// {
-// debugctx (p, level + 1);
-// }
-// }
+/*
+static void
+space (int n)
+{
+ int i;
+ for (i = 0; i < n; i++)
+ printf (" ");
+}
+static void
+debugctx (struct RegexCombineCtx *ctx, int level)
+{
+ struct RegexCombineCtx *p;
+ space (level);
+ if (NULL != ctx->s)
+ printf ("'%s'\n", ctx->s);
+ else
+ printf ("NULL\n");
+ for (p = ctx->head; NULL != p; p = p->next)
+ {
+ debugctx (p, level + 1);
+ }
+}
+*/
/**
* Extract a string from all prefix-combined regexes.
@@ -110,7 +111,9 @@
s = regex_combine (p);
GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " total '%s'\n", s);
if (strlen(s) == 0)
+ {
opt = GNUNET_YES;
+ }
else
{
GNUNET_asprintf (&tmp, "%s%s|", regex, s);
@@ -136,7 +139,7 @@
if (NULL != ctx->s)
{
if (opt)
- GNUNET_asprintf (&s, "%s[%s]", ctx->s, regex);
+ GNUNET_asprintf (&s, "%s(%s)?", ctx->s, regex);
else
GNUNET_asprintf (&s, "%s(%s)", ctx->s, regex);
GNUNET_free (regex);
@@ -149,6 +152,68 @@
/**
+ * Get the number of matching characters on the prefix of both strings.
+ *
+ * @param s1 String 1.
+ * @param s2 String 2.
+ *
+ * @return Number of characters of matching prefix.
+ */
+static unsigned int
+get_prefix_length (const char *s1, const char *s2)
+{
+ unsigned int l1;
+ unsigned int l2;
+ unsigned int limit;
+ unsigned int i;
+
+ l1 = strlen (s1);
+ l2 = strlen (s2);
+ limit = l1 > l2 ? l2 : l1;
+
+ for (i = 1; i <= limit; i++)
+ {
+ if (0 != strncmp (s1, s2, i))
+ return i - 1;
+ }
+ return limit;
+}
+
+
+/**
+ * Return the child context with the longest prefix match with the regex.
+ * Usually only one child will match, search all just in case.
+ *
+ * @param ctx Context whose children to search.
+ * @param regex String to match.
+ *
+ * @return Child with the longest prefix, NULL if no child matches.
+ */
+static struct RegexCombineCtx *
+get_longest_prefix (struct RegexCombineCtx *ctx, const char *regex)
+{
+ struct RegexCombineCtx *p;
+ struct RegexCombineCtx *best;
+ unsigned int l;
+ unsigned int best_l;
+
+ best_l = 0;
+ best = NULL;
+ for (p = ctx->head; NULL != p; p = p->next)
+ {
+ l = get_prefix_length (p->s, regex);
+ if (l > best_l)
+ {
+ GNUNET_break (0 == best_l);
+ best = p;
+ best_l = l;
+ }
+ }
+ return best;
+}
+
+
+/**
* Add a single regex to a context, combining with exisiting regex by-prefix.
*
* @param ctx Context with 0 or more regexes.
@@ -158,42 +223,55 @@
regex_add (struct RegexCombineCtx *ctx, const char *regex)
{
struct RegexCombineCtx *p;
- const char *rest;
+ struct RegexCombineCtx *newctx;
+ unsigned int prefix_l;
+ const char *rest_r;
+ const char *rest_s;
+ size_t len;
- rest = ®ex[1];
- for (p = ctx->head; NULL != p; p = p->next)
+ if (0 == strlen (regex))
+ return;
+
+ p = get_longest_prefix (ctx, regex);
+ if (NULL != p) /* There is some prefix match, reduce regex and try again */
{
- if (p->s[0] == regex[0])
+ prefix_l = get_prefix_length (p->s, regex);
+ rest_s = &p->s[prefix_l];
+ rest_r = ®ex[prefix_l];
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "chosen '%s' [%u]\n", p->s, prefix_l);
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "prefix r '%.*s'\n", prefix_l, p->s);
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "rest r '%s'\n", rest_r);
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "rest s '%s'\n", rest_s);
+ len = strlen (p->s);
+ if (prefix_l < len) /* only partial match, split existing state */
{
- if (1 == strlen(p->s))
- {
- GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "common char %s\n", p->s);
- GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "adding %s\n", rest);
- regex_add (p, rest);
- }
- else
- {
- struct RegexCombineCtx *newctx;
- newctx = GNUNET_malloc (sizeof (struct RegexCombineCtx));
- newctx->s = GNUNET_strdup (&p->s[1]);
- GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " p has now %s\n", p->s);
- GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " p will have %.1s\n", p->s);
- GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " regex is %s\n", regex);
- GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " new has now %s\n", newctx->s);
- GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " rest is now %s\n", rest);
- p->s[1] = '\0'; /* dont realloc */
- GNUNET_CONTAINER_DLL_insert (p->head, p->tail, newctx);
- regex_add (p, rest);
- }
- return;
+ newctx = GNUNET_malloc (sizeof (struct RegexCombineCtx));
+ newctx->head = p->head;
+ newctx->tail = p->tail;
+ newctx->s = GNUNET_malloc(len - prefix_l + 1);
+ strncpy (newctx->s, rest_s, len - prefix_l + 1);
+
+ p->head = newctx;
+ p->tail = newctx;
+ p->s[prefix_l] = '\0';
}
+ regex_add (p, rest_r);
+ return;
}
- GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " no match\n");
+ /* There is no prefix match, add new */
+ if (NULL == ctx->head && NULL != ctx->s)
+ {
+ /* this was the end before, add empty string */
+ newctx = GNUNET_malloc (sizeof (struct RegexCombineCtx));
+ newctx->s = GNUNET_strdup ("");
+ GNUNET_CONTAINER_DLL_insert (ctx->head, ctx->tail, newctx);
+ }
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " no match\n");
GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " new state %s\n", regex);
GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " under %s\n", ctx->s);
- p = GNUNET_malloc (sizeof (struct RegexCombineCtx));
- p->s = GNUNET_strdup (regex);
- GNUNET_CONTAINER_DLL_insert (ctx->head, ctx->tail, p);
+ newctx = GNUNET_malloc (sizeof (struct RegexCombineCtx));
+ newctx->s = GNUNET_strdup (regex);
+ GNUNET_CONTAINER_DLL_insert (ctx->head, ctx->tail, newctx);
}
@@ -240,9 +318,10 @@
current = regexes[i];
GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Regex %u: %s\n", i, current);
regex_add (ctx, current);
+ /* debugctx (ctx, 0); */
}
GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "\nCombining...\n");
- //debugctx (ctx, 0);
+ /* debugctx (ctx, 0); */
combined = regex_combine (ctx);
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [GNUnet-SVN] r27315 - gnunet/src/regex,
gnunet <=