[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Changes to m4/src/Attic/builtin.c,v [branch-1_4]
From: |
Eric Blake |
Subject: |
Changes to m4/src/Attic/builtin.c,v [branch-1_4] |
Date: |
Tue, 09 Oct 2007 16:51:10 +0000 |
CVSROOT: /sources/m4
Module name: m4
Branch: branch-1_4
Changes by: Eric Blake <ericb> 07/10/09 16:51:10
Index: src/builtin.c
===================================================================
RCS file: /sources/m4/m4/src/Attic/builtin.c,v
retrieving revision 1.1.1.1.2.64
retrieving revision 1.1.1.1.2.65
diff -u -b -r1.1.1.1.2.64 -r1.1.1.1.2.65
--- src/builtin.c 2 Oct 2007 22:05:53 -0000 1.1.1.1.2.64
+++ src/builtin.c 9 Oct 2007 16:51:09 -0000 1.1.1.1.2.65
@@ -231,6 +231,102 @@
/* True if --warn-macro-sequence is in effect. */
static bool macro_sequence_inuse;
+/* Maybe this is worth making runtime tunable. Too small, and nothing
+ gets cached because the working set of active regex is larger than
+ the cache, and we are always swapping out entries. Too large, and
+ the time spent searching the cache for a match overtakes the time
+ saved by caching. For now, this size proved reasonable for the
+ typical working set of Autoconf 2.62. */
+#define REGEX_CACHE_SIZE 16
+
+/* Structure for caching compiled regex. */
+struct m4_regex {
+ unsigned count; /* usage counter */
+ size_t len; /* length of string */
+ char *str; /* copy of compiled string */
+ struct re_pattern_buffer *buf; /* compiled regex, allocated */
+ struct re_registers regs; /* match registers, reused */
+};
+typedef struct m4_regex m4_regex;
+
+/* Storage for the cache of regular expressions. */
+static m4_regex regex_cache[REGEX_CACHE_SIZE];
+
+/*------------------------------------------------------------------.
+| Compile STR, with length LEN, into a regex. On success, set BUF |
+| and REGS to the compiled regex. Compilation is cached, so do not |
+| free the results here; rather, use free_regex at the end of the |
+| program. Return NULL on success, or an error message. |
+`------------------------------------------------------------------*/
+static const char *
+compile_pattern (const char *str, size_t len, struct re_pattern_buffer **buf,
+ struct re_registers **regs)
+{
+ int i;
+ m4_regex *victim;
+ unsigned victim_count;
+ struct re_pattern_buffer *new_buf;
+ struct re_registers *new_regs;
+ const char *msg;
+
+ /* First, check if STR is already cached. If so, increase its use
+ count and return it. */
+ for (i = 0; i < REGEX_CACHE_SIZE; i++)
+ if (len == regex_cache[i].len && regex_cache[i].str
+ && memcmp (str, regex_cache[i].str, len) == 0)
+ {
+ *buf = regex_cache[i].buf;
+ *regs = &regex_cache[i].regs;
+ regex_cache[i].count++;
+ return NULL;
+ }
+
+ /* Next, check if STR can be compiled. */
+ new_buf = xzalloc (sizeof *new_buf);
+ msg = re_compile_pattern (str, len, new_buf);
+ if (msg)
+ {
+ regfree (new_buf);
+ free (new_buf);
+ return msg;
+ }
+
+ /* Now, find a victim slot. Decrease the count of all entries, then
+ prime the count of the victim slot at REGEX_CACHE_SIZE. This
+ way, frequently used entries and newly created entries are least
+ likely to be victims next time we have a cache miss. */
+ victim = regex_cache;
+ victim_count = victim->count;
+ if (victim_count)
+ victim->count--;
+ for (i = 1; i < REGEX_CACHE_SIZE; i++)
+ {
+ if (regex_cache[i].count < victim_count)
+ {
+ victim_count = regex_cache[i].count;
+ victim = &regex_cache[i];
+ }
+ if (regex_cache[i].count)
+ regex_cache[i].count--;
+ }
+ victim->count = REGEX_CACHE_SIZE;
+ victim->len = len;
+ if (victim->str)
+ {
+ free (victim->str);
+ regfree (victim->buf);
+ free (victim->buf);
+ }
+ victim->str = xstrdup (str);
+ victim->buf = new_buf;
+ new_regs = &victim->regs;
+ re_set_registers (new_buf, new_regs, new_regs->num_regs,
+ new_regs->start, new_regs->end);
+ *buf = new_buf;
+ *regs = new_regs;
+ return NULL;
+}
+
/*----------------------------------------.
| Clean up regular expression variables. |
`----------------------------------------*/
@@ -273,14 +369,21 @@
macro_sequence_inuse = true;
}
-/*------------------------------------------------------------.
-| Free dynamic memory utilized by the define sequence regular |
-| expression. |
-`------------------------------------------------------------*/
+/*------------------------------------------------------.
+| Free dynamic memory utilized by regular expressions. |
+`------------------------------------------------------*/
void
-free_macro_sequence (void)
+free_regex (void)
{
+ int i;
free_pattern_buffer (&macro_sequence_buf, &macro_sequence_regs);
+ for (i = 0; i < REGEX_CACHE_SIZE; i++)
+ if (regex_cache[i].str)
+ {
+ free (regex_cache[i].str);
+ free_pattern_buffer (regex_cache[i].buf, &regex_cache[i].regs);
+ free (regex_cache[i].buf);
+ }
}
/*-------------------------------------------------------------------------.
@@ -1965,8 +2068,8 @@
const char *regexp; /* regular expression */
const char *repl; /* replacement string */
- struct re_pattern_buffer buf; /* compiled regular expression */
- struct re_registers regs; /* for subexpression matches */
+ struct re_pattern_buffer *buf;/* compiled regular expression */
+ struct re_registers *regs; /* for subexpression matches */
const char *msg; /* error message from re_compile_pattern */
int startpos; /* start position of match */
int length; /* length of first argument */
@@ -1993,21 +2096,18 @@
return;
}
- init_pattern_buffer (&buf, &regs);
- msg = re_compile_pattern (regexp, strlen (regexp), &buf);
-
+ msg = compile_pattern (regexp, strlen (regexp), &buf, &regs);
if (msg != NULL)
{
M4ERROR ((warning_status, 0,
"bad regular expression: `%s': %s", regexp, msg));
- free_pattern_buffer (&buf, &regs);
return;
}
length = strlen (victim);
/* Avoid overhead of allocating regs if we won't use it. */
- startpos = re_search (&buf, victim, length, 0, length,
- argc == 3 ? NULL : &regs);
+ startpos = re_search (buf, victim, length, 0, length,
+ argc == 3 ? NULL : regs);
if (startpos == -2)
M4ERROR ((warning_status, 0,
@@ -2015,9 +2115,7 @@
else if (argc == 3)
shipout_int (obs, startpos);
else if (startpos >= 0)
- substitute (obs, victim, repl, &regs);
-
- free_pattern_buffer (&buf, &regs);
+ substitute (obs, victim, repl, regs);
}
/*--------------------------------------------------------------------------.
@@ -2034,8 +2132,8 @@
const char *regexp; /* regular expression */
const char *repl;
- struct re_pattern_buffer buf; /* compiled regular expression */
- struct re_registers regs; /* for subexpression matches */
+ struct re_pattern_buffer *buf;/* compiled regular expression */
+ struct re_registers *regs; /* for subexpression matches */
const char *msg; /* error message from re_compile_pattern */
int matchpos; /* start position of match */
int offset; /* current match offset */
@@ -2061,14 +2159,11 @@
return;
}
- init_pattern_buffer (&buf, &regs);
- msg = re_compile_pattern (regexp, strlen (regexp), &buf);
-
+ msg = compile_pattern (regexp, strlen (regexp), &buf, &regs);
if (msg != NULL)
{
M4ERROR ((warning_status, 0,
"bad regular expression `%s': %s", regexp, msg));
- free (buf.buffer);
return;
}
@@ -2078,8 +2173,8 @@
matchpos = 0;
while (offset <= length)
{
- matchpos = re_search (&buf, victim, length,
- offset, length - offset, &regs);
+ matchpos = re_search (buf, victim, length,
+ offset, length - offset, regs);
if (matchpos < 0)
{
@@ -2102,19 +2197,17 @@
/* Handle the part of the string that was covered by the match. */
- substitute (obs, victim, repl, &regs);
+ substitute (obs, victim, repl, regs);
/* Update the offset to the end of the match. If the regexp
matched a null string, advance offset one more, to avoid
infinite loops. */
- offset = regs.end[0];
- if (regs.start[0] == regs.end[0])
+ offset = regs->end[0];
+ if (regs->start[0] == regs->end[0])
obstack_1grow (obs, victim[offset++]);
}
obstack_1grow (obs, '\0');
-
- free_pattern_buffer (&buf, &regs);
}
/* Finally, a placeholder builtin. This builtin is not installed by
- Changes to m4/src/Attic/builtin.c,v [branch-1_4], Eric Blake, 2007/10/02
- Changes to m4/src/Attic/builtin.c,v [branch-1_4],
Eric Blake <=
- Changes to m4/src/Attic/builtin.c,v [branch-1_4], Eric Blake, 2007/10/09
- Changes to m4/src/Attic/builtin.c,v [branch-1_4], Eric Blake, 2007/10/22
- Changes to m4/src/Attic/builtin.c,v [branch-1_4], Eric Blake, 2007/10/22