[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Rational Range Interpretation patches, 1/3
From: Aharon Robbins
Subject: Rational Range Interpretation patches, 1/3
Date: Mon, 16 Jan 2012 22:24:44 +0200
User-agent: Heirloom mailx 12.4 7/29/08
Hello All.
Here is my 2nd try at RRI. The 3 patches are for dfa.c, grep.texi,
and gnulib/reg*.c.
Paolo - I still think that when compiling the dfa my change is correct,
since the regex routines were called only for checking if the range is
valid.
Thanks,
Arnold
---------------
From 5d4a1e56345e14c19224b47424a01b51120cf234 Mon Sep 17 00:00:00 2001
From: Arnold D. Robbins <address@hidden>
Date: Mon, 16 Jan 2012 22:04:11 +0200
Subject: [PATCH 1/2] Rational Range Interpretation implemented.
* dfa.c (hard_LC_COLLATE): Removed.
(parse_bracket_exp): Compare lower and upper range bounds directly.
(dfaparse): Don't set hard_LC_COLLATE.
(match_mb_charset): Test wide character directly instead of using wcscoll.
---
src/dfa.c | 41 ++++++-----------------------------------
1 files changed, 6 insertions(+), 35 deletions(-)
diff --git a/src/dfa.c b/src/dfa.c
index 6ab0ab4..edc6bd9 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -29,6 +29,7 @@
#include <limits.h>
#include <string.h>
#include <locale.h>
+#include <stdbool.h>
#define STREQ(a, b) (strcmp (a, b) == 0)
@@ -46,7 +47,7 @@
#include "gettext.h"
#define _(str) gettext (str)
-#include "mbsupport.h" /* defines MBS_SUPPORT if appropriate */
+#include "mbsupport.h" /* defines MBS_SUPPORT to 1 or 0, as appropriate */
#include <wchar.h>
#include <wctype.h>
@@ -56,7 +57,6 @@
#include "regex.h"
#include "dfa.h"
-#include "hard-locale.h"
#include "xalloc.h"
/* HPUX, define those as macros in sys/param.h */
@@ -657,7 +657,6 @@ static int laststart; /* True if we're separated from beginning or (, |
only by zero-width characters. */
static int parens; /* Count of outstanding left parens. */
static int minrep, maxrep; /* Repeat counts for {m,n}. */
-static int hard_LC_COLLATE; /* Nonzero if LC_COLLATE is hard. */
static int cur_mb_len = 1; /* Length of the multibyte representation of
wctok. */
@@ -992,26 +991,8 @@ parse_bracket_exp (void)
c1 = tolower (c1);
c2 = tolower (c2);
}
- if (!hard_LC_COLLATE)
- for (c = c1; c <= c2; c++)
- setbit_case_fold_c (c, ccl);
- else
- {
- /* Defer to the system regex library about the meaning
- of range expressions. */
- regex_t re;
- char pattern[6] = { '[', c1, '-', c2, ']', 0 };
- char subject[2] = { 0, 0 };
- regcomp (&re, pattern, REG_NOSUB);
- for (c = 0; c < NOTCHAR; ++c)
- {
- subject[0] = c;
- if (!(case_fold && isupper (c))
- && regexec (&re, subject, 0, NULL, 0) != REG_NOMATCH)
- setbit_case_fold_c (c, ccl);
- }
- regfree (&re);
- }
+ for (c = c1; c <= c2; c++)
+ setbit_case_fold_c (c, ccl);
}
colon_warning_state |= 8;
@@ -1792,9 +1773,6 @@ dfaparse (char const *s, size_t len, struct dfa *d)
lasttok = END;
laststart = 1;
parens = 0;
-#ifdef LC_COLLATE
- hard_LC_COLLATE = hard_locale (LC_COLLATE);
-#endif
if (MB_CUR_MAX > 1)
{
cur_mb_len = 0;
@@ -2884,7 +2862,6 @@ match_mb_charset (struct dfa *d, int s, position pos, int idx)
with which this operator match. */
int op_len; /* Length of the operator. */
char buffer[128];
- wchar_t wcbuf[6];
/* Pointer to the structure to which we are currently refering. */
struct mb_char_classes *work_mbc;
@@ -2961,17 +2938,11 @@ match_mb_charset (struct dfa *d, int s, position pos, int idx)
}
}
- wcbuf[0] = wc;
- wcbuf[1] = wcbuf[3] = wcbuf[5] = '\0';
-
/* match with a range? */
for (i = 0; i<work_mbc->nranges; i++)
{
- wcbuf[2] = work_mbc->range_sts[i];
- wcbuf[4] = work_mbc->range_ends[i];
-
- if (wcscoll(wcbuf, wcbuf+2) >= 0 &&
- wcscoll(wcbuf+4, wcbuf) >= 0)
+ if (work_mbc->range_sts[i] <= wc &&
+ wc <= work_mbc->range_ends[i])
goto charset_matched;
}
--
1.7.1
- Rational Range Interpretation patches, 1/3,
Aharon Robbins <=