[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
grep branch, master, updated. v2.18-17-gd9f7791
From: |
Paul Eggert |
Subject: |
grep branch, master, updated. v2.18-17-gd9f7791 |
Date: |
Mon, 03 Mar 2014 07:03:45 +0000 |
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "grep".
The branch, master has been updated
via d9f7791cf94cfe7ac8227298aa780c4462cfc312 (commit)
via 41c233f4fa77a1aec09639b656f6a29d6e6f325f (commit)
via 0ffd7d26dc8f7627d2b4bd745ede7049a09b0dd9 (commit)
from 248f8490f1cf4e3e4f4e1b79a4c3fd61c23251c3 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://git.savannah.gnu.org/cgit/grep.git/commit/?id=d9f7791cf94cfe7ac8227298aa780c4462cfc312
commit d9f7791cf94cfe7ac8227298aa780c4462cfc312
Author: Paul Eggert <address@hidden>
Date: Sun Mar 2 23:02:22 2014 -0800
grep: fix some unlikely bugs in trivial_case_ignore
* src/main.c (MBRTOWC, WCRTOMB): Reformat as per usual GNU style.
(trivial_case_ignore): Don't overrun buffer in the unusual case
when a character has both lowercase and uppercase counterparts.
Don't rely on undefined behavior when assigning out-of-range value
to an 'int'. Simplify by avoiding unnecessary buffer copies.
Work even with shift encodings, by using mbsinit to
disable the optimization if we are not in the initial state
when we replace B by [BCD].
diff --git a/src/main.c b/src/main.c
index 2ee585a..14b7be2 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1867,19 +1867,20 @@ parse_grep_colors (void)
return;
}
+#define MBRTOWC(pwc, s, n, ps) \
+ (MB_CUR_MAX == 1 \
+ ? (*(pwc) = btowc (*(unsigned char *) (s)), 1) \
+ : mbrtowc (pwc, s, n, ps))
+#define WCRTOMB(s, wc, ps) \
+ (MB_CUR_MAX == 1 \
+ ? (*(s) = wctob ((wint_t) (wc)), 1) \
+ : wcrtomb (s, wc, ps))
+
/* If the newline-separated regular expressions, KEYS (with length, LEN
and no trailing NUL byte), are amenable to transformation into
otherwise equivalent case-ignoring ones, perform the transformation,
put the result into malloc'd memory, *NEW_KEYS with length *NEW_LEN,
and return true. Otherwise, return false. */
-#define MBRTOWC(pwc, s, n, ps) \
- (MB_CUR_MAX == 1 ? \
- (*(pwc) = btowc (*(unsigned char *) (s)), 1) : \
- mbrtowc ((pwc), (s), (n), (ps)))
-#define WCRTOMB(s, wc, ps) \
- (MB_CUR_MAX == 1 ? \
- (*(s) = wctob ((wint_t) (wc)), 1) : \
- wcrtomb ((s), (wc), (ps)))
static bool
trivial_case_ignore (size_t len, char const *keys,
@@ -1890,21 +1891,23 @@ trivial_case_ignore (size_t len, char const *keys,
if (memchr (keys, '\\', len) || memchr (keys, '[', len))
return false;
- /* Worst case is that each byte B of KEYS is ASCII alphabetic and each
- other_case(B) character, C, occupies MB_CUR_MAX bytes, so each B
- maps to [BC], which requires MB_CUR_MAX + 3 bytes. */
- *new_keys = xnmalloc (MB_CUR_MAX + 3, len + 1);
+ /* Worst case is that each byte B of KEYS is ASCII alphabetic and
+ the two other_case(B) characters, C and D, each occupies
+ MB_CUR_MAX bytes, so each B maps to [BCD], which requires 2 *
+ MB_CUR_MAX + 3 bytes; this is bounded above by the constant
+ expression 2 * MB_LEN_MAX + 3. */
+ *new_keys = xnmalloc (len + 1, 2 * MB_LEN_MAX + 3);
char *p = *new_keys;
- mbstate_t mb_state;
- memset (&mb_state, 0, sizeof mb_state);
+ mbstate_t mb_state = { 0 };
while (len)
{
+ bool initial_state = mbsinit (&mb_state) != 0;
wchar_t wc;
- int n = MBRTOWC (&wc, keys, len, &mb_state);
+ size_t n = MBRTOWC (&wc, keys, len, &mb_state);
/* For an invalid, incomplete or L'\0', skip this optimization. */
- if (n <= 0)
+ if ((size_t) -2 <= n)
{
skip_case_ignore_optimization:
free (*new_keys);
@@ -1915,39 +1918,30 @@ trivial_case_ignore (size_t len, char const *keys,
keys += n;
len -= n;
- if (!iswalpha (wc))
+ wint_t lc = towlower (wc);
+ wint_t uc = towupper (wc);
+ if (lc == wc && uc == wc)
{
memcpy (p, orig, n);
p += n;
}
+ else if (! initial_state)
+ goto skip_case_ignore_optimization;
else
{
*p++ = '[';
memcpy (p, orig, n);
p += n;
- wint_t folded = towlower (wc);
- if (folded != wc)
- {
- char buf[MB_CUR_MAX];
- int n2 = WCRTOMB (buf, folded, &mb_state);
- if (n2 <= 0)
- goto skip_case_ignore_optimization;
- assert (n2 <= MB_CUR_MAX);
- memcpy (p, buf, n2);
- p += n2;
- }
- folded = towupper (wc);
- if (folded != wc)
- {
- char buf[MB_CUR_MAX];
- int n2 = WCRTOMB (buf, folded, &mb_state);
- if (n2 <= 0)
- goto skip_case_ignore_optimization;
- assert (n2 <= MB_CUR_MAX);
- memcpy (p, buf, n2);
- p += n2;
- }
+ size_t lcbytes = WCRTOMB (p, lc, &mb_state);
+ if (lcbytes == (size_t) -1)
+ goto skip_case_ignore_optimization;
+ p += lcbytes;
+
+ size_t ucbytes = WCRTOMB (p, uc, &mb_state);
+ if (ucbytes == (size_t) -1 || ! mbsinit (&mb_state))
+ goto skip_case_ignore_optimization;
+ p += ucbytes;
*p++ = ']';
}
http://git.savannah.gnu.org/cgit/grep.git/commit/?id=41c233f4fa77a1aec09639b656f6a29d6e6f325f
commit 41c233f4fa77a1aec09639b656f6a29d6e6f325f
Author: Paul Eggert <address@hidden>
Date: Sun Mar 2 23:02:22 2014 -0800
grep: fix some unlikely bugs in trivial_case_ignore
* src/main.c (MBRTOWC, WCRTOMB): Reformat as per usual GNU style.
(trivial_case_ignore): Don't overrun buffer in the unusual case
when a character has both lowercase and uppercase counterparts.
Don't rely on undefined behavior when assigning out-of-range value
to an 'int'. Simplify by avoiding unnecessary buffer copies.
Work even with shift encodings, by using mbsinit to
disable the optimization if we are not in the initial state
when we replace B by [BCD].
diff --git a/src/main.c b/src/main.c
index 2ee585a..14b7be2 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1867,19 +1867,20 @@ parse_grep_colors (void)
return;
}
+#define MBRTOWC(pwc, s, n, ps) \
+ (MB_CUR_MAX == 1 \
+ ? (*(pwc) = btowc (*(unsigned char *) (s)), 1) \
+ : mbrtowc (pwc, s, n, ps))
+#define WCRTOMB(s, wc, ps) \
+ (MB_CUR_MAX == 1 \
+ ? (*(s) = wctob ((wint_t) (wc)), 1) \
+ : wcrtomb (s, wc, ps))
+
/* If the newline-separated regular expressions, KEYS (with length, LEN
and no trailing NUL byte), are amenable to transformation into
otherwise equivalent case-ignoring ones, perform the transformation,
put the result into malloc'd memory, *NEW_KEYS with length *NEW_LEN,
and return true. Otherwise, return false. */
-#define MBRTOWC(pwc, s, n, ps) \
- (MB_CUR_MAX == 1 ? \
- (*(pwc) = btowc (*(unsigned char *) (s)), 1) : \
- mbrtowc ((pwc), (s), (n), (ps)))
-#define WCRTOMB(s, wc, ps) \
- (MB_CUR_MAX == 1 ? \
- (*(s) = wctob ((wint_t) (wc)), 1) : \
- wcrtomb ((s), (wc), (ps)))
static bool
trivial_case_ignore (size_t len, char const *keys,
@@ -1890,21 +1891,23 @@ trivial_case_ignore (size_t len, char const *keys,
if (memchr (keys, '\\', len) || memchr (keys, '[', len))
return false;
- /* Worst case is that each byte B of KEYS is ASCII alphabetic and each
- other_case(B) character, C, occupies MB_CUR_MAX bytes, so each B
- maps to [BC], which requires MB_CUR_MAX + 3 bytes. */
- *new_keys = xnmalloc (MB_CUR_MAX + 3, len + 1);
+ /* Worst case is that each byte B of KEYS is ASCII alphabetic and
+ the two other_case(B) characters, C and D, each occupies
+ MB_CUR_MAX bytes, so each B maps to [BCD], which requires 2 *
+ MB_CUR_MAX + 3 bytes; this is bounded above by the constant
+ expression 2 * MB_LEN_MAX + 3. */
+ *new_keys = xnmalloc (len + 1, 2 * MB_LEN_MAX + 3);
char *p = *new_keys;
- mbstate_t mb_state;
- memset (&mb_state, 0, sizeof mb_state);
+ mbstate_t mb_state = { 0 };
while (len)
{
+ bool initial_state = mbsinit (&mb_state) != 0;
wchar_t wc;
- int n = MBRTOWC (&wc, keys, len, &mb_state);
+ size_t n = MBRTOWC (&wc, keys, len, &mb_state);
/* For an invalid, incomplete or L'\0', skip this optimization. */
- if (n <= 0)
+ if ((size_t) -2 <= n)
{
skip_case_ignore_optimization:
free (*new_keys);
@@ -1915,39 +1918,30 @@ trivial_case_ignore (size_t len, char const *keys,
keys += n;
len -= n;
- if (!iswalpha (wc))
+ wint_t lc = towlower (wc);
+ wint_t uc = towupper (wc);
+ if (lc == wc && uc == wc)
{
memcpy (p, orig, n);
p += n;
}
+ else if (! initial_state)
+ goto skip_case_ignore_optimization;
else
{
*p++ = '[';
memcpy (p, orig, n);
p += n;
- wint_t folded = towlower (wc);
- if (folded != wc)
- {
- char buf[MB_CUR_MAX];
- int n2 = WCRTOMB (buf, folded, &mb_state);
- if (n2 <= 0)
- goto skip_case_ignore_optimization;
- assert (n2 <= MB_CUR_MAX);
- memcpy (p, buf, n2);
- p += n2;
- }
- folded = towupper (wc);
- if (folded != wc)
- {
- char buf[MB_CUR_MAX];
- int n2 = WCRTOMB (buf, folded, &mb_state);
- if (n2 <= 0)
- goto skip_case_ignore_optimization;
- assert (n2 <= MB_CUR_MAX);
- memcpy (p, buf, n2);
- p += n2;
- }
+ size_t lcbytes = WCRTOMB (p, lc, &mb_state);
+ if (lcbytes == (size_t) -1)
+ goto skip_case_ignore_optimization;
+ p += lcbytes;
+
+ size_t ucbytes = WCRTOMB (p, uc, &mb_state);
+ if (ucbytes == (size_t) -1 || ! mbsinit (&mb_state))
+ goto skip_case_ignore_optimization;
+ p += ucbytes;
*p++ = ']';
}
http://git.savannah.gnu.org/cgit/grep.git/commit/?id=0ffd7d26dc8f7627d2b4bd745ede7049a09b0dd9
commit 0ffd7d26dc8f7627d2b4bd745ede7049a09b0dd9
Author: Paul Eggert <address@hidden>
Date: Sun Mar 2 23:02:22 2014 -0800
grep: fix some unlikely bugs in trivial_case_ignore
* src/main.c (MBRTOWC, WCRTOMB): Reformat as per usual GNU style.
(trivial_case_ignore): Don't overrun buffer in the unusual case
when a character has both lowercase and uppercase counterparts.
Don't rely on undefined behavior when assigning out-of-range value
to an 'int'. Simplify by avoiding unnecessary buffer copies.
Work even with shift encodings, by using mbsinit to
disable the optimization if we are not in the initial state
when we replace B by [BCD].
diff --git a/src/main.c b/src/main.c
index 2ee585a..14b7be2 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1867,19 +1867,20 @@ parse_grep_colors (void)
return;
}
+#define MBRTOWC(pwc, s, n, ps) \
+ (MB_CUR_MAX == 1 \
+ ? (*(pwc) = btowc (*(unsigned char *) (s)), 1) \
+ : mbrtowc (pwc, s, n, ps))
+#define WCRTOMB(s, wc, ps) \
+ (MB_CUR_MAX == 1 \
+ ? (*(s) = wctob ((wint_t) (wc)), 1) \
+ : wcrtomb (s, wc, ps))
+
/* If the newline-separated regular expressions, KEYS (with length, LEN
and no trailing NUL byte), are amenable to transformation into
otherwise equivalent case-ignoring ones, perform the transformation,
put the result into malloc'd memory, *NEW_KEYS with length *NEW_LEN,
and return true. Otherwise, return false. */
-#define MBRTOWC(pwc, s, n, ps) \
- (MB_CUR_MAX == 1 ? \
- (*(pwc) = btowc (*(unsigned char *) (s)), 1) : \
- mbrtowc ((pwc), (s), (n), (ps)))
-#define WCRTOMB(s, wc, ps) \
- (MB_CUR_MAX == 1 ? \
- (*(s) = wctob ((wint_t) (wc)), 1) : \
- wcrtomb ((s), (wc), (ps)))
static bool
trivial_case_ignore (size_t len, char const *keys,
@@ -1890,21 +1891,23 @@ trivial_case_ignore (size_t len, char const *keys,
if (memchr (keys, '\\', len) || memchr (keys, '[', len))
return false;
- /* Worst case is that each byte B of KEYS is ASCII alphabetic and each
- other_case(B) character, C, occupies MB_CUR_MAX bytes, so each B
- maps to [BC], which requires MB_CUR_MAX + 3 bytes. */
- *new_keys = xnmalloc (MB_CUR_MAX + 3, len + 1);
+ /* Worst case is that each byte B of KEYS is ASCII alphabetic and
+ the two other_case(B) characters, C and D, each occupies
+ MB_CUR_MAX bytes, so each B maps to [BCD], which requires 2 *
+ MB_CUR_MAX + 3 bytes; this is bounded above by the constant
+ expression 2 * MB_LEN_MAX + 3. */
+ *new_keys = xnmalloc (len + 1, 2 * MB_LEN_MAX + 3);
char *p = *new_keys;
- mbstate_t mb_state;
- memset (&mb_state, 0, sizeof mb_state);
+ mbstate_t mb_state = { 0 };
while (len)
{
+ bool initial_state = mbsinit (&mb_state) != 0;
wchar_t wc;
- int n = MBRTOWC (&wc, keys, len, &mb_state);
+ size_t n = MBRTOWC (&wc, keys, len, &mb_state);
/* For an invalid, incomplete or L'\0', skip this optimization. */
- if (n <= 0)
+ if ((size_t) -2 <= n)
{
skip_case_ignore_optimization:
free (*new_keys);
@@ -1915,39 +1918,30 @@ trivial_case_ignore (size_t len, char const *keys,
keys += n;
len -= n;
- if (!iswalpha (wc))
+ wint_t lc = towlower (wc);
+ wint_t uc = towupper (wc);
+ if (lc == wc && uc == wc)
{
memcpy (p, orig, n);
p += n;
}
+ else if (! initial_state)
+ goto skip_case_ignore_optimization;
else
{
*p++ = '[';
memcpy (p, orig, n);
p += n;
- wint_t folded = towlower (wc);
- if (folded != wc)
- {
- char buf[MB_CUR_MAX];
- int n2 = WCRTOMB (buf, folded, &mb_state);
- if (n2 <= 0)
- goto skip_case_ignore_optimization;
- assert (n2 <= MB_CUR_MAX);
- memcpy (p, buf, n2);
- p += n2;
- }
- folded = towupper (wc);
- if (folded != wc)
- {
- char buf[MB_CUR_MAX];
- int n2 = WCRTOMB (buf, folded, &mb_state);
- if (n2 <= 0)
- goto skip_case_ignore_optimization;
- assert (n2 <= MB_CUR_MAX);
- memcpy (p, buf, n2);
- p += n2;
- }
+ size_t lcbytes = WCRTOMB (p, lc, &mb_state);
+ if (lcbytes == (size_t) -1)
+ goto skip_case_ignore_optimization;
+ p += lcbytes;
+
+ size_t ucbytes = WCRTOMB (p, uc, &mb_state);
+ if (ucbytes == (size_t) -1 || ! mbsinit (&mb_state))
+ goto skip_case_ignore_optimization;
+ p += ucbytes;
*p++ = ']';
}
-----------------------------------------------------------------------
Summary of changes:
src/dfa.c | 8 ++--
src/main.c | 114 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 118 insertions(+), 4 deletions(-)
hooks/post-receive
--
grep
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- grep branch, master, updated. v2.18-17-gd9f7791,
Paul Eggert <=