[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
bug#17081: [PATCH] dfa: avoid undefined behavior
From: Paul Eggert
Subject: bug#17081: [PATCH] dfa: avoid undefined behavior
Date: Sun, 23 Mar 2014 23:04:26 -0700
* src/dfa.c (FETCH_WC, addtok_wc): Don't rely on undefined behavior
when converting an out-of-range value to 'int'.
(FETCH_WC, prepare_wc_buf): Don't rely on conversion state after
mbrtowc returns a special value, as it's undefined for (size_t) -1.
(prepare_wc_buf): Simplify test for valid character.
---
src/dfa.c | 42 ++++++++++++++++++++++++------------------
1 file changed, 24 insertions(+), 18 deletions(-)
diff --git a/src/dfa.c b/src/dfa.c
index 92ac1b9..0a2b8b8 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -807,7 +807,7 @@ static int minrep, maxrep; /* Repeat counts for {m,n}.
*/
static int cur_mb_len = 1; /* Length of the multibyte representation of
wctok. */
/* These variables are used only if (MB_CUR_MAX > 1). */
-static mbstate_t mbs; /* Mbstate for mbrlen. */
+static mbstate_t mbs; /* mbstate for mbrtowc. */
static wchar_t wctok; /* Wide character representation of the current
multibyte character. */
static unsigned char *mblen_buf;/* Correspond to the input buffer in dfaexec.
@@ -844,15 +844,18 @@ static unsigned char const *buf_end; /* reference to
end in dfaexec. */
else \
{ \
wchar_t _wc; \
- cur_mb_len = mbrtowc (&_wc, lexptr, lexleft, &mbs); \
- if (cur_mb_len <= 0) \
+ size_t nbytes = mbrtowc (&_wc, lexptr, lexleft, &mbs); \
+ bool valid_char = 1 <= nbytes && nbytes < (size_t) -2; \
+ if (! valid_char) \
{ \
+ memset (&mbs, 0, sizeof mbs); \
cur_mb_len = 1; \
--lexleft; \
(wc) = (c) = to_uchar (*lexptr++); \
} \
else \
{ \
+ cur_mb_len = nbytes; \
lexptr += cur_mb_len; \
lexleft -= cur_mb_len; \
(wc) = _wc; \
@@ -1685,16 +1688,19 @@ static void
addtok_wc (wint_t wc)
{
unsigned char buf[MB_LEN_MAX];
- mbstate_t s;
+ mbstate_t s = { 0 };
int i;
- memset (&s, 0, sizeof s);
- cur_mb_len = wcrtomb ((char *) buf, wc, &s);
+ size_t stored_bytes = wcrtomb ((char *) buf, wc, &s);
- /* This is merely stop-gap. When cur_mb_len is 0 or negative,
- buf[0] is undefined, yet skipping the addtok_mb call altogether
- can result in heap corruption. */
- if (cur_mb_len <= 0)
- buf[0] = 0;
+ if (stored_bytes != (size_t) -1)
+ cur_mb_len = stored_bytes;
+ else
+ {
+ /* This is merely stop-gap. buf[0] is undefined, yet skipping
+ the addtok_mb call altogether can corrupt the heap. */
+ cur_mb_len = 1;
+ buf[0] = 0;
+ }
addtok_mb (buf[0], cur_mb_len == 1 ? 3 : 1);
for (i = 1; i < cur_mb_len; i++)
@@ -3328,13 +3334,13 @@ prepare_wc_buf (const char *begin, const char *end)
{
if (remain_bytes == 0)
{
- remain_bytes
+ size_t nbytes
= mbrtowc (inputwcs + i, begin + i, end - begin - i + 1, &mbs);
- if (remain_bytes < 1
- || remain_bytes == (size_t) -1
- || remain_bytes == (size_t) -2
- || (remain_bytes == 1 && inputwcs[i] == (wchar_t) begin[i]))
+ if (! (1 <= nbytes && nbytes < (size_t) -2)
+ || (nbytes == 1 && inputwcs[i] == (wchar_t) begin[i]))
{
+ if ((size_t) -2 <= nbytes)
+ memset (&mbs, 0, sizeof mbs);
remain_bytes = 0;
inputwcs[i] = (wchar_t) begin[i];
mblen_buf[i] = 0;
@@ -3343,8 +3349,8 @@ prepare_wc_buf (const char *begin, const char *end)
}
else
{
- mblen_buf[i] = remain_bytes;
- remain_bytes--;
+ mblen_buf[i] = nbytes;
+ remain_bytes = nbytes - 1;
}
}
else
--
1.8.5.3
[Prev in Thread] Current Thread [Next in Thread]
- bug#17081: [PATCH] dfa: avoid undefined behavior, Paul Eggert <=