Il 24/03/2014 07:04, Paul Eggert ha scritto:
* src/dfa.c (FETCH_WC, addtok_wc): Don't rely on undefined behavior
when converting an out-of-range value to 'int'.
(FETCH_WC, prepare_wc_buf): Don't rely on conversion state after
mbrtowc returns a special value, as it's undefined for (size_t) -1.
(prepare_wc_buf): Simplify test for valid character.
---
src/dfa.c | 42 ++++++++++++++++++++++++------------------
1 file changed, 24 insertions(+), 18 deletions(-)
diff --git a/src/dfa.c b/src/dfa.c
index 92ac1b9..0a2b8b8 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -807,7 +807,7 @@ static int minrep, maxrep; /* Repeat counts
for {m,n}. */
static int cur_mb_len = 1; /* Length of the multibyte
representation of
wctok. */
/* These variables are used only if (MB_CUR_MAX > 1). */
-static mbstate_t mbs; /* Mbstate for mbrlen. */
+static mbstate_t mbs; /* mbstate for mbrtowc. */
static wchar_t wctok; /* Wide character representation of
the current
multibyte character. */
static unsigned char *mblen_buf;/* Correspond to the input buffer in
dfaexec.
@@ -844,15 +844,18 @@ static unsigned char const *buf_end; /*
reference to end in dfaexec. */
else \
{ \
wchar_t _wc; \
- cur_mb_len = mbrtowc (&_wc, lexptr, lexleft, &mbs); \
- if (cur_mb_len <= 0) \
+ size_t nbytes = mbrtowc (&_wc, lexptr, lexleft, &mbs); \
+ bool valid_char = 1 <= nbytes && nbytes < (size_t) -2; \
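I find these conditionals complicated to follow. In addition, a return
value of 0 is valid (mbrtowc returns 0 when it decodes the null wide
character), so I believe you should simply have written
bool valid_char = nbytes < (size_t) -2;
or, better, tested for the special return values directly: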
+ if (! valid_char) \
if (nbytes >= (size_t) -2)