[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH v2 28/60] json: Fix \uXXXX for surrogate pairs
From: Markus Armbruster
Subject: [Qemu-devel] [PATCH v2 28/60] json: Fix \uXXXX for surrogate pairs
Date: Fri, 17 Aug 2018 17:05:27 +0200
The JSON parser treats each half of a surrogate pair as an unpaired
surrogate. Fix it to recognize surrogate pairs.
Signed-off-by: Markus Armbruster <address@hidden>
Reviewed-by: Eric Blake <address@hidden>
---
qobject/json-parser.c | 60 ++++++++++++++++++++++++++++---------------
tests/check-qjson.c | 3 +--
2 files changed, 40 insertions(+), 23 deletions(-)
diff --git a/qobject/json-parser.c b/qobject/json-parser.c
index 9985d9929b..35c201c53f 100644
--- a/qobject/json-parser.c
+++ b/qobject/json-parser.c
@@ -64,16 +64,27 @@ static void GCC_FMT_ATTR(3, 4)
parse_error(JSONParserContext *ctxt,
error_setg(&ctxt->err, "JSON parse error, %s", message);
}
-static int hex2decimal(char ch)
+static int cvt4hex(const char *s)
{
- if (ch >= '0' && ch <= '9') {
- return (ch - '0');
- } else if (ch >= 'a' && ch <= 'f') {
- return 10 + (ch - 'a');
- } else if (ch >= 'A' && ch <= 'F') {
- return 10 + (ch - 'A');
+ int cp, i;
+
+ cp = 0;
+ for (i = 0; i < 4; i++) {
+ if (!qemu_isxdigit(s[i])) {
+ return -1;
+ }
+ cp <<= 4;
+ if (s[i] >= '0' && s[i] <= '9') {
+ cp |= s[i] - '0';
+ } else if (s[i] >= 'a' && s[i] <= 'f') {
+ cp |= 10 + s[i] - 'a';
+ } else if (s[i] >= 'A' && s[i] <= 'F') {
+ cp |= 10 + s[i] - 'A';
+ } else {
+ return -1;
+ }
}
- abort();
+ return cp;
}
/**
@@ -115,7 +126,8 @@ static QString *parse_string(JSONParserContext *ctxt,
JSONToken *token)
const char *ptr = token->str;
QString *str;
char quote;
- int cp, i;
+ const char *beg;
+ int cp, trailing;
char *end;
ssize_t len;
char utf8_buf[5];
@@ -127,7 +139,7 @@ static QString *parse_string(JSONParserContext *ctxt,
JSONToken *token)
while (*ptr != quote) {
assert(*ptr);
if (*ptr == '\\') {
- ptr++;
+ beg = ptr++;
switch (*ptr++) {
case '"':
qstring_append_chr(str, '"');
@@ -157,22 +169,28 @@ static QString *parse_string(JSONParserContext *ctxt,
JSONToken *token)
qstring_append_chr(str, '\t');
break;
case 'u':
- cp = 0;
- for (i = 0; i < 4; i++) {
- if (!qemu_isxdigit(*ptr)) {
- parse_error(ctxt, token,
- "invalid hex escape sequence in string");
- goto out;
+ cp = cvt4hex(ptr);
+ ptr += 4;
+
+ /* handle surrogate pairs */
+ if (cp >= 0xD800 && cp <= 0xDBFF
+ && ptr[0] == '\\' && ptr[1] == 'u') {
+ /* leading surrogate followed by \u */
+ cp = 0x10000 + ((cp & 0x3FF) << 10);
+ trailing = cvt4hex(ptr + 2);
+ if (trailing >= 0xDC00 && trailing <= 0xDFFF) {
+ /* followed by trailing surrogate */
+ cp |= trailing & 0x3FF;
+ ptr += 6;
+ } else {
+ cp = -1; /* invalid */
}
- cp <<= 4;
- cp |= hex2decimal(*ptr);
- ptr++;
}
if (mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp) < 0) {
parse_error(ctxt, token,
- "\\u%.4s is not a valid Unicode character",
- ptr - 3);
+ "%.*s is not a valid Unicode character",
+ (int)(ptr - beg), beg);
goto out;
}
qstring_append(str, utf8_buf);
diff --git a/tests/check-qjson.c b/tests/check-qjson.c
index 5c94c80241..3be32f3fcb 100644
--- a/tests/check-qjson.c
+++ b/tests/check-qjson.c
@@ -63,8 +63,7 @@ static void escaped_string(void)
{ "double byte utf-8 \\u00A2", "double byte utf-8 \xc2\xa2" },
{ "triple byte utf-8 \\u20AC", "triple byte utf-8 \xe2\x82\xac" },
{ "quadruple byte utf-8 \\uD834\\uDD1E", /* U+1D11E */
- /* bug: want \xF0\x9D\x84\x9E */
- NULL },
+ "quadruple byte utf-8 \xF0\x9D\x84\x9E" },
{ "\\", NULL },
{ "\\z", NULL },
{ "\\ux", NULL },
--
2.17.1
- [Qemu-devel] [PATCH v2 09/60] check-qjson: Cover escaped characters more thoroughly, part 2, (continued)
- [Qemu-devel] [PATCH v2 09/60] check-qjson: Cover escaped characters more thoroughly, part 2, Markus Armbruster, 2018/08/17
- [Qemu-devel] [PATCH v2 42/60] json: Improve names of lexer states related to numbers, Markus Armbruster, 2018/08/17
- [Qemu-devel] [PATCH v2 34/60] json: Don't pass null @tokens to json_parser_parse(), Markus Armbruster, 2018/08/17
- [Qemu-devel] [PATCH v2 23/60] json: Leave rejecting invalid UTF-8 to parser, Markus Armbruster, 2018/08/17
- [Qemu-devel] [PATCH v2 30/60] json: remove useless return value from lexer/parser, Markus Armbruster, 2018/08/17
- [Qemu-devel] [PATCH v2 25/60] json: Leave rejecting invalid escape sequences to parser, Markus Armbruster, 2018/08/17
- [Qemu-devel] [PATCH v2 38/60] json: Pass lexical errors and limit violations to callback, Markus Armbruster, 2018/08/17
- [Qemu-devel] [PATCH v2 44/60] json: Fix latent parser aborts at end of input, Markus Armbruster, 2018/08/17
- [Qemu-devel] [PATCH v2 28/60] json: Fix \uXXXX for surrogate pairs,
Markus Armbruster <=
- [Qemu-devel] [PATCH v2 21/60] json: Reject invalid UTF-8 sequences, Markus Armbruster, 2018/08/17
- [Qemu-devel] [PATCH v2 24/60] json: Accept overlong \xC0\x80 as U+0000 ("modified UTF-8"), Markus Armbruster, 2018/08/17
- [Qemu-devel] [PATCH v2 13/60] check-qjson: Fix utf8_string() to test all invalid sequences, Markus Armbruster, 2018/08/17
- [Qemu-devel] [PATCH v2 18/60] json: Revamp lexer documentation, Markus Armbruster, 2018/08/17
- [Qemu-devel] [PATCH v2 27/60] json: Reject invalid \uXXXX, fix \u0000, Markus Armbruster, 2018/08/17
- [Qemu-devel] [PATCH v2 36/60] json: Rename token JSON_ESCAPE & friends to JSON_INTERPOL, Markus Armbruster, 2018/08/17
- [Qemu-devel] [PATCH v2 52/60] json: Eliminate lexer state IN_WHITESPACE, pseudo-token JSON_SKIP, Markus Armbruster, 2018/08/17