[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH v3 28/58] json: Reject invalid \uXXXX, fix \u0000
From: Markus Armbruster
Subject: [Qemu-devel] [PATCH v3 28/58] json: Reject invalid \uXXXX, fix \u0000
Date: Thu, 23 Aug 2018 18:39:55 +0200
The JSON parser translates invalid \uXXXX to garbage instead of
rejecting it, and swallows \u0000.
Fix by using mod_utf8_encode() instead of flawed wchar_to_utf8().
Valid surrogate pairs are now differently broken: they're rejected
instead of translated to garbage. The next commit will fix them.
Signed-off-by: Markus Armbruster <address@hidden>
Reviewed-by: Eric Blake <address@hidden>
---
qobject/json-parser.c | 35 ++++++-----------------------------
tests/check-qjson.c | 41 +++++++++++------------------------------
2 files changed, 17 insertions(+), 59 deletions(-)
diff --git a/qobject/json-parser.c b/qobject/json-parser.c
index 9cb363f7e1..e49da192fe 100644
--- a/qobject/json-parser.c
+++ b/qobject/json-parser.c
@@ -64,34 +64,6 @@ static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
error_setg(&ctxt->err, "JSON parse error, %s", message);
}
-/**
- * String helpers
- *
- * These helpers are used to unescape strings.
- */
-static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length)
-{
- if (wchar <= 0x007F) {
- BUG_ON(buffer_length < 2);
-
- buffer[0] = wchar & 0x7F;
- buffer[1] = 0;
- } else if (wchar <= 0x07FF) {
- BUG_ON(buffer_length < 3);
-
- buffer[0] = 0xC0 | ((wchar >> 6) & 0x1F);
- buffer[1] = 0x80 | (wchar & 0x3F);
- buffer[2] = 0;
- } else {
- BUG_ON(buffer_length < 4);
-
- buffer[0] = 0xE0 | ((wchar >> 12) & 0x0F);
- buffer[1] = 0x80 | ((wchar >> 6) & 0x3F);
- buffer[2] = 0x80 | (wchar & 0x3F);
- buffer[3] = 0;
- }
-}
-
static int hex2decimal(char ch)
{
if (ch >= '0' && ch <= '9') {
@@ -197,7 +169,12 @@ static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
ptr++;
}
- wchar_to_utf8(cp, utf8_buf, sizeof(utf8_buf));
+ if (mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp) < 0) {
+ parse_error(ctxt, token,
+ "\\u%.4s is not a valid Unicode character",
+ ptr - 3);
+ goto out;
+ }
qstring_append(str, utf8_buf);
break;
default:
diff --git a/tests/check-qjson.c b/tests/check-qjson.c
index 3abf12b4d2..4abb5847ad 100644
--- a/tests/check-qjson.c
+++ b/tests/check-qjson.c
@@ -64,7 +64,7 @@ static void escaped_string(void)
{ "triple byte utf-8 \\u20AC", "triple byte utf-8 \xe2\x82\xac" },
{ "quadruple byte utf-8 \\uD834\\uDD1E", /* U+1D11E */
/* bug: want \xF0\x9D\x84\x9E */
- "quadruple byte utf-8 \xED\xA0\xB4\xED\xB4\x9E", .skip = 1 },
+ NULL },
{ "\\", NULL },
{ "\\z", NULL },
{ "\\ux", NULL },
@@ -72,35 +72,16 @@ static void escaped_string(void)
{ "\\u12x", NULL },
{ "\\u123x", NULL },
{ "\\u12345", "\341\210\2645" },
- { "\\u0000x", "x", .skip = 1}, /* bug: want \xC0\x80x */
- { "unpaired leading surrogate \\uD800",
- /* bug: not rejected */
- "unpaired leading surrogate \355\240\200", .skip = 1 },
- { "unpaired leading surrogate \\uD800\\uCAFE",
- /* bug: not rejected */
- "unpaired leading surrogate \355\240\200\354\253\276", .skip = 1 },
- { "unpaired leading surrogate \\uD800\\uD801\\uDC02",
- /* bug: not rejected */
- "unpaired leading surrogate \355\240\200\355\240\201\355\260\202",
- .skip = 1 },
- { "unpaired trailing surrogate \\uDC00",
- /* bug: not rejected */
- "unpaired trailing surrogate \355\260\200", .skip = 1},
- { "backward surrogate pair \\uDC00\\uD800",
- /* bug: not rejected */
- "backward surrogate pair \355\260\200\355\240\200", .skip = 1},
- { "noncharacter U+FDD0 \\uFDD0",
- /* bug: not rejected */
- "noncharacter U+FDD0 \xEF\xB7\x90", .skip = 1},
- { "noncharacter U+FDEF \\uFDEF",
- /* bug: not rejected */
- "noncharacter U+FDEF \xEF\xB7\xAF", .skip = 1},
- { "noncharacter U+1FFFE \\uD87F\\uDFFE",
- /* bug: not rejected */
- "noncharacter U+1FFFE \xED\xA1\xBF\xED\xBF\xBE", .skip = 1},
- { "noncharacter U+10FFFF \\uDC3F\\uDFFF",
- /* bug: not rejected */
- "noncharacter U+10FFFF \xED\xB0\xBF\xED\xBF\xBF", .skip = 1},
+ { "\\u0000x", "\xC0\x80x" },
+ { "unpaired leading surrogate \\uD800", NULL },
+ { "unpaired leading surrogate \\uD800\\uCAFE", NULL },
+ { "unpaired leading surrogate \\uD800\\uD801\\uDC02", NULL },
+ { "unpaired trailing surrogate \\uDC00", NULL },
+ { "backward surrogate pair \\uDC00\\uD800", NULL },
+ { "noncharacter U+FDD0 \\uFDD0", NULL },
+ { "noncharacter U+FDEF \\uFDEF", NULL },
+ { "noncharacter U+1FFFE \\uD87F\\uDFFE", NULL },
+ { "noncharacter U+10FFFF \\uDC3F\\uDFFF", NULL },
{}
};
int i, j;
--
2.17.1
- [Qemu-devel] [PATCH v3 34/58] json: Redesign the callback to consume JSON values, (continued)
- [Qemu-devel] [PATCH v3 34/58] json: Redesign the callback to consume JSON values, Markus Armbruster, 2018/08/23
- [Qemu-devel] [PATCH v3 21/58] check-qjson: Document we expect invalid UTF-8 to be rejected, Markus Armbruster, 2018/08/23
- [Qemu-devel] [PATCH v3 33/58] json: Have lexer call streamer directly, Markus Armbruster, 2018/08/23
- [Qemu-devel] [PATCH v3 39/58] json: Pass lexical errors and limit violations to callback, Markus Armbruster, 2018/08/23
- [Qemu-devel] [PATCH v3 26/58] json: Leave rejecting invalid escape sequences to parser, Markus Armbruster, 2018/08/23
- [Qemu-devel] [PATCH v3 31/58] json: remove useless return value from lexer/parser, Markus Armbruster, 2018/08/23
- [Qemu-devel] [PATCH v3 22/58] json: Reject invalid UTF-8 sequences, Markus Armbruster, 2018/08/23
- [Qemu-devel] [PATCH v3 29/58] json: Fix \uXXXX for surrogate pairs, Markus Armbruster, 2018/08/23
- [Qemu-devel] [PATCH v3 37/58] json: Rename token JSON_ESCAPE & friends to JSON_INTERP, Markus Armbruster, 2018/08/23
- [Qemu-devel] [PATCH v3 44/58] json: Fix latent parser aborts at end of input, Markus Armbruster, 2018/08/23
- [Qemu-devel] [PATCH v3 28/58] json: Reject invalid \uXXXX, fix \u0000, Markus Armbruster <=
- [Qemu-devel] [PATCH v3 19/58] json: Revamp lexer documentation, Markus Armbruster, 2018/08/23
- [Qemu-devel] [PATCH v3 17/58] json: Fix lexer to include the bad character in JSON_ERROR token, Markus Armbruster, 2018/08/23
- [Qemu-devel] [PATCH v3 18/58] json: Reject unescaped control characters, Markus Armbruster, 2018/08/23
- [Qemu-devel] [PATCH v3 35/58] json: Don't pass null @tokens to json_parser_parse(), Markus Armbruster, 2018/08/23
- [Qemu-devel] [PATCH v3 36/58] json: Don't create JSON_ERROR tokens that won't be used, Markus Armbruster, 2018/08/23
- [Qemu-devel] [PATCH v3 27/58] json: Simplify parse_string(), Markus Armbruster, 2018/08/23
- [Qemu-devel] [PATCH v3 10/58] check-qjson: Cover escaped characters more thoroughly, part 2, Markus Armbruster, 2018/08/23
- [Qemu-devel] [PATCH v3 16/58] check-qjson: Cover interpolation more thoroughly, Markus Armbruster, 2018/08/23
- [Qemu-devel] [PATCH v3 25/58] json: Accept overlong \xC0\x80 as U+0000 ("modified UTF-8"), Markus Armbruster, 2018/08/23
- [Qemu-devel] [PATCH v3 04/58] check-qjson: Cover whitespace more thoroughly, Markus Armbruster, 2018/08/23