[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v2 4/4] nfkc.c: reuse internally calculated lengths for buffer ov
From: |
Karlson2k |
Subject: |
[PATCH v2 4/4] nfkc.c: reuse internally calculated lengths for buffer overrun protection, adjust internal API to public API: use either string length OR null-termination, and do not stop processing on null char if length is specified. |
Date: |
Mon, 4 Apr 2016 16:05:40 +0300 |
---
lib/nfkc.c | 40 ++++++++++++++++++++++++----------------
1 file changed, 24 insertions(+), 16 deletions(-)
diff --git a/lib/nfkc.c b/lib/nfkc.c
index 77ebc04..e6557ec 100644
--- a/lib/nfkc.c
+++ b/lib/nfkc.c
@@ -440,7 +440,7 @@ g_utf8_to_ucs4_fast (const gchar * str, gssize len, gsize *
items_written)
/*
* g_ucs4_to_utf8:
* @str: a UCS-4 encoded string
- * @len: the maximum length (number of characters) of @str to use.
+ * @len: the length (number of characters) of @str to use.
* If @len < 0, then the string is nul-terminated.
* @items_read: location to store number of characters read, or %NULL.
* @items_written: location to store number of bytes written or %NULL.
@@ -471,11 +471,8 @@ g_ucs4_to_utf8 (const gunichar * str,
gsize i;
result_length = 0;
- for (i = 0; len < 0 || i < len; i++)
+ for (i = 0; (len < 0) ? (!str[i]) : (i < len); i++)
{
- if (!str[i])
- break;
-
if (str[i] > 0x10FFFF)
goto err_out;
@@ -761,10 +758,12 @@ combine (gunichar a, gunichar b, gunichar * result)
return FALSE;
}
+/* result_len is in wide chars, not including terminating zero */
static gunichar *
-_g_utf8_normalize_wc (const gchar * str, gssize max_len, GNormalizeMode mode)
+_g_utf8_normalize_wc (const gchar * str, gssize len, GNormalizeMode mode,
gsize * result_len)
{
gsize n_wc;
+ gsize calc_len;
gunichar *wc_buffer;
const char *p;
gsize last_start;
@@ -773,7 +772,7 @@ _g_utf8_normalize_wc (const gchar * str, gssize max_len,
GNormalizeMode mode)
n_wc = 0;
p = str;
- while ((max_len < 0 || p < str + max_len) && *p)
+ while ((len < 0) ? *p : (p < str + len))
{
const gchar *decomp;
gunichar wc = g_utf8_get_char (p);
@@ -797,6 +796,7 @@ _g_utf8_normalize_wc (const gchar * str, gssize max_len,
GNormalizeMode mode)
p = g_utf8_next_char (p);
}
+ calc_len = p - str;
wc_buffer = g_malloc (sizeof (gunichar) * (n_wc + 1));
if (!wc_buffer)
return NULL;
@@ -804,7 +804,7 @@ _g_utf8_normalize_wc (const gchar * str, gssize max_len,
GNormalizeMode mode)
last_start = 0;
n_wc = 0;
p = str;
- while ((max_len < 0 || p < str + max_len) && *p)
+ while (p < str + calc_len)
{
gunichar wc = g_utf8_get_char (p);
const gchar *decomp;
@@ -893,6 +893,8 @@ _g_utf8_normalize_wc (const gchar * str, gssize max_len,
GNormalizeMode mode)
}
wc_buffer[n_wc] = 0;
+ if (result_len)
+ *result_len = n_wc;
return wc_buffer;
}
@@ -902,6 +904,9 @@ _g_utf8_normalize_wc (const gchar * str, gssize max_len,
GNormalizeMode mode)
* @str: a UTF-8 encoded string.
* @len: length of @str, in bytes, or -1 if @str is nul-terminated.
* @mode: the type of normalization to perform.
+ * @result_len: location to store length of returned string
+ * in bytes (not including terminating zero),
+ * ignored if %NULL
*
* Converts a string into canonical form, standardizing
* such issues as whether a character with an accent
@@ -934,12 +939,14 @@ _g_utf8_normalize_wc (const gchar * str, gssize max_len,
GNormalizeMode mode)
* valid UTF-8.
**/
static gchar *
-g_utf8_normalize (const gchar * str, gssize len, GNormalizeMode mode)
+g_utf8_normalize (const gchar * str, gssize len, GNormalizeMode mode,
+ gsize * result_len)
{
- gunichar *result_wc = _g_utf8_normalize_wc (str, len, mode);
+ gsize wc_len;
+ gunichar *result_wc = _g_utf8_normalize_wc (str, len, mode, &wc_len);
gchar *result;
- result = g_ucs4_to_utf8 (result_wc, -1, NULL, NULL);
+ result = g_ucs4_to_utf8 (result_wc, (gssize)wc_len, NULL, result_len);
g_free (result_wc);
return result;
@@ -1011,13 +1018,13 @@ stringprep_utf8_to_ucs4 (const char *str, ssize_t len,
size_t * items_written)
if (u8_check ((const uint8_t *) str, n))
return NULL;
- return g_utf8_to_ucs4_fast (str, len, items_written);
+ return g_utf8_to_ucs4_fast (str, (ssize_t)n, items_written);
}
/**
* stringprep_ucs4_to_utf8:
* @str: a UCS-4 encoded string
- * @len: the maximum length of @str to use. If @len < 0, then
+ * @len: the length of @str to use. If @len < 0, then
* the string is terminated with a 0 character.
* @items_read: location to store number of characters read read, or %NULL.
* @items_written: location to store number of bytes written or %NULL.
@@ -1073,7 +1080,7 @@ stringprep_utf8_nfkc_normalize (const char *str, ssize_t
len)
if (u8_check ((const uint8_t *) str, n))
return NULL;
- return g_utf8_normalize (str, len, G_NORMALIZE_NFKC);
+ return g_utf8_normalize (str, (ssize_t)n, G_NORMALIZE_NFKC, NULL);
}
/**
@@ -1092,9 +1099,10 @@ stringprep_ucs4_nfkc_normalize (const uint32_t * str,
ssize_t len)
{
char *p;
uint32_t *result_wc;
+ size_t wc_len;
- p = stringprep_ucs4_to_utf8 (str, len, 0, 0);
- result_wc = _g_utf8_normalize_wc (p, -1, G_NORMALIZE_NFKC);
+ p = stringprep_ucs4_to_utf8 (str, len, 0, &wc_len);
+ result_wc = _g_utf8_normalize_wc (p, (ssize_t)wc_len, G_NORMALIZE_NFKC,
NULL);
free (p);
return result_wc;
--
2.8.0.windows.1
- [PATCH 0/3] Series of fixes for Win x64, Karlson2k, 2016/04/04
- [PATCH 1/3] nfkc.c: Fixed invalid variable types and invalid pointer casting., Karlson2k, 2016/04/04
- [PATCH 3/3] nfkc.c: reuse internally calculated lengths for buffer overrun protection, adjust internal API to public API: use either string length OR null-termination, and do not stop processing on null char if length is specified., Karlson2k, 2016/04/04
- [PATCH 2/3] nfkc.c: int/size_t fixes in find_decomposition(), Karlson2k, 2016/04/04
- [PATCH v2 0/4] Crash fixed on Win x64, Karlson2k, 2016/04/04
- [PATCH v2 4/4] nfkc.c: reuse internally calculated lengths for buffer overrun protection, adjust internal API to public API: use either string length OR null-termination, and do not stop processing on null char if length is specified.,
Karlson2k <=
- [PATCH v2 1/4] nfkc.c: Fix incorrect "items_written" returned by stringprep_utf8_to_ucs4() on platforms with sizeof(size_t) != sizeof(long) (i.e. Win x64). Also fixed crash in various function which use value of "items_written" (like tld_check_8z(), tld_check_lz()), Karlson2k, 2016/04/04
- [PATCH v2 3/4] nfkc.c: int/size_t fixes in find_decomposition(), Karlson2k, 2016/04/04
- [PATCH v2 2/4] nfkc.c: Fixed invalid variable types and invalid pointer casting., Karlson2k, 2016/04/04