From 3df90147719110350d9a674cc37e99cbd27a9c3e Mon Sep 17 00:00:00 2001
From: Bruno Haible
Date: Fri, 3 Jan 2020 22:34:07 +0100
Subject: [PATCH 1/5] mbrtowc: Refactor locale charset dispatching.
* lib/lc-charset-dispatch.h: New file, extracted from lib/mbrtowc.c.
* lib/lc-charset-dispatch.c: New file, extracted from lib/mbrtowc.c.
* lib/mbrtowc.c: Include lc-charset-dispatch.h. Don't include
localcharset.h, streq.h.
(enc_t): Remove type.
(locale_enc): Remove function.
(cached_locale_enc): Remove variable.
(locale_enc_cached): Remove function.
(mbrtowc): Invoke locale_encoding_classification.
* m4/mbrtowc.m4 (gl_PREREQ_MBRTOWC): Update comment.
* modules/mbrtowc (Files): Add lc-charset-dispatch.h,
lc-charset-dispatch.c.
(configure.ac): Arrange to compile lc-charset-dispatch.c.
---
ChangeLog | 17 ++++++++++
lib/lc-charset-dispatch.c | 82 +++++++++++++++++++++++++++++++++++++++++++++++
lib/lc-charset-dispatch.h | 40 +++++++++++++++++++++++
lib/mbrtowc.c | 53 ++----------------------------
m4/mbrtowc.m4 | 2 +-
modules/mbrtowc | 3 ++
6 files changed, 145 insertions(+), 52 deletions(-)
create mode 100644 lib/lc-charset-dispatch.c
create mode 100644 lib/lc-charset-dispatch.h
diff --git a/ChangeLog b/ChangeLog
index 6c0d925..930f715 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,20 @@
+2020-01-03 Bruno Haible
+
+ mbrtowc: Refactor locale charset dispatching.
+ * lib/lc-charset-dispatch.h: New file, extracted from lib/mbrtowc.c.
+ * lib/lc-charset-dispatch.c: New file, extracted from lib/mbrtowc.c.
+ * lib/mbrtowc.c: Include lc-charset-dispatch.h. Don't include
+ localcharset.h, streq.h.
+ (enc_t): Remove type.
+ (locale_enc): Remove function.
+ (cached_locale_enc): Remove variable.
+ (locale_enc_cached): Remove function.
+ (mbrtowc): Invoke locale_encoding_classification.
+ * m4/mbrtowc.m4 (gl_PREREQ_MBRTOWC): Update comment.
+ * modules/mbrtowc (Files): Add lc-charset-dispatch.h,
+ lc-charset-dispatch.c.
+ (configure.ac): Arrange to compile lc-charset-dispatch.c.
+
2020-01-03 Paul Eggert
doc: mention 32-bit time_t issue
diff --git a/lib/lc-charset-dispatch.c b/lib/lc-charset-dispatch.c
new file mode 100644
index 0000000..79057d4
--- /dev/null
+++ b/lib/lc-charset-dispatch.c
@@ -0,0 +1,82 @@
+/* Dispatching based on the current locale's character encoding.
+ Copyright (C) 2018-2020 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>. */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2018. */
+
+#include <config.h>
+
+/* Specification. */
+#include "lc-charset-dispatch.h"
+
+#if GNULIB_defined_mbstate_t
+
+# include "localcharset.h"
+# include "streq.h"
+
+# if GNULIB_WCHAR_SINGLE
+/* The locale is known not to change: give the caching variant the public
+   name locale_encoding_classification, so callers get the speedup.  */
+# define locale_encoding_classification_cached locale_encoding_classification
+# else
+/* No such assumption by default: the uncached variant gets the public name. */
+# define locale_encoding_classification_uncached locale_encoding_classification
+# endif
+
+/* Classify the name returned by locale_charset (): the names tested below
+   map to their enc_t value, anything else to enc_other.  Unless
+   GNULIB_WCHAR_SINGLE is set, this is the function exported as
+   locale_encoding_classification; otherwise it is a file-local helper.  */
+# if GNULIB_WCHAR_SINGLE
+static inline
+# endif
+enc_t
+locale_encoding_classification_uncached (void)
+{
+  const char *cs = locale_charset ();
+
+  return
+    STREQ_OPT (cs, "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0, 0) ? enc_utf8
+    : STREQ_OPT (cs, "EUC-JP", 'E', 'U', 'C', '-', 'J', 'P', 0, 0, 0) ? enc_eucjp
+    : (STREQ_OPT (cs, "EUC-KR", 'E', 'U', 'C', '-', 'K', 'R', 0, 0, 0)
+       || STREQ_OPT (cs, "GB2312", 'G', 'B', '2', '3', '1', '2', 0, 0, 0)
+       || STREQ_OPT (cs, "BIG5", 'B', 'I', 'G', '5', 0, 0, 0, 0, 0)) ? enc_94
+    : STREQ_OPT (cs, "EUC-TW", 'E', 'U', 'C', '-', 'T', 'W', 0, 0, 0) ? enc_euctw
+    : STREQ_OPT (cs, "GB18030", 'G', 'B', '1', '8', '0', '3', '0', 0, 0) ? enc_gb18030
+    : STREQ_OPT (cs, "SJIS", 'S', 'J', 'I', 'S', 0, 0, 0, 0, 0) ? enc_sjis
+    : enc_other;
+}
+
+# if GNULIB_WCHAR_SINGLE
+
+/* Memoizing variant: classify on the first call, then reuse that result.  */
+enc_t
+locale_encoding_classification_cached (void)
+{
+  static int cached_locale_enc = -1; /* negative means "not computed yet" */
+  if (cached_locale_enc >= 0)
+    return cached_locale_enc;
+  return cached_locale_enc = locale_encoding_classification_uncached ();
+}
+
+# endif
+
+#else
+
+/* This declaration is solely to ensure that after preprocessing
+ this file is never empty. */
+typedef int dummy;
+
+#endif
diff --git a/lib/lc-charset-dispatch.h b/lib/lc-charset-dispatch.h
new file mode 100644
index 0000000..95c2316
--- /dev/null
+++ b/lib/lc-charset-dispatch.h
@@ -0,0 +1,40 @@
+/* Dispatching based on the current locale's character encoding.
+ Copyright (C) 2018-2020 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>. */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2018. */
+
+#include <wchar.h>
+
+#if GNULIB_defined_mbstate_t
+
+/* Classes into which the encoding of the current locale is sorted: each
+   encoding named below has its own class, all the rest share enc_other.  */
+typedef enum
+{
+  enc_other = 0,   /* any encoding not listed below */
+  enc_utf8,        /* UTF-8 */
+  enc_eucjp,       /* EUC-JP */
+  enc_94,          /* EUC-KR, GB2312, BIG5 */
+  enc_euctw,       /* EUC-TW */
+  enc_gb18030,     /* GB18030 */
+  enc_sjis         /* SJIS */
+} enc_t;
+
+/* Return the enc_t class of the encoding of the current locale; the result
+   is enc_other when that encoding is not one of the specially listed ones. */
+extern enc_t locale_encoding_classification (void);
+
+#endif
diff --git a/lib/mbrtowc.c b/lib/mbrtowc.c
index 066d949..fdef8f9 100644
--- a/lib/mbrtowc.c
+++ b/lib/mbrtowc.c
@@ -54,9 +54,8 @@
# endif
-# include "localcharset.h"
-# include "streq.h"
# include "verify.h"
+# include "lc-charset-dispatch.h"
# include "mbtowc-lock.h"
# ifndef FALLTHROUGH
@@ -67,54 +66,6 @@
# endif
# endif
-/* Returns a classification of special values of the encoding of the current
- locale. */
-typedef enum {
- enc_other, /* other */
- enc_utf8, /* UTF-8 */
- enc_eucjp, /* EUC-JP */
- enc_94, /* EUC-KR, GB2312, BIG5 */
- enc_euctw, /* EUC-TW */
- enc_gb18030, /* GB18030 */
- enc_sjis /* SJIS */
-} enc_t;
-static inline enc_t
-locale_enc (void)
-{
- const char *encoding = locale_charset ();
- if (STREQ_OPT (encoding, "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0, 0))
- return enc_utf8;
- if (STREQ_OPT (encoding, "EUC-JP", 'E', 'U', 'C', '-', 'J', 'P', 0, 0, 0))
- return enc_eucjp;
- if (STREQ_OPT (encoding, "EUC-KR", 'E', 'U', 'C', '-', 'K', 'R', 0, 0, 0)
- || STREQ_OPT (encoding, "GB2312", 'G', 'B', '2', '3', '1', '2', 0, 0, 0)
- || STREQ_OPT (encoding, "BIG5", 'B', 'I', 'G', '5', 0, 0, 0, 0, 0))
- return enc_94;
- if (STREQ_OPT (encoding, "EUC-TW", 'E', 'U', 'C', '-', 'T', 'W', 0, 0, 0))
- return enc_euctw;
- if (STREQ_OPT (encoding, "GB18030", 'G', 'B', '1', '8', '0', '3', '0', 0, 0))
- return enc_gb18030;
- if (STREQ_OPT (encoding, "SJIS", 'S', 'J', 'I', 'S', 0, 0, 0, 0, 0))
- return enc_sjis;
- return enc_other;
-}
-
-# if GNULIB_WCHAR_SINGLE
-/* When we know that the locale does not change, provide a speedup by
- caching the value of locale_enc. */
-static int cached_locale_enc = -1;
-static inline enc_t
-locale_enc_cached (void)
-{
- if (cached_locale_enc < 0)
- cached_locale_enc = locale_enc ();
- return cached_locale_enc;
-}
-# else
-/* By default, don't make assumptions, hence no caching. */
-# define locale_enc_cached locale_enc
-# endif
-
verify (sizeof (mbstate_t) >= 4);
static char internal_state[4];
@@ -177,7 +128,7 @@ mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
/* Here m > 0. */
- enc = locale_enc_cached ();
+ enc = locale_encoding_classification ();
if (enc == enc_utf8) /* UTF-8 */
{
diff --git a/m4/mbrtowc.m4 b/m4/mbrtowc.m4
index bd9225b..755f8c9 100644
--- a/m4/mbrtowc.m4
+++ b/m4/mbrtowc.m4
@@ -821,7 +821,7 @@ AC_DEFUN([gl_MBRTOWC_C_LOCALE],
])
])
-# Prerequisites of lib/mbrtowc.c.
+# Prerequisites of lib/mbrtowc.c and lib/lc-charset-dispatch.c.
AC_DEFUN([gl_PREREQ_MBRTOWC], [
AC_REQUIRE([AC_C_INLINE])
:
diff --git a/modules/mbrtowc b/modules/mbrtowc
index db10256..22afc96 100644
--- a/modules/mbrtowc
+++ b/modules/mbrtowc
@@ -3,6 +3,8 @@ mbrtowc() function: convert multibyte character to wide character.
Files:
lib/mbrtowc.c
+lib/lc-charset-dispatch.h
+lib/lc-charset-dispatch.c
lib/mbtowc-lock.h
lib/mbtowc-lock.c
lib/windows-initguard.h
@@ -29,6 +31,7 @@ configure.ac:
gl_FUNC_MBRTOWC
if test $HAVE_MBRTOWC = 0 || test $REPLACE_MBRTOWC = 1; then
AC_LIBOBJ([mbrtowc])
+ AC_LIBOBJ([lc-charset-dispatch])
AC_LIBOBJ([mbtowc-lock])
gl_PREREQ_MBRTOWC
gl_PREREQ_MBTOWC_LOCK
--
2.7.4