From 80ae5aa1307114f51bb47b1b1a4ac89a31422956 Mon Sep 17 00:00:00 2001
From: Bruno Haible
Date: Sun, 5 Jan 2020 02:32:18 +0100
Subject: [PATCH 1/2] mbsnrtoc32s: New module.
* lib/uchar.in.h (mbsnrtoc32s): New declaration.
* lib/mbsnrtowcs-impl.h: Parameterize: Use macros FUNC, DCHAR_T,
INTERNAL_STATE, MBRTOWC.
* lib/mbsnrtowcs.c (FUNC, DCHAR_T, INTERNAL_STATE, MBRTOWC): New macros.
* lib/mbsnrtoc32s.c: New file.
* m4/uchar.m4 (gl_UCHAR_H_DEFAULTS): Initialize GNULIB_MBSNRTOC32S.
* modules/uchar (Makefile.am): Substitute GNULIB_MBSNRTOC32S.
* modules/mbsnrtoc32s: New file.
* tests/test-uchar-c++.cc: Test the signature of mbsnrtoc32s.
* doc/posix-functions/mbsnrtowcs.texi: Mention the new module.
---
ChangeLog | 14 +++++++++
doc/posix-functions/mbsnrtowcs.texi | 7 +++--
lib/mbsnrtoc32s.c | 63 +++++++++++++++++++++++++++++++++++++
lib/mbsnrtowcs-impl.h | 10 +++---
lib/mbsnrtowcs.c | 4 +++
lib/uchar.in.h | 13 ++++++++
m4/uchar.m4 | 3 +-
modules/mbsnrtoc32s | 38 ++++++++++++++++++++++
modules/uchar | 1 +
tests/test-uchar-c++.cc | 6 ++++
10 files changed, 151 insertions(+), 8 deletions(-)
create mode 100644 lib/mbsnrtoc32s.c
create mode 100644 modules/mbsnrtoc32s
diff --git a/ChangeLog b/ChangeLog
index 2825437..fb16eee 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,19 @@
2020-01-04 Bruno Haible
+ mbsnrtoc32s: New module.
+ * lib/uchar.in.h (mbsnrtoc32s): New declaration.
+ * lib/mbsnrtowcs-impl.h: Parameterize: Use macros FUNC, DCHAR_T,
+ INTERNAL_STATE, MBRTOWC.
+ * lib/mbsnrtowcs.c (FUNC, DCHAR_T, INTERNAL_STATE, MBRTOWC): New macros.
+ * lib/mbsnrtoc32s.c: New file.
+ * m4/uchar.m4 (gl_UCHAR_H_DEFAULTS): Initialize GNULIB_MBSNRTOC32S.
+ * modules/uchar (Makefile.am): Substitute GNULIB_MBSNRTOC32S.
+ * modules/mbsnrtoc32s: New file.
+ * tests/test-uchar-c++.cc: Test the signature of mbsnrtoc32s.
+ * doc/posix-functions/mbsnrtowcs.texi: Mention the new module.
+
+2020-01-04 Bruno Haible
+
mbsrtoc32s tests: Enhance test.
* tests/test-mbsrtoc32s.c (main): Include a non-BMP character in the
test strings for UTF-8 and GB18030.
diff --git a/doc/posix-functions/mbsnrtowcs.texi b/doc/posix-functions/mbsnrtowcs.texi
index bd7911b..c6defd2 100644
--- a/doc/posix-functions/mbsnrtowcs.texi
+++ b/doc/posix-functions/mbsnrtowcs.texi
@@ -19,8 +19,11 @@ Solaris 11.4.
Portability problems not fixed by Gnulib:
@itemize
@item
-On Windows and 32-bit AIX platforms, @code{wchar_t} is a 16-bit type and therefore cannot
-accommodate all Unicode characters.
+On Windows and 32-bit AIX platforms, @code{wchar_t} is a 16-bit type and
+therefore cannot accommodate all Unicode characters.
+However, the Gnulib function @code{mbsnrtoc32s}, provided by Gnulib module
+@code{mbsnrtoc32s}, operates on 32-bit wide characters and therefore does not
+have this limitation.
@item
The specification is not clear about whether this function should update the
conversion state when the first argument (the destination pointer) is NULL.
diff --git a/lib/mbsnrtoc32s.c b/lib/mbsnrtoc32s.c
new file mode 100644
index 0000000..7ba0415
--- /dev/null
+++ b/lib/mbsnrtoc32s.c
@@ -0,0 +1,63 @@
+/* Convert string to 32-bit wide string.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2020.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include <uchar.h>
+
+#include <wchar.h>
+
+#if _GL_LARGE_CHAR32_T
+
+/* For Cygwin >= 1.7 it would be possible to speed this up a bit by cutting
+ the source into chunks, calling mbsnrtowcs on a chunk, then u16_to_u32 on
+ the result, then proceed with the next chunk, and so on.
+ But speed is not critical here so far. */
+
+/* Reuse the implementation of mbsnrtowcs with a different parameterization. */
+
+# include <errno.h>
+# include <limits.h>
+# include <stdlib.h>
+
+# include "minmax.h"
+# include "strnlen1.h"
+
+extern mbstate_t _gl_mbsrtoc32s_state;
+
+# define FUNC mbsnrtoc32s
+# define DCHAR_T char32_t
+# define INTERNAL_STATE _gl_mbsrtoc32s_state
+# define MBRTOWC mbrtoc32
+# include "mbsnrtowcs-impl.h"
+
+#else
+/* char32_t and wchar_t are equivalent. */
+
+# include "verify.h"
+
+verify (sizeof (char32_t) == sizeof (wchar_t));
+
+size_t
+mbsnrtoc32s (char32_t *dest, const char **srcp, size_t srclen, size_t len,
+ mbstate_t *ps)
+{
+ return mbsnrtowcs ((wchar_t *) dest, srcp, srclen, len, ps);
+}
+
+#endif
diff --git a/lib/mbsnrtowcs-impl.h b/lib/mbsnrtowcs-impl.h
index 29ed870..ede6fea 100644
--- a/lib/mbsnrtowcs-impl.h
+++ b/lib/mbsnrtowcs-impl.h
@@ -16,16 +16,16 @@
along with this program. If not, see <https://www.gnu.org/licenses/>. */
size_t
-mbsnrtowcs (wchar_t *dest, const char **srcp, size_t srclen, size_t len, mbstate_t *ps)
+FUNC (DCHAR_T *dest, const char **srcp, size_t srclen, size_t len, mbstate_t *ps)
{
if (ps == NULL)
- ps = &_gl_mbsrtowcs_state;
+ ps = &INTERNAL_STATE;
{
const char *src = *srcp;
if (dest != NULL)
{
- wchar_t *destptr = dest;
+ DCHAR_T *destptr = dest;
for (; srclen > 0 && len > 0; destptr++, len--)
{
@@ -46,7 +46,7 @@ mbsnrtowcs (wchar_t *dest, const char **srcp, size_t srclen, size_t len, mbstate
src_avail = 4 + strnlen1 (src + 4, MIN (srclen, MB_LEN_MAX) - 4);
/* Parse the next multibyte character. */
- ret = mbrtowc (destptr, src, src_avail, ps);
+ ret = MBRTOWC (destptr, src, src_avail, ps);
if (ret == (size_t)(-2))
/* Encountered a multibyte character that extends past a '\0' byte
@@ -94,7 +94,7 @@ mbsnrtowcs (wchar_t *dest, const char **srcp, size_t srclen, size_t len, mbstate
src_avail = 4 + strnlen1 (src + 4, MIN (srclen, MB_LEN_MAX) - 4);
/* Parse the next multibyte character. */
- ret = mbrtowc (NULL, src, src_avail, &state);
+ ret = MBRTOWC (NULL, src, src_avail, &state);
if (ret == (size_t)(-2))
/* Encountered a multibyte character that extends past a '\0' byte
diff --git a/lib/mbsnrtowcs.c b/lib/mbsnrtowcs.c
index 2fa5c3e..63bac59 100644
--- a/lib/mbsnrtowcs.c
+++ b/lib/mbsnrtowcs.c
@@ -30,4 +30,8 @@
extern mbstate_t _gl_mbsrtowcs_state;
+#define FUNC mbsnrtowcs
+#define DCHAR_T wchar_t
+#define INTERNAL_STATE _gl_mbsrtowcs_state
+#define MBRTOWC mbrtowc
#include "mbsnrtowcs-impl.h"
diff --git a/lib/uchar.in.h b/lib/uchar.in.h
index 318cf8e..f31b18c 100644
--- a/lib/uchar.in.h
+++ b/lib/uchar.in.h
@@ -108,6 +108,19 @@ _GL_WARN_ON_USE (mbrtoc32, "mbrtoc32 is not portable - "
/* Convert a string to a 32-bit wide string. */
+#if @GNULIB_MBSNRTOC32S@
+_GL_FUNCDECL_SYS (mbsnrtoc32s, size_t,
+ (char32_t *dest, const char **srcp, size_t srclen, size_t len,
+ mbstate_t *ps)
+ _GL_ARG_NONNULL ((2)));
+_GL_CXXALIAS_SYS (mbsnrtoc32s, size_t,
+ (char32_t *dest, const char **srcp, size_t srclen, size_t len,
+ mbstate_t *ps));
+_GL_CXXALIASWARN (mbsnrtoc32s);
+#endif
+
+
+/* Convert a string to a 32-bit wide string. */
#if @GNULIB_MBSRTOC32S@
_GL_FUNCDECL_SYS (mbsrtoc32s, size_t,
(char32_t *dest, const char **srcp, size_t len, mbstate_t *ps)
diff --git a/m4/uchar.m4 b/m4/uchar.m4
index e92f5d6..4e0f43a 100644
--- a/m4/uchar.m4
+++ b/m4/uchar.m4
@@ -1,4 +1,4 @@
-# uchar.m4 serial 6
+# uchar.m4 serial 7
dnl Copyright (C) 2019-2020 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
@@ -50,6 +50,7 @@ AC_DEFUN([gl_UCHAR_H_DEFAULTS],
GNULIB_BTOC32=0; AC_SUBST([GNULIB_BTOC32])
GNULIB_C32TOB=0; AC_SUBST([GNULIB_C32TOB])
GNULIB_MBRTOC32=0; AC_SUBST([GNULIB_MBRTOC32])
+ GNULIB_MBSNRTOC32S=0; AC_SUBST([GNULIB_MBSNRTOC32S])
GNULIB_MBSRTOC32S=0; AC_SUBST([GNULIB_MBSRTOC32S])
dnl Assume proper GNU behavior unless another module says otherwise.
HAVE_MBRTOC32=1; AC_SUBST([HAVE_MBRTOC32])
diff --git a/modules/mbsnrtoc32s b/modules/mbsnrtoc32s
new file mode 100644
index 0000000..44784d8
--- /dev/null
+++ b/modules/mbsnrtoc32s
@@ -0,0 +1,38 @@
+Description:
+mbsnrtoc32s() function: convert string to 32-bit wide string.
+
+Files:
+lib/mbsnrtoc32s.c
+lib/mbsnrtowcs-impl.h
+lib/mbsrtoc32s-state.c
+
+Depends-on:
+uchar
+wchar
+verify
+mbrtoc32 [test $SMALL_WCHAR_T = 1]
+minmax [test $SMALL_WCHAR_T = 1]
+strnlen1 [test $SMALL_WCHAR_T = 1]
+mbsnrtowcs [test $SMALL_WCHAR_T = 0]
+
+configure.ac:
+AC_REQUIRE([gl_UCHAR_H])
+if test $SMALL_WCHAR_T = 1; then
+ AC_LIBOBJ([mbsrtoc32s-state])
+fi
+gl_UCHAR_MODULE_INDICATOR([mbsnrtoc32s])
+
+Makefile.am:
+lib_SOURCES += mbsnrtoc32s.c
+
+Include:
+<uchar.h>
+
+Link:
+$(LIB_MBRTOWC)
+
+License:
+LGPL
+
+Maintainer:
+Bruno Haible
diff --git a/modules/uchar b/modules/uchar
index 03101c1..a50eb5a 100644
--- a/modules/uchar
+++ b/modules/uchar
@@ -31,6 +31,7 @@ uchar.h: uchar.in.h $(top_builddir)/config.status $(CXXDEFS_H)
-e 's/@''GNULIB_BTOC32''@/$(GNULIB_BTOC32)/g' \
-e 's/@''GNULIB_C32TOB''@/$(GNULIB_C32TOB)/g' \
-e 's/@''GNULIB_MBRTOC32''@/$(GNULIB_MBRTOC32)/g' \
+ -e 's/@''GNULIB_MBSNRTOC32S''@/$(GNULIB_MBSNRTOC32S)/g' \
-e 's/@''GNULIB_MBSRTOC32S''@/$(GNULIB_MBSRTOC32S)/g' \
-e 's|@''HAVE_MBRTOC32''@|$(HAVE_MBRTOC32)|g' \
-e 's|@''REPLACE_MBRTOC32''@|$(REPLACE_MBRTOC32)|g' \
diff --git a/tests/test-uchar-c++.cc b/tests/test-uchar-c++.cc
index a7132a1..a630eec 100644
--- a/tests/test-uchar-c++.cc
+++ b/tests/test-uchar-c++.cc
@@ -37,6 +37,12 @@ SIGNATURE_CHECK (GNULIB_NAMESPACE::mbrtoc32, size_t,
(char32_t *, const char *, size_t, mbstate_t *));
#endif
+#if GNULIB_TEST_MBSNRTOC32S
+SIGNATURE_CHECK (GNULIB_NAMESPACE::mbsnrtoc32s, size_t,
+ (char32_t *, const char **, size_t, size_t, mbstate_t *));
+
+#endif
+
#if GNULIB_TEST_MBSRTOC32S
SIGNATURE_CHECK (GNULIB_NAMESPACE::mbsrtoc32s, size_t,
(char32_t *, const char **, size_t, mbstate_t *));
--
2.7.4