>From 7bd3aacff73e31b213a58170cf1ed6cba6ecd6d1 Mon Sep 17 00:00:00 2001
From: Bruno Haible
Date: Sat, 25 Jan 2020 23:36:41 +0100
Subject: [PATCH 02/25] c32isalnum: New module.
* lib/c32isalnum.c: New file.
* lib/c32is-impl.h: New file.
* modules/c32isalnum: New file.
* doc/posix-functions/iswalnum.texi: Mention the new module.
---
ChangeLog | 8 ++++
doc/posix-functions/iswalnum.texi | 7 ++-
lib/c32is-impl.h | 95 +++++++++++++++++++++++++++++++++++++++
lib/c32isalnum.c | 25 +++++++++++
modules/c32isalnum | 33 ++++++++++++++
5 files changed, 166 insertions(+), 2 deletions(-)
create mode 100644 lib/c32is-impl.h
create mode 100644 lib/c32isalnum.c
create mode 100644 modules/c32isalnum
diff --git a/ChangeLog b/ChangeLog
index a41e8ac..4ffcc0b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,13 @@
2020-01-25 Bruno Haible
+ c32isalnum: New module.
+ * lib/c32isalnum.c: New file.
+ * lib/c32is-impl.h: New file.
+ * modules/c32isalnum: New file.
+ * doc/posix-functions/iswalnum.texi: Mention the new module.
+
+2020-01-25 Bruno Haible
+
uchar: Preparations for modules c32isalnum, ..., c32isxdigit.
* lib/uchar.in.h (c32isalnum, c32isalpha, c32isblank, c32iscntrl,
c32isdigit, c32isgraph, c32islower, c32isprint, c32ispunct, c32isspace,
diff --git a/doc/posix-functions/iswalnum.texi b/doc/posix-functions/iswalnum.texi
index 1ccb315..b3b6d24 100644
--- a/doc/posix-functions/iswalnum.texi
+++ b/doc/posix-functions/iswalnum.texi
@@ -20,6 +20,9 @@ OS X 10.8.
Portability problems not fixed by Gnulib:
@itemize
@item
-On Windows and 32-bit AIX platforms, @code{wchar_t} is a 16-bit type and therefore cannot
-accommodate all Unicode characters.
+On Windows and 32-bit AIX platforms, @code{wchar_t} is a 16-bit type and
+therefore cannot accommodate all Unicode characters.
+However, the Gnulib function @code{c32isalnum}, provided by Gnulib module
+@code{c32isalnum}, operates on 32-bit wide characters and therefore does not
+have this limitation.
@end itemize
diff --git a/lib/c32is-impl.h b/lib/c32is-impl.h
new file mode 100644
index 0000000..fd5f06a
--- /dev/null
+++ b/lib/c32is-impl.h
@@ -0,0 +1,95 @@
+/* Test whether a 32-bit wide character belongs to a specific character class.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, see <https://www.gnu.org/licenses/>. */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2020. */
+
+#include <wchar.h>
+#include <wctype.h>
+
+#ifdef __CYGWIN__
+# include <cygwin/version.h>
+#endif
+
+#if GNULIB_defined_mbstate_t
+# include "localcharset.h"
+# include "streq.h"
+#endif
+
+#include "unictype.h"
+#include "verify.h"
+
+int
+FUNC (wint_t wc)
+{
+ /* Classify wc via WCHAR_FUNC or UCS_FUNC, chosen per platform below, because the
+ char32_t encoding of a multibyte character is defined by the way mbrtoc32() is defined. */
+
+#if GNULIB_defined_mbstate_t /* AIX, IRIX */
+ /* mbrtoc32() is defined on top of mbtowc() for the non-UTF-8 locales
+ and directly for the UTF-8 locales. */
+ if (wc != WEOF)
+ {
+ const char *encoding = locale_charset ();
+ if (STREQ_OPT (encoding, "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0, 0))
+ return UCS_FUNC (wc);
+ else
+ return WCHAR_FUNC (wc);
+ }
+ else
+ return 0; /* WEOF is never passed to either classifier here. */
+
+#elif HAVE_WORKING_MBRTOC32 /* glibc */
+ /* mbrtoc32() is essentially defined by the system libc. */
+
+# if defined __GLIBC__
+ /* The char32_t encoding of a multibyte character is known to be the same as
+ the wchar_t encoding. */
+ return WCHAR_FUNC (wc);
+# else
+ /* The char32_t encoding of a multibyte character is known to be UCS-4,
+ different from the wchar_t encoding. */
+ if (wc != WEOF)
+ return UCS_FUNC (wc);
+ else
+ return 0; /* WEOF is never passed to UCS_FUNC. */
+# endif /* __GLIBC__ */
+
+#elif _GL_LARGE_CHAR32_T /* Cygwin, mingw, MSVC */
+ /* The wchar_t encoding is UTF-16.
+ The char32_t encoding is UCS-4. */
+
+# if defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007
+ /* As an extension to POSIX, the iswalnum() function of Cygwin >= 1.7
+ supports also wc arguments outside the Unicode BMP, that is, outside
+ the 'wchar_t' range. See
+
+ = . */
+ return WCHAR_FUNC (wc);
+# else
+ if (wc == WEOF || wc == (wchar_t) wc)
+ /* wc is in the range for the isw* functions. */
+ return WCHAR_FUNC (wc);
+ else
+ return UCS_FUNC (wc);
+# endif /* Cygwin >= 1.7 */
+
+#else /* macOS, FreeBSD, NetBSD, OpenBSD, HP-UX, Solaris, Minix, Android */
+ /* char32_t and wchar_t are equivalent. */
+ verify (sizeof (char32_t) == sizeof (wchar_t));
+
+ return WCHAR_FUNC (wc);
+#endif /* platform selection */
+}
diff --git a/lib/c32isalnum.c b/lib/c32isalnum.c
new file mode 100644
index 0000000..c81b833
--- /dev/null
+++ b/lib/c32isalnum.c
@@ -0,0 +1,25 @@
+/* Test 32-bit wide character for being alphanumeric.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, see <https://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include <uchar.h>
+
+#define FUNC c32isalnum /* name of the function that c32is-impl.h defines */
+#define WCHAR_FUNC iswalnum /* classifier applied on the wchar_t-encoding paths */
+#define UCS_FUNC uc_is_alnum /* classifier applied on the UCS-4 paths */
+#include "c32is-impl.h"
diff --git a/modules/c32isalnum b/modules/c32isalnum
new file mode 100644
index 0000000..213ffdd
--- /dev/null
+++ b/modules/c32isalnum
@@ -0,0 +1,33 @@
+Description:
+c32isalnum() function: test 32-bit wide character for being alphanumeric.
+
+Files:
+lib/c32isalnum.c
+lib/c32is-impl.h
+m4/mbrtoc32.m4
+
+Depends-on:
+uchar
+wchar
+wctype-h
+localcharset [test $REPLACE_MBSTATE_T = 1]
+streq [test $REPLACE_MBSTATE_T = 1]
+unictype/ctype-alnum
+verify
+
+configure.ac:
+AC_REQUIRE([gl_UCHAR_H])
+AC_REQUIRE([gl_MBRTOC32_SANITYCHECK])
+gl_UCHAR_MODULE_INDICATOR([c32isalnum])
+
+Makefile.am:
+lib_SOURCES += c32isalnum.c
+
+Include:
+<uchar.h>
+
+License:
+LGPLv3+ or GPLv2
+
+Maintainer:
+Bruno Haible
--
2.7.4