From 3730d6f212dcb667594bb1be9fcb28dd419915f0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?=
Date: Fri, 18 Mar 2022 14:52:36 +0000
Subject: [PATCH] printf: support printing the numeric value of multi-byte
chars
* src/printf.c (STRTOX): Update to support multi-byte chars.
* tests/misc/printf-mb.sh: Add a new test.
* tests/local.mk: Reference the new test.
* NEWS: Mention the bug fix.
Fixes https://bugs.gnu.org/54388
---
NEWS | 3 +++
src/printf.c | 16 +++++++++++++
tests/local.mk | 1 +
tests/misc/printf-mb.sh | 52 +++++++++++++++++++++++++++++++++++++++++
4 files changed, 72 insertions(+)
create mode 100755 tests/misc/printf-mb.sh
diff --git a/NEWS b/NEWS
index fe66f496f..7eaa1d158 100644
--- a/NEWS
+++ b/NEWS
@@ -42,6 +42,9 @@ GNU coreutils NEWS -*- outline -*-
for B when A is a directory, possibly inflooping.
[bug introduced in coreutils-6.3]
+ printf now supports printing the numeric value of multi-byte characters.
+ [This bug was present in "the beginning".]
+
AIX builds no longer fail because some library functions are not found.
[bug introduced in coreutils-8.32]
diff --git a/src/printf.c b/src/printf.c
index 5f84475fd..a0e81c02e 100644
--- a/src/printf.c
+++ b/src/printf.c
@@ -53,6 +53,7 @@
#include <config.h>
#include <stdio.h>
#include <sys/types.h>
+#include <wchar.h>
#include "system.h"
#include "cl-strtod.h"
@@ -170,6 +171,21 @@ FUNC_NAME (char const *s) \
{ \
unsigned char ch = *++s; \
val = ch; \
+ \
+ if (MB_CUR_MAX > 1) \
+ { \
+ mbstate_t mbstate = { 0, }; \
+ wchar_t wc; \
+ size_t slen = strlen (s); \
+ ssize_t bytes; \
+ bytes = mbrtowc (&wc, s, slen, &mbstate); \
+ if (0 < bytes) \
+ { \
+ val = wc; \
+ s += bytes - 1; \
+ } \
+ } \
+ \
/* If POSIXLY_CORRECT is not set, then give a warning that there \
are characters following the character constant and that GNU \
printf is ignoring those characters. If POSIXLY_CORRECT *is* \
diff --git a/tests/local.mk b/tests/local.mk
index f97ddcb98..0f7778619 100644
--- a/tests/local.mk
+++ b/tests/local.mk
@@ -344,6 +344,7 @@ all_tests = \
tests/misc/printf.sh \
tests/misc/printf-cov.pl \
tests/misc/printf-hex.sh \
+ tests/misc/printf-mb.sh \
tests/misc/printf-surprise.sh \
tests/misc/printf-quote.sh \
tests/misc/pwd-long.sh \
diff --git a/tests/misc/printf-mb.sh b/tests/misc/printf-mb.sh
new file mode 100755
index 000000000..ad21dbe67
--- /dev/null
+++ b/tests/misc/printf-mb.sh
@@ -0,0 +1,52 @@
+#!/bin/sh
+# Test printing the numeric values of multi-byte characters
+
+# Copyright (C) 2022 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
+print_ver_ printf
+
+prog='env printf'
+
+unset LC_ALL
+f=$LOCALE_FR_UTF8
+: ${LOCALE_FR_UTF8=none}
+if test "$LOCALE_FR_UTF8" != "none"; then
+ (
+ #valid multi-byte
+ LC_ALL=$f $prog '%04x\n' '"á' >>out 2>>err
+ #invalid multi-byte
+ LC_ALL=$f $prog '%04x\n' "'$($prog '\xe1')" >>out 2>>err
+ #uni-byte
+ LC_ALL=C $prog '%04x\n' "'$($prog '\xe1')" >>out 2>>err
+ #valid multi-byte, with trailing
+ LC_ALL=$f $prog '%04x\n' '"á"' >>out 2>>err
+ )
+ cat <<\EOF > exp || framework_failure_
+00e1
+00e1
+00e1
+00e1
+EOF
+ compare exp out || fail=1
+
+ cat <<EOF > exp_err
+printf: warning: ": character(s) following character constant have been ignored
+EOF
+ compare exp_err err || fail=1
+fi
+
+Exit $fail
--
2.26.2