From 3730d6f212dcb667594bb1be9fcb28dd419915f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A1draig=20Brady?= Date: Fri, 18 Mar 2022 14:52:36 +0000 Subject: [PATCH] printf: support printing the numeric value of multi-byte chars * src/printf.c (STRTOX): Update to support multi-byte chars. * tests/misc/printf-mb.sh: Add a new test. * tests/local.mk: Reference the new test. * NEWS: Mention the bug fix. Fixes https://bugs.gnu.org/54388 --- NEWS | 3 +++ src/printf.c | 16 +++++++++++++ tests/local.mk | 1 + tests/misc/printf-mb.sh | 52 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 72 insertions(+) create mode 100755 tests/misc/printf-mb.sh diff --git a/NEWS b/NEWS index fe66f496f..7eaa1d158 100644 --- a/NEWS +++ b/NEWS @@ -42,6 +42,9 @@ GNU coreutils NEWS -*- outline -*- for B when A is a directory, possibly inflooping. [bug introduced in coreutils-6.3] + printf now supports printing the numeric value of multi-byte characters. + [This bug was present in "the beginning".] + AIX builds no longer fail because some library functions are not found. [bug introduced in coreutils-8.32] diff --git a/src/printf.c b/src/printf.c index 5f84475fd..a0e81c02e 100644 --- a/src/printf.c +++ b/src/printf.c @@ -53,6 +53,7 @@ #include <config.h> #include <stdio.h> #include <sys/types.h> +#include <wchar.h> #include "system.h" #include "cl-strtod.h" @@ -170,6 +171,21 @@ FUNC_NAME (char const *s) \ { \ unsigned char ch = *++s; \ val = ch; \ + \ + if (MB_CUR_MAX > 1) \ + { \ + mbstate_t mbstate = { 0, }; \ + wchar_t wc; \ + size_t slen = strlen (s); \ + ssize_t bytes; \ + bytes = mbrtowc (&wc, s, slen, &mbstate); \ + if (0 < bytes) \ + { \ + val = wc; \ + s += bytes - 1; \ + } \ + } \ + \ /* If POSIXLY_CORRECT is not set, then give a warning that there \ are characters following the character constant and that GNU \ printf is ignoring those characters. 
If POSIXLY_CORRECT *is* \
diff --git a/tests/local.mk b/tests/local.mk
index f97ddcb98..0f7778619 100644
--- a/tests/local.mk
+++ b/tests/local.mk
@@ -344,6 +344,7 @@ all_tests = \
   tests/misc/printf.sh \
   tests/misc/printf-cov.pl \
   tests/misc/printf-hex.sh \
+  tests/misc/printf-mb.sh \
   tests/misc/printf-surprise.sh \
   tests/misc/printf-quote.sh \
   tests/misc/pwd-long.sh \
diff --git a/tests/misc/printf-mb.sh b/tests/misc/printf-mb.sh
new file mode 100755
index 000000000..ad21dbe67
--- /dev/null
+++ b/tests/misc/printf-mb.sh
@@ -0,0 +1,52 @@
+#!/bin/sh
+# tests for printing multi-byte values of characters
+
+# Copyright (C) 2022 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
+print_ver_ printf
+
+prog='env printf'
+
+unset LC_ALL
+f=$LOCALE_FR_UTF8
+: ${LOCALE_FR_UTF8=none}
+if test "$LOCALE_FR_UTF8" != "none"; then
+  (
+   #valid multi-byte
+   LC_ALL=$f $prog '%04x\n' '"á' >>out 2>>err
+   #invalid multi-byte
+   LC_ALL=$f $prog '%04x\n' "'$($prog '\xe1')" >>out 2>>err
+   #uni-byte
+   LC_ALL=C $prog '%04x\n' "'$($prog '\xe1')" >>out 2>>err
+   #valid multi-byte, with trailing
+   LC_ALL=$f $prog '%04x\n' '"á"' >>out 2>>err
+  )
+  cat <<\EOF > exp || framework_failure_
+00e1
+00e1
+00e1
+00e1
+EOF
+  compare exp out || fail=1
+
+  cat <<EOF > exp_err
+printf: warning: ": character(s) following character constant have been ignored
+EOF
+  compare exp_err err || fail=1
+fi
+
+Exit $fail
-- 
2.26.2