>From 1b0f778e32f73c8601e7c517a0b83098996363a9 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Tue, 22 Sep 2015 12:17:06 -0700 Subject: [PATCH] c-ctype: port better to EBCDIC Problems reported by Daniel Richard G. in http://lists.gnu.org/archive/html/bug-gnulib/2015-09/msg00020.html * lib/c-ctype.c: Include <limits.h>, for CHAR_MIN and CHAR_MAX. Include "verify.h". (C_CTYPE_ASCII, C_CTYPE_CONSECUTIVE_DIGITS) (C_CTYPE_CONSECUTIVE_LOWERCASE, C_CTYPE_CONSECUTIVE_UPPERCASE): Define as enum constants with value false, if not defined, so that code can use 'if' instead of 'ifdef'. Using 'if' helps make the code more portable, as both branches of the 'if' are compiled on all platforms. (C_CTYPE_EBCDIC): New constant. (to_char): New static function. (c_isalnum, c_isalpha, c_isdigit, c_islower, c_isgraph, c_isprint) (c_ispunct, c_isupper, c_isxdigit, c_tolower, c_toupper): Rewrite to use 'if' instead of 'ifdef'. Use to_char if non-ASCII. Prefer <= to >=. Prefer true and false to 1 and 0, for booleans. (c_iscntrl): Use 'if', not 'ifdef'. Special case for EBCDIC. Verify that the character set is either ASCII or EBCDIC. * tests/test-c-ctype.c: Include <limits.h>, for CHAR_MIN (to_char): New function. (test_all): Port to EBCDIC. Add some more tests, e.g., for c_ispunct. --- ChangeLog | 26 ++++++ lib/c-ctype.c | 253 ++++++++++++++++++++++++++------------------------- tests/test-c-ctype.c | 106 +++++++++++---------- 3 files changed, 216 insertions(+), 169 deletions(-) diff --git a/ChangeLog b/ChangeLog index c552225..8723b38 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,29 @@ +2015-09-22 Paul Eggert + + c-ctype: port better to EBCDIC + Problems reported by Daniel Richard G. in + http://lists.gnu.org/archive/html/bug-gnulib/2015-09/msg00020.html + * lib/c-ctype.c: Include <limits.h>, for CHAR_MIN and CHAR_MAX. + Include "verify.h". 
+ (C_CTYPE_ASCII, C_CTYPE_CONSECUTIVE_DIGITS) + (C_CTYPE_CONSECUTIVE_LOWERCASE, C_CTYPE_CONSECUTIVE_UPPERCASE): + Define as enum constants with value false, if not defined, so that + code can use 'if' instead of 'ifdef'. Using 'if' helps make the + code more portable, as both branches of the 'if' are compiled on + all platforms. + (C_CTYPE_EBCDIC): New constant. + (to_char): New static function. + (c_isalnum, c_isalpha, c_isdigit, c_islower, c_isgraph, c_isprint) + (c_ispunct, c_isupper, c_isxdigit, c_tolower, c_toupper): + Rewrite to use 'if' instead of 'ifdef'. + Use to_char if non-ASCII. Prefer <= to >=. + Prefer true and false to 1 and 0, for booleans. + (c_iscntrl): Use 'if', not 'ifdef'. Special case for EBCDIC. + Verify that the character set is either ASCII or EBCDIC. + * tests/test-c-ctype.c: Include <limits.h>, for CHAR_MIN + (to_char): New function. + (test_all): Port to EBCDIC. Add some more tests, e.g., for c_ispunct. + 2015-09-21 Pádraig Brady nanosleep: fix return code for interrupted replacement diff --git a/lib/c-ctype.c b/lib/c-ctype.c index 6635d34..916d46e 100644 --- a/lib/c-ctype.c +++ b/lib/c-ctype.c @@ -21,6 +21,34 @@ along with this program; if not, see <http://www.gnu.org/licenses/>. */ #define NO_C_CTYPE_MACROS #include "c-ctype.h" +#include <limits.h> +#include "verify.h" + +#ifndef C_CTYPE_ASCII +enum { C_CTYPE_ASCII = false }; +#endif +#ifndef C_CTYPE_CONSECUTIVE_DIGITS +enum { C_CTYPE_CONSECUTIVE_DIGITS = false }; +#endif +#ifndef C_CTYPE_CONSECUTIVE_LOWERCASE +enum { C_CTYPE_CONSECUTIVE_LOWERCASE = false }; +#endif +#ifndef C_CTYPE_CONSECUTIVE_UPPERCASE +enum { C_CTYPE_CONSECUTIVE_UPPERCASE = false }; +#endif + +/* Convert an int, which may be promoted from either an unsigned or a + signed char, to the corresponding char. */ + +static char +to_char (int c) +{ + enum { nchars = CHAR_MAX - CHAR_MIN + 1 }; + if (CHAR_MIN < 0 && CHAR_MAX < c && c < nchars) + return c - nchars; + return c; +} + /* The function isascii is not locale dependent. Its use in EBCDIC is questionable. 
*/ bool @@ -32,18 +60,20 @@ c_isascii (int c) bool c_isalnum (int c) { -#if C_CTYPE_CONSECUTIVE_DIGITS \ - && C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE -#if C_CTYPE_ASCII - return ((c >= '0' && c <= '9') - || ((c & ~0x20) >= 'A' && (c & ~0x20) <= 'Z')); -#else - return ((c >= '0' && c <= '9') - || (c >= 'A' && c <= 'Z') - || (c >= 'a' && c <= 'z')); -#endif -#else - switch (c) + if (C_CTYPE_CONSECUTIVE_DIGITS + && C_CTYPE_CONSECUTIVE_UPPERCASE + && C_CTYPE_CONSECUTIVE_LOWERCASE) + { + if (C_CTYPE_ASCII) + return (('0' <= c && c <= '9') + || ('A' <= (c & ~0x20) && (c & ~0x20) <= 'Z')); + else + return (('0' <= c && c <= '9') + || ('A' <= c && c <= 'Z') + || ('a' <= c && c <= 'z')); + } + + switch (to_char (c)) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': @@ -57,24 +87,24 @@ c_isalnum (int c) case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': - return 1; + return true; default: - return 0; + return false; } -#endif } bool c_isalpha (int c) { -#if C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE -#if C_CTYPE_ASCII - return ((c & ~0x20) >= 'A' && (c & ~0x20) <= 'Z'); -#else - return ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')); -#endif -#else - switch (c) + if (C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE) + { + if (C_CTYPE_ASCII) + return 'A' <= (c & ~0x20) && (c & ~0x20) <= 'Z'; + else + return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z'); + } + + switch (to_char (c)) { case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': @@ -86,11 +116,10 @@ c_isalpha (int c) case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': - return 1; + return true; default: - return 0; + return false; } -#endif } bool @@ -102,81 +131,65 
@@ c_isblank (int c) bool c_iscntrl (int c) { -#if C_CTYPE_ASCII - return ((c & ~0x1f) == 0 || c == 0x7f); -#else - switch (c) - { - case ' ': case '!': case '"': case '#': case '$': case '%': - case '&': case '\'': case '(': case ')': case '*': case '+': - case ',': case '-': case '.': case '/': - case '0': case '1': case '2': case '3': case '4': case '5': - case '6': case '7': case '8': case '9': - case ':': case ';': case '<': case '=': case '>': case '?': - case '@': - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': - case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': - case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': - case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': - case 'Y': case 'Z': - case '[': case '\\': case ']': case '^': case '_': case '`': - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': - case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': - case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': - case 's': case 't': case 'u': case 'v': case 'w': case 'x': - case 'y': case 'z': - case '{': case '|': case '}': case '~': - return 0; - default: - return 1; - } -#endif + enum { C_CTYPE_EBCDIC = (' ' == 64 && '0' == 240 + && 'A' == 193 && 'J' == 209 && 'S' == 226 + && 'a' == 129 && 'j' == 145 && 's' == 162) }; + verify (C_CTYPE_ASCII || C_CTYPE_EBCDIC); + + if (0 <= c && c < ' ') + return true; + if (C_CTYPE_ASCII) + return c == 0x7f; + else + return c == 0xff || c == -1; } bool c_isdigit (int c) { -#if C_CTYPE_CONSECUTIVE_DIGITS - return (c >= '0' && c <= '9'); -#else + if (C_CTYPE_ASCII) + return '0' <= c && c <= '9'; + + c = to_char (c); + if (C_CTYPE_CONSECUTIVE_DIGITS) + return '0' <= c && c <= '9'; + switch (c) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': - return 1; + return true; default: - return 0; + return false; } -#endif } bool c_islower (int c) { -#if C_CTYPE_CONSECUTIVE_LOWERCASE - return (c >= 'a' && c <= 'z'); 
-#else - switch (c) + if (C_CTYPE_CONSECUTIVE_LOWERCASE) + return 'a' <= c && c <= 'z'; + + switch (to_char (c)) { case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': - return 1; + return true; default: - return 0; + return false; } -#endif } bool c_isgraph (int c) { -#if C_CTYPE_ASCII - return (c >= '!' && c <= '~'); -#else - switch (c) + if (C_CTYPE_ASCII) + return '!' <= c && c <= '~'; + + switch (to_char (c)) { case '!': case '"': case '#': case '$': case '%': case '&': case '\'': case '(': case ')': case '*': case '+': case ',': @@ -197,20 +210,19 @@ c_isgraph (int c) case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': case '{': case '|': case '}': case '~': - return 1; + return true; default: - return 0; + return false; } -#endif } bool c_isprint (int c) { -#if C_CTYPE_ASCII - return (c >= ' ' && c <= '~'); -#else - switch (c) + if (C_CTYPE_ASCII) + return ' ' <= c && c <= '~'; + + switch (to_char (c)) { case ' ': case '!': case '"': case '#': case '$': case '%': case '&': case '\'': case '(': case ')': case '*': case '+': @@ -231,22 +243,21 @@ c_isprint (int c) case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': case '{': case '|': case '}': case '~': - return 1; + return true; default: - return 0; + return false; } -#endif } bool c_ispunct (int c) { -#if C_CTYPE_ASCII - return ((c >= '!' && c <= '~') - && !((c >= '0' && c <= '9') - || ((c & ~0x20) >= 'A' && (c & ~0x20) <= 'Z'))); -#else - switch (c) + if (C_CTYPE_ASCII) + return (('!' 
<= c && c <= '~') + && !(('0' <= c && c <= '9') + || ('A' <= (c & ~0x20) && (c & ~0x20) <= 'Z'))); + + switch (to_char (c)) { case '!': case '"': case '#': case '$': case '%': case '&': case '\'': case '(': case ')': case '*': case '+': case ',': @@ -255,11 +266,10 @@ c_ispunct (int c) case '@': case '[': case '\\': case ']': case '^': case '_': case '`': case '{': case '|': case '}': case '~': - return 1; + return true; default: - return 0; + return false; } -#endif } bool @@ -272,57 +282,56 @@ c_isspace (int c) bool c_isupper (int c) { -#if C_CTYPE_CONSECUTIVE_UPPERCASE - return (c >= 'A' && c <= 'Z'); -#else - switch (c) + if (C_CTYPE_CONSECUTIVE_UPPERCASE) + return 'A' <= c && c <= 'Z'; + + switch (to_char (c)) { case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': - return 1; + return true; default: - return 0; + return false; } -#endif } bool c_isxdigit (int c) { -#if C_CTYPE_CONSECUTIVE_DIGITS \ - && C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE -#if C_CTYPE_ASCII - return ((c >= '0' && c <= '9') - || ((c & ~0x20) >= 'A' && (c & ~0x20) <= 'F')); -#else - return ((c >= '0' && c <= '9') - || (c >= 'A' && c <= 'F') - || (c >= 'a' && c <= 'f')); -#endif -#else - switch (c) + if (C_CTYPE_CONSECUTIVE_DIGITS + && C_CTYPE_CONSECUTIVE_UPPERCASE + && C_CTYPE_CONSECUTIVE_LOWERCASE) + { + if ('0' <= c && c <= '9') + return true; + if (C_CTYPE_ASCII) + return 'A' <= (c & ~0x20) && (c & ~0x20) <= 'F'; + return (('A' <= c && c <= 'F') + || ('a' <= c && c <= 'f')); + } + + switch (to_char (c)) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': - return 1; + return true; default: - 
return 0; + return false; } -#endif } int c_tolower (int c) { -#if C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE - return (c >= 'A' && c <= 'Z' ? c - 'A' + 'a' : c); -#else - switch (c) + if (C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE) + return c_isupper (c) ? c - 'A' + 'a' : c; + + switch (to_char (c)) { case 'A': return 'a'; case 'B': return 'b'; @@ -352,16 +361,15 @@ c_tolower (int c) case 'Z': return 'z'; default: return c; } -#endif } int c_toupper (int c) { -#if C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE - return (c >= 'a' && c <= 'z' ? c - 'a' + 'A' : c); -#else - switch (c) + if (C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE) + return c_islower (c) ? c - 'a' + 'A' : c; + + switch (to_char (c)) { case 'a': return 'A'; case 'b': return 'B'; @@ -391,5 +399,4 @@ c_toupper (int c) case 'z': return 'Z'; default: return c; } -#endif } diff --git a/tests/test-c-ctype.c b/tests/test-c-ctype.c index 81fe936..63d0af9 100644 --- a/tests/test-c-ctype.c +++ b/tests/test-c-ctype.c @@ -20,10 +20,19 @@ #include "c-ctype.h" +#include <limits.h> #include <locale.h> #include "macros.h" +static char +to_char (int c) +{ + if (CHAR_MIN < 0 && CHAR_MAX < c) + return c - CHAR_MAX - 1 + CHAR_MIN; + return c; +} + static void test_all (void) { @@ -31,49 +40,32 @@ test_all (void) for (c = -0x80; c < 0x100; c++) { - ASSERT (c_isascii (c) == (c >= 0 && c < 0x80)); - - switch (c) + if (c < 0) { - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': - case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': - case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': - case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': - case 'Y': case 'Z': - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': - case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': - case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': - case 's': case 't': case 'u': case 'v': case 'w': case 'x': - case 'y': case 'z': - case '0': 
case '1': case '2': case '3': case '4': case '5': - case '6': case '7': case '8': case '9': - ASSERT (c_isalnum (c) == 1); - break; - default: - ASSERT (c_isalnum (c) == 0); - break; + ASSERT (c_isascii (c) == c_isascii (c + 0x100)); + ASSERT (c_isalnum (c) == c_isalnum (c + 0x100)); + ASSERT (c_isalpha (c) == c_isalpha (c + 0x100)); + ASSERT (c_isblank (c) == c_isblank (c + 0x100)); + ASSERT (c_iscntrl (c) == c_iscntrl (c + 0x100)); + ASSERT (c_isdigit (c) == c_isdigit (c + 0x100)); + ASSERT (c_islower (c) == c_islower (c + 0x100)); + ASSERT (c_isgraph (c) == c_isgraph (c + 0x100)); + ASSERT (c_isprint (c) == c_isprint (c + 0x100)); + ASSERT (c_ispunct (c) == c_ispunct (c + 0x100)); + ASSERT (c_isspace (c) == c_isspace (c + 0x100)); + ASSERT (c_isupper (c) == c_isupper (c + 0x100)); + ASSERT (c_isxdigit (c) == c_isxdigit (c + 0x100)); + ASSERT (to_char (c_tolower (c)) == to_char (c_tolower (c + 0x100))); + ASSERT (to_char (c_toupper (c)) == to_char (c_toupper (c + 0x100))); } - switch (c) - { - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': - case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': - case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': - case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': - case 'Y': case 'Z': - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': - case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': - case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': - case 's': case 't': case 'u': case 'v': case 'w': case 'x': - case 'y': case 'z': - ASSERT (c_isalpha (c) == 1); - break; - default: - ASSERT (c_isalpha (c) == 0); - break; - } + ASSERT (c_isascii (c) == (c >= 0 && c < 0x80)); + + ASSERT (c_isalnum (c) == (c_isalpha (c) || c_isdigit (c))); + + ASSERT (c_isalpha (c) == (c_islower (c) || c_isupper (c))); - switch (c) + switch (to_char (c)) { case '\t': case ' ': ASSERT (c_isblank (c) == 1); @@ -83,9 +75,13 @@ test_all (void) break; } +#ifdef C_CTYPE_ASCII ASSERT (c_iscntrl (c) 
== ((c >= 0 && c < 0x20) || c == 0x7f)); +#endif - switch (c) + ASSERT (! (c_iscntrl (c) && c_isprint (c))); + + switch (to_char (c)) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': @@ -96,7 +92,7 @@ test_all (void) break; } - switch (c) + switch (to_char (c)) { case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': @@ -110,13 +106,31 @@ test_all (void) break; } +#ifdef C_CTYPE_ASCII ASSERT (c_isgraph (c) == ((c >= 0x20 && c < 0x7f) && c != ' ')); ASSERT (c_isprint (c) == (c >= 0x20 && c < 0x7f)); +#endif + + ASSERT (c_isgraph (c) == (c_isalnum (c) || c_ispunct (c))); + + ASSERT (c_isprint (c) == (c_isgraph (c) || c == ' ')); - ASSERT (c_ispunct (c) == (c_isgraph (c) && !c_isalnum (c))); + switch (to_char (c)) + { + case '!': case '"': case '#': case '$': case '%': case '&': case '\'': + case '(': case ')': case '*': case '+': case ',': case '-': case '.': + case '/': case ':': case ';': case '<': case '=': case '>': case '?': + case '@': case '[': case'\\': case ']': case '^': case '_': case '`': + case '{': case '|': case '}': case '~': + ASSERT (c_ispunct (c) == 1); + break; + default: + ASSERT (c_ispunct (c) == 0); + break; + } - switch (c) + switch (to_char (c)) { case ' ': case '\t': case '\n': case '\v': case '\f': case '\r': ASSERT (c_isspace (c) == 1); @@ -126,7 +140,7 @@ test_all (void) break; } - switch (c) + switch (to_char (c)) { case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': @@ -140,7 +154,7 @@ test_all (void) break; } - switch (c) + switch (to_char (c)) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': @@ -153,7 +167,7 @@ test_all (void) break; } - switch (c) + switch (to_char (c)) { case 'A': ASSERT (c_tolower (c) == 'a'); -- 2.1.0