bug-gnu-utils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

wc ignores locale notion of white space


From: Bruno Haible
Subject: wc ignores locale notion of white space
Date: Tue, 11 Dec 2001 12:49:22 +0100 (CET)

Dear textutils maintainer,

"wc" from unpatched textutils-2.0.19 ignores the locale's notion of
white space. This leads to a test suite failure of the Li18nux2000
testsuite.

        ************************************************************************
        /tset/LI18NUX2K.L1/utils/wc/wc 2        Failed
        
                Test Information:
        [36-wc-2] Reference 4.Utils 'wc' Assertion #2.
        When this utility writes to the standard output the number of words,
                this utility correctly recognizes the boundaries of words. The
                boundaries are shown as white-space characters constituted in
                current locale.
        Can't count number of words.

        ************************************************************************

Here is a patch which fixes this testsuite failure, both for the multibyte
case (e.g. UTF-8) and for 8-bit locales (e.g. CP1046, where 0xF6 = U+200B, or
CP256, where 0xE1 = U+2003; both are whitespace != ' ').


For this patch to work, please also add AC_CHECK_FUNCS(iswspace) to your
macrology. I can't give a patch for this, because you are using nonstandard
versions of autoconf and automake.

2001-12-09  Bruno Haible  <address@hidden>

        * wc.c (wc): Use ISSPACE, iswspace instead of hardcoding the ASCII
        space character.

diff -r -c3 --unidirectional-new-file textutils/src/wc.c textutils-i18n/src/wc.c
*** textutils/src/wc.c  Sat Dec  1 18:29:26 2001
--- textutils-i18n/src/wc.c     Tue Dec 11 00:09:40 2001
***************
*** 32,44 ****
  # include <wchar.h>
  #endif
  
! /* Get iswprint().  */
  #if HAVE_WCTYPE_H
  # include <wctype.h>
  #endif
  #if !defined iswprint && !HAVE_ISWPRINT
  # define iswprint(wc) 1
  #endif
  
  /* Include this after wctype.h so that we `#undef' ISPRINT
     (from Solaris's euc.h, from widec.h, from wctype.h) before
--- 35,50 ----
  # include <wchar.h>
  #endif
  
! /* Get iswprint(), iswspace().  */
  #if HAVE_WCTYPE_H
  # include <wctype.h>
  #endif
  #if !defined iswprint && !HAVE_ISWPRINT
  # define iswprint(wc) 1
  #endif
+ #if !defined iswspace && !HAVE_ISWSPACE
+ # define iswspace(wc) 1
+ #endif
  
  /* Include this after wctype.h so that we `#undef' ISPRINT
     (from Solaris's euc.h, from widec.h, from wctype.h) before
***************
*** 361,369 ****
                    case '\t':
                      linepos += 8 - (linepos % 8);
                      goto mb_word_separator;
-                   case ' ':
-                     linepos++;
-                     /* Fall through. */
                    case '\v':
                    mb_word_separator:
                      if (in_word)
--- 368,373 ----
***************
*** 378,383 ****
--- 382,389 ----
                          int width = wcwidth (wide_char);
                          if (width > 0)
                            linepos += width;
+                         if (iswspace (wide_char))
+                           goto mb_word_separator;
                          in_word = 1;
                        }
                      break;
***************
*** 437,445 ****
                case '\t':
                  linepos += 8 - (linepos % 8);
                  goto word_separator;
-               case ' ':
-                 linepos++;
-                 /* Fall through. */
                case '\v':
                word_separator:
                  if (in_word)
--- 443,448 ----
***************
*** 452,457 ****
--- 455,462 ----
                  if (ISPRINT ((unsigned char) p[-1]))
                    {
                      linepos++;
+                     if (ISSPACE ((unsigned char) p[-1]))
+                       goto word_separator;
                      in_word = 1;
                    }
                  break;



reply via email to

[Prev in Thread] Current Thread [Next in Thread]