bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

new module mbscasecmp, reduce goal of module strcase


From: Bruno Haible
Subject: new module mbscasecmp, reduce goal of module strcase
Date: Mon, 5 Feb 2007 03:00:51 +0100
User-agent: KMail/1.5.4

This creates a module for the function mbscasecmp(), a variant of strcasecmp()
that works with multibyte strings.

The module strcase now NO LONGER takes care of providing an internationalized
strcasecmp()!! It only provides a replacement for platforms which don't have
this function.

2007-02-04  Bruno Haible  <address@hidden>

        New module mbscasecmp, reduced goal of strcasecmp.
        * modules/mbscasecmp: New file.
        * lib/mbscasecmp.c: New file, copied from lib/strcasecmp.c.
        (mbscasecmp): Renamed from strcasecmp.
        * lib/strcasecmp.c: Don't include mbuiter.h.
        (strcasecmp): Remove support for multibyte locales.
        * lib/string_.h (strcasecmp): Don't rename. Declare only if missing.
        Change the conditional link warning.
        (mbscasecmp): New declaration.
        * m4/mbscasecmp.m4: New file.
        * m4/string_h.m4 (gl_STRING_MODULE_INDICATOR_DEFAULTS): Initialize
        GNULIB_MBSCASECMP.
        * modules/string (string.h): Also substitute GNULIB_MBSCASECMP.
        * MODULES.html.sh (Internationalization functions): Add mbscasecmp.

========================== modules/mbscasecmp ==============================
Description:
mbscasecmp() function: case-insensitive string comparison.

Files:
lib/mbscasecmp.c
m4/mbscasecmp.m4
m4/mbrtowc.m4

Depends-on:
mbuiter
string

configure.ac:
gl_FUNC_MBSCASECMP
gl_STRING_MODULE_INDICATOR([mbscasecmp])

Makefile.am:
lib_SOURCES += mbscasecmp.c

Include:
<string.h>

License:
LGPL

Maintainer:
Bruno Haible

============================= m4/mbscasecmp.m4 =============================
# mbscasecmp.m4 serial 1
dnl Copyright (C) 2007 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
dnl with or without modifications, as long as this notice is preserved.

AC_DEFUN([gl_FUNC_MBSCASECMP],
[
  gl_PREREQ_MBSCASECMP
])

# Prerequisites of lib/mbscasecmp.c.
AC_DEFUN([gl_PREREQ_MBSCASECMP], [
  AC_REQUIRE([gl_FUNC_MBRTOWC])
  :
])
============================================================================
--- MODULES.html.sh     5 Feb 2007 01:36:34 -0000       1.183
+++ MODULES.html.sh     5 Feb 2007 01:52:10 -0000
@@ -2163,6 +2163,7 @@
   func_module mbschr
   func_module mbsrchr
   func_module mbsstr
+  func_module mbscasecmp
   func_module mbswidth
   func_module memcasecmp
   func_module memcoll
--- lib/mbscasecmp.c    5 Feb 2007 01:40:45 -0000       1.1
+++ lib/mbscasecmp.c    5 Feb 2007 01:52:10 -0000
@@ -31,13 +31,13 @@
 
 #define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
 
-/* Compare strings S1 and S2, ignoring case, returning less than, equal to or
-   greater than zero if S1 is lexicographically less than, equal to or greater
-   than S2.
+/* Compare the character strings S1 and S2, ignoring case, returning less than,
+   equal to or greater than zero if S1 is lexicographically less than, equal to
+   or greater than S2.
    Note: This function may, in multibyte locales, return 0 for strings of
    different lengths!  */
 int
-strcasecmp (const char *s1, const char *s2)
+mbscasecmp (const char *s1, const char *s2)
 {
   if (s1 == s2)
     return 0;
--- lib/strcasecmp.c    26 Jan 2007 22:16:55 -0000      1.13
+++ lib/strcasecmp.c    5 Feb 2007 01:52:11 -0000
@@ -1,7 +1,5 @@
 /* Case-insensitive string comparison function.
    Copyright (C) 1998-1999, 2005-2007 Free Software Foundation, Inc.
-   Written by Bruno Haible <address@hidden>, 2005,
-   based on earlier glibc code.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -25,79 +23,41 @@
 #include <ctype.h>
 #include <limits.h>
 
-#if HAVE_MBRTOWC
-# include "mbuiter.h"
-#endif
-
 #define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
 
 /* Compare strings S1 and S2, ignoring case, returning less than, equal to or
    greater than zero if S1 is lexicographically less than, equal to or greater
    than S2.
-   Note: This function may, in multibyte locales, return 0 for strings of
-   different lengths!  */
+   Note: This function does not work with multibyte strings!  */
+
 int
 strcasecmp (const char *s1, const char *s2)
 {
-  if (s1 == s2)
+  const unsigned char *p1 = (const unsigned char *) s1;
+  const unsigned char *p2 = (const unsigned char *) s2;
+  unsigned char c1, c2;
+
+  if (p1 == p2)
     return 0;
 
-  /* Be careful not to look at the entire extent of s1 or s2 until needed.
-     This is useful because when two strings differ, the difference is
-     most often already in the very few first characters.  */
-#if HAVE_MBRTOWC
-  if (MB_CUR_MAX > 1)
+  do
     {
-      mbui_iterator_t iter1;
-      mbui_iterator_t iter2;
+      c1 = TOLOWER (*p1);
+      c2 = TOLOWER (*p2);
 
-      mbui_init (iter1, s1);
-      mbui_init (iter2, s2);
+      if (c1 == '\0')
+       break;
 
-      while (mbui_avail (iter1) && mbui_avail (iter2))
-       {
-         int cmp = mb_casecmp (mbui_cur (iter1), mbui_cur (iter2));
-
-         if (cmp != 0)
-           return cmp;
-
-         mbui_advance (iter1);
-         mbui_advance (iter2);
-       }
-      if (mbui_avail (iter1))
-       /* s2 terminated before s1.  */
-       return 1;
-      if (mbui_avail (iter2))
-       /* s1 terminated before s2.  */
-       return -1;
-      return 0;
+      ++p1;
+      ++p2;
     }
+  while (c1 == c2);
+
+  if (UCHAR_MAX <= INT_MAX)
+    return c1 - c2;
   else
-#endif
-    {
-      const unsigned char *p1 = (const unsigned char *) s1;
-      const unsigned char *p2 = (const unsigned char *) s2;
-      unsigned char c1, c2;
-
-      do
-       {
-         c1 = TOLOWER (*p1);
-         c2 = TOLOWER (*p2);
-
-         if (c1 == '\0')
-           break;
-
-         ++p1;
-         ++p2;
-       }
-      while (c1 == c2);
-
-      if (UCHAR_MAX <= INT_MAX)
-       return c1 - c2;
-      else
-       /* On machines where 'char' and 'int' are types of the same size, the
-          difference of two 'unsigned char' values - including the sign bit -
-          doesn't fit in an 'int'.  */
-       return (c1 > c2 ? 1 : c1 < c2 ? -1 : 0);
-    }
+    /* On machines where 'char' and 'int' are types of the same size, the
+       difference of two 'unsigned char' values - including the sign bit -
+       doesn't fit in an 'int'.  */
+    return (c1 > c2 ? 1 : c1 < c2 ? -1 : 0);
 }
--- lib/string_.h       5 Feb 2007 01:36:34 -0000       1.10
+++ lib/string_.h       5 Feb 2007 01:52:11 -0000
@@ -115,20 +115,17 @@
 /* Compare strings S1 and S2, ignoring case, returning less than, equal to or
    greater than zero if S1 is lexicographically less than, equal to or greater
    than S2.
-   Note: This function may, in multibyte locales, return 0 for strings of
-   different lengths!
-   No known system has a strcasecmp() function that works correctly in
-   multibyte locales.  Therefore use our version always, if the
-   strcase module is available.  */
-#if @GNULIB_STRCASE@
-# if @REPLACE_STRCASECMP@
-#  define strcasecmp rpl_strcasecmp
-extern int strcasecmp (char const *__s1, char const *__s2);
-# endif
-#elif defined GNULIB_POSIXCHECK
+   Note: This function does not work in multibyte locales.  */
+#if ! @HAVE_STRCASECMP@
+extern int strcasecmp (char const *s1, char const *s2);
+#endif
+#if defined GNULIB_POSIXCHECK
+/* strcasecmp() does not work with multibyte strings:
+   POSIX says that it operates on "strings", and "string" in POSIX is defined
+   as a sequence of bytes, not of characters.   */
 # undef strcasecmp
 # define strcasecmp(a,b) \
-    (GL_LINK_WARNING ("strcasecmp is often incorrectly implemented for multibyte locales - use gnulib module 'strcase' for correct and portable internationalization"), \
+    (GL_LINK_WARNING ("strcasecmp cannot work correctly on character strings in multibyte locales - use mbscasecmp if you care about internationalization, or use c_strcasecmp (from gnulib module c-strcase) if you want a locale independent function"), \
      strcasecmp (a, b))
 #endif
 
@@ -337,6 +334,16 @@
 extern char * mbsstr (const char *haystack, const char *needle);
 #endif
 
+#if @GNULIB_MBSCASECMP@
+/* Compare the character strings S1 and S2, ignoring case, returning less than,
+   equal to or greater than zero if S1 is lexicographically less than, equal to
+   or greater than S2.
+   Note: This function may, in multibyte locales, return 0 for strings of
+   different lengths!
+   Unlike strcasecmp(), this function works correctly in multibyte locales.  */
+extern int mbscasecmp (const char *s1, const char *s2);
+#endif
+
 
 #ifdef __cplusplus
 }
--- m4/string_h.m4      5 Feb 2007 01:36:34 -0000       1.8
+++ m4/string_h.m4      5 Feb 2007 01:52:11 -0000
@@ -71,4 +71,5 @@
   GNULIB_MBSCHR=0;      AC_SUBST([GNULIB_MBSCHR])
   GNULIB_MBSRCHR=0;     AC_SUBST([GNULIB_MBSRCHR])
   GNULIB_MBSSTR=0;      AC_SUBST([GNULIB_MBSSTR])
+  GNULIB_MBSCASECMP=0;  AC_SUBST([GNULIB_MBSCASECMP])
 ])
--- modules/string      5 Feb 2007 01:36:34 -0000       1.7
+++ modules/string      5 Feb 2007 01:52:11 -0000
@@ -24,6 +24,7 @@
              -e 's|@''GNULIB_MBSCHR''@|$(GNULIB_MBSCHR)|g' \
              -e 's|@''GNULIB_MBSRCHR''@|$(GNULIB_MBSRCHR)|g' \
              -e 's|@''GNULIB_MBSSTR''@|$(GNULIB_MBSSTR)|g' \
+             -e 's|@''GNULIB_MBSCASECMP''@|$(GNULIB_MBSCASECMP)|g' \
              -e 's|@''GNULIB_MEMMEM''@|$(GNULIB_MEMMEM)|g' \
              -e 's|@''GNULIB_MEMPCPY''@|$(GNULIB_MEMPCPY)|g' \
              -e 's|@''GNULIB_MEMRCHR''@|$(GNULIB_MEMRCHR)|g' \





reply via email to

[Prev in Thread] Current Thread [Next in Thread]