bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] Add !HAVE_MBRTOWC fallbacks for mbchar, mbiter and mbuiter


From: Miloslav Trmac
Subject: [PATCH] Add !HAVE_MBRTOWC fallbacks for mbchar, mbiter and mbuiter
Date: Wed, 14 Feb 2007 18:13:00 +0100
User-agent: Thunderbird 1.5.0.9 (X11/20070212)

Hello,
I'm adding multibyte character support to the info viewer using the
mbiter and mbuiter modules.  Those modules currently depend on
mbrtowc () and friends.

The mbs* functions in gnulib currently have two separate implementations
of most algorithms, one for HAVE_MBRTOWC using mbchar, and one for
!HAVE_MBRTOWC.  To avoid duplicating the large amount of text-processing
code in the info viewer in a similar way, the attached patches add a
!HAVE_MBRTOWC implementation of the mbiter, mbuiter and mbchar interfaces.

        * lib/mbchar.h: #include <stddef.h> for ptrdiff_t.

        * lib/mbchar.h
        * lib/mbiter.h
        * lib/mbuiter.h: Add an implementation for !HAVE_MBRTOWC.
        * m4/mbchar.m4: Require gl_FUNC_MBRTOWC.
        * m4/mbiter.m4: Remove an obsolete comment.
        * modules/mbchar: Include m4/mbrtowc.m4.
        * modules/mbiter
        * modules/mbuiter: Don't suggest #if HAVE_MBRTOWC around header
        #includes.

Thanks,
        Mirek
Index: lib/mbchar.h
===================================================================
RCS file: /sources/gnulib/gnulib/lib/mbchar.h,v
retrieving revision 1.10
diff -u -r1.10 mbchar.h
--- lib/mbchar.h        27 Dec 2006 19:54:25 -0000      1.10
+++ lib/mbchar.h        14 Feb 2007 16:59:55 -0000
@@ -146,8 +146,15 @@
 #define _MBCHAR_H 1
 
 #include <stdbool.h>
+#include <stddef.h>
 #include <string.h>
 
+/* Multibyte characters could in principle be handled without mbrtowc (), but
+   all current users of mbchar (mbfile, mbiter and mbuiter) need mbrtowc (),
+   so their !HAVE_MBRTOWC fallbacks need a non-multibyte mbchar
+   implementation. */
+#if HAVE_MBRTOWC
+
 /* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
    <wchar.h>.
    BSD/OS 4.1 has a bug: <stdio.h> and <time.h> must be included before
@@ -274,6 +281,110 @@
     new_mbc->wc = old_mbc->wc;
 }
 
+#else /* not HAVE_MBRTOWC */
+
+#include <ctype.h>
+
+struct mbchar
+{
+  const char *ptr;     /* pointer to current character */
+  bool c_valid;                /* true if c is a valid character */
+  unsigned char c;     /* if c_valid: the current character */
+  char buf[1];         /* room for the character, used for file input only */
+};
+
+/* EOF (not a real character) is represented with c_valid = false.  */
+
+typedef struct mbchar mbchar_t;
+
+/* Access the current character.  */
+#define mb_ptr(mbc) ((mbc).ptr)
+#define mb_len(mbc) ((mbc).c_valid ? 1 : 0)
+
+/* Comparison of characters.  */
+#define mb_iseq(mbc, sc) ((mbc).c_valid && (mbc).c == (sc))
+#define mb_isnul(mbc) ((mbc).c_valid && (mbc).c == 0)
+#define mb_cmp(mbc1, mbc2) \
+  ((mbc1).c_valid                                                      \
+   ? ((mbc2).c_valid                                                   \
+      ? (int) (mbc1).c - (int) (mbc2).c                                \
+      : -1)                                                            \
+   : ((mbc2).c_valid                                                   \
+      ? 1                                                              \
+      : 0))
+#define mb_casecmp(mbc1, mbc2) \
+  ((mbc1).c_valid                                                      \
+   ? ((mbc2).c_valid                                                   \
+      ? ((int) (unsigned char) tolower ((mbc1).c)                      \
+        - (int) (unsigned char) tolower ((mbc2).c))                    \
+      : -1)                                                            \
+   : ((mbc2).c_valid                                                   \
+      ? 1                                                              \
+      : 0))
+#define mb_equal(mbc1, mbc2) \
+  ((mbc1).c_valid && (mbc2).c_valid                                    \
+   ? (mbc1).c == (mbc2).c                                              \
+   : !(mbc1).c_valid && !(mbc2).c_valid)
+#define mb_caseequal(mbc1, mbc2) \
+  ((mbc1).c_valid && (mbc2).c_valid                                    \
+   ? tolower ((mbc1).c) == tolower ((mbc2).c)                          \
+   : !(mbc1).c_valid && !(mbc2).c_valid)
+
+/* <ctype.h>, <wctype.h> classification.  */
+#define mb_isascii(mbc) ((mbc).c_valid && (mbc).c <= 127)
+#define mb_isalnum(mbc) ((mbc).c_valid && isalnum ((mbc).c))
+#define mb_isalpha(mbc) ((mbc).c_valid && isalpha ((mbc).c))
+#define mb_isblank(mbc) ((mbc).c_valid && isblank ((mbc).c))
+#define mb_iscntrl(mbc) ((mbc).c_valid && iscntrl ((mbc).c))
+#define mb_isdigit(mbc) ((mbc).c_valid && isdigit ((mbc).c))
+#define mb_isgraph(mbc) ((mbc).c_valid && isgraph ((mbc).c))
+#define mb_islower(mbc) ((mbc).c_valid && islower ((mbc).c))
+#define mb_isprint(mbc) ((mbc).c_valid && isprint ((mbc).c))
+#define mb_ispunct(mbc) ((mbc).c_valid && ispunct ((mbc).c))
+#define mb_isspace(mbc) ((mbc).c_valid && isspace ((mbc).c))
+#define mb_isupper(mbc) ((mbc).c_valid && isupper ((mbc).c))
+#define mb_isxdigit(mbc) ((mbc).c_valid && isxdigit ((mbc).c))
+
+/* Extra <wchar.h> function.  */
+
+/* Unprintable characters appear as a small box of width 1.  */
+#define MB_UNPRINTABLE_WIDTH 1
+
+static inline int
+mb_width_aux (int c)
+{
+  /* For unprintable characters, arbitrarily return 0 for control characters
+     and MB_UNPRINTABLE_WIDTH otherwise.  */
+  return isprint (c) ? 1 : iscntrl (c) ? 0 : MB_UNPRINTABLE_WIDTH;
+}
+
+#define mb_width(mbc) \
+  ((mbc).c_valid ? mb_width_aux ((mbc).c) : MB_UNPRINTABLE_WIDTH)
+
+/* Output.  */
+#define mb_putc(mbc, stream)  fwrite ((mbc).ptr, 1, mb_len (mbc), (stream))
+
+/* Assignment.  */
+#define mb_setascii(mbc, sc) \
+  ((mbc)->ptr = (mbc)->buf, (mbc)->c_valid = 1, \
+   (mbc)->c = (mbc)->buf[0] = (sc))
+
+/* Copying a character.  */
+static inline void
+mb_copy (mbchar_t *new_mbc, const mbchar_t *old_mbc)
+{
+  if (old_mbc->ptr == &old_mbc->buf[0])
+    {
+      new_mbc->buf[0] = old_mbc->buf[0];
+      new_mbc->ptr = &new_mbc->buf[0];
+    }
+  else
+    new_mbc->ptr = old_mbc->ptr;
+  if ((new_mbc->c_valid = old_mbc->c_valid))
+    new_mbc->c = old_mbc->c;
+}
+
+#endif /* not HAVE_MBRTOWC */
 
 /* is_basic(c) tests whether the single-byte character c is in the
    ISO C "basic character set".
Index: lib/mbiter.h
===================================================================
RCS file: /sources/gnulib/gnulib/lib/mbiter.h,v
retrieving revision 1.3
diff -u -r1.3 mbiter.h
--- lib/mbiter.h        11 Feb 2007 17:17:09 -0000      1.3
+++ lib/mbiter.h        14 Feb 2007 16:59:55 -0000
@@ -87,6 +87,8 @@
 #include <stdbool.h>
 #include <string.h>
 
+#ifdef HAVE_MBRTOWC
+
 /* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
    <wchar.h>.
    BSD/OS 4.1 has a bug: <stdio.h> and <time.h> must be included before
@@ -173,13 +175,6 @@
 }
 
 static inline void
-mbiter_multi_reloc (struct mbiter_multi *iter, ptrdiff_t ptrdiff)
-{
-  iter->cur.ptr += ptrdiff;
-  iter->limit += ptrdiff;
-}
-
-static inline void
 mbiter_multi_copy (struct mbiter_multi *new_iter, const struct mbiter_multi *old_iter)
 {
   new_iter->limit = old_iter->limit;
@@ -202,6 +197,59 @@
 #define mbi_advance(iter) \
   ((iter).cur.ptr += (iter).cur.bytes, (iter).next_done = false)
 
+#else /* not HAVE_MBRTOWC */
+
+#include "mbchar.h"
+
+struct mbiter_multi
+{
+  const char *limit;   /* pointer to end of string */
+  bool next_done;      /* true if mbi_avail has already filled the following */
+  struct mbchar cur;   /* the current character:
+       const char *cur.ptr             pointer to current character
+       The following are only valid after mbi_avail.
+       bool cur.c_valid                true if c is a valid character
+       unsigned char cur.c             if c_valid: the current character
+       */
+};
+
+static inline void
+mbiter_multi_next (struct mbiter_multi *iter)
+{
+  if (iter->next_done)
+    return;
+  iter->cur.c = *iter->cur.ptr;
+  iter->cur.c_valid = true;
+  iter->next_done = true;
+}
+
+static inline void
+mbiter_multi_copy (struct mbiter_multi *new_iter, const struct mbiter_multi *old_iter)
+{
+  new_iter->limit = old_iter->limit;
+  new_iter->next_done = old_iter->next_done;
+  mb_copy (&new_iter->cur, &old_iter->cur);
+}
+
+/* Iteration macros.  */
+typedef struct mbiter_multi mbi_iterator_t;
+#define mbi_init(iter, startptr, length) \
+  ((iter).cur.ptr = (startptr), (iter).limit = (iter).cur.ptr + (length), \
+   (iter).next_done = false)
+#define mbi_avail(iter) \
+  ((iter).cur.ptr < (iter).limit && (mbiter_multi_next (&(iter)), true))
+#define mbi_advance(iter) \
+  ((iter).cur.ptr++, (iter).next_done = false)
+
+#endif /* not HAVE_MBRTOWC */
+
+static inline void
+mbiter_multi_reloc (struct mbiter_multi *iter, ptrdiff_t ptrdiff)
+{
+  iter->cur.ptr += ptrdiff;
+  iter->limit += ptrdiff;
+}
+
 /* Access to the current character.  */
 #define mbi_cur(iter) (iter).cur
 #define mbi_cur_ptr(iter) (iter).cur.ptr
Index: lib/mbuiter.h
===================================================================
RCS file: /sources/gnulib/gnulib/lib/mbuiter.h,v
retrieving revision 1.2
diff -u -r1.2 mbuiter.h
--- lib/mbuiter.h       11 Feb 2007 17:17:09 -0000      1.2
+++ lib/mbuiter.h       14 Feb 2007 16:59:55 -0000
@@ -95,6 +95,8 @@
 #include <stdlib.h>
 #include <string.h>
 
+#ifdef HAVE_MBRTOWC
+
 /* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
    <wchar.h>.
    BSD/OS 4.1 has a bug: <stdio.h> and <time.h> must be included before
@@ -182,12 +184,6 @@
 }
 
 static inline void
-mbuiter_multi_reloc (struct mbuiter_multi *iter, ptrdiff_t ptrdiff)
-{
-  iter->cur.ptr += ptrdiff;
-}
-
-static inline void
 mbuiter_multi_copy (struct mbuiter_multi *new_iter, const struct mbuiter_multi *old_iter)
 {
   if ((new_iter->in_shift = old_iter->in_shift))
@@ -209,6 +205,56 @@
 #define mbui_advance(iter) \
   ((iter).cur.ptr += (iter).cur.bytes, (iter).next_done = false)
 
+#else /* not HAVE_MBRTOWC */
+
+#include "mbchar.h"
+
+struct mbuiter_multi
+{
+  bool next_done;      /* true if mbui_avail has already filled the following */
+  struct mbchar cur;   /* the current character:
+       const char *cur.ptr             pointer to current character
+       The following are only valid after mbui_avail.
+       bool cur.c_valid                true if c is a valid character
+       unsigned char cur.c             if c_valid: the current character
+       */
+};
+
+static inline void
+mbuiter_multi_next (struct mbuiter_multi *iter)
+{
+  if (iter->next_done)
+    return;
+  iter->cur.c = *iter->cur.ptr;
+  iter->cur.c_valid = true;
+  iter->next_done = true;
+}
+
+static inline void
+mbuiter_multi_copy (struct mbuiter_multi *new_iter, const struct mbuiter_multi *old_iter)
+{
+  new_iter->next_done = old_iter->next_done;
+  mb_copy (&new_iter->cur, &old_iter->cur);
+}
+
+/* Iteration macros.  */
+typedef struct mbuiter_multi mbui_iterator_t;
+#define mbui_init(iter, startptr) \
+  ((iter).cur.ptr = (startptr), \
+   (iter).next_done = false)
+#define mbui_avail(iter) \
+  (mbuiter_multi_next (&(iter)), !mb_isnul ((iter).cur))
+#define mbui_advance(iter) \
+  ((iter).cur.ptr++, (iter).next_done = false)
+
+#endif /* not HAVE_MBRTOWC */
+
+static inline void
+mbuiter_multi_reloc (struct mbuiter_multi *iter, ptrdiff_t ptrdiff)
+{
+  iter->cur.ptr += ptrdiff;
+}
+
 /* Access to the current character.  */
 #define mbui_cur(iter) (iter).cur
 #define mbui_cur_ptr(iter) (iter).cur.ptr
Index: m4/mbchar.m4
===================================================================
RCS file: /sources/gnulib/gnulib/m4/mbchar.m4,v
retrieving revision 1.7
diff -u -r1.7 mbchar.m4
--- m4/mbchar.m4        28 Jan 2007 16:00:03 -0000      1.7
+++ m4/mbchar.m4        14 Feb 2007 16:59:55 -0000
@@ -10,4 +10,5 @@
 AC_DEFUN([gl_MBCHAR],
 [
   AC_REQUIRE([AC_GNU_SOURCE])
+  AC_REQUIRE([gl_FUNC_MBRTOWC])
 ])
Index: m4/mbiter.m4
===================================================================
RCS file: /sources/gnulib/gnulib/m4/mbiter.m4,v
retrieving revision 1.2
diff -u -r1.2 mbiter.m4
--- m4/mbiter.m4        26 Sep 2005 13:58:51 -0000      1.2
+++ m4/mbiter.m4        14 Feb 2007 16:59:55 -0000
@@ -10,8 +10,6 @@
 AC_DEFUN([gl_MBITER],
 [
   AC_REQUIRE([AC_TYPE_MBSTATE_T])
-  dnl The following line is that so the user can test HAVE_MBRTOWC before
-  dnl #include "mbiter.h" or "mbuiter.h".
   AC_REQUIRE([gl_FUNC_MBRTOWC])
   :
 ])
Index: modules/mbchar
===================================================================
RCS file: /sources/gnulib/gnulib/modules/mbchar,v
retrieving revision 1.10
diff -u -r1.10 mbchar
--- modules/mbchar      28 Jan 2007 16:00:02 -0000      1.10
+++ modules/mbchar      14 Feb 2007 16:59:55 -0000
@@ -5,6 +5,7 @@
 lib/mbchar.h
 lib/mbchar.c
 m4/mbchar.m4
+m4/mbrtowc.m4
 
 Depends-on:
 stdbool
Index: modules/mbiter
===================================================================
RCS file: /sources/gnulib/gnulib/modules/mbiter,v
retrieving revision 1.2
diff -u -r1.2 mbiter
--- modules/mbiter      26 Sep 2005 13:58:51 -0000      1.2
+++ modules/mbiter      14 Feb 2007 16:59:55 -0000
@@ -17,9 +17,7 @@
 lib_SOURCES += mbiter.h
 
 Include:
-#if HAVE_MBRTOWC
 #include "mbiter.h"
-#endif
 
 License:
 LGPL
Index: modules/mbuiter
===================================================================
RCS file: /sources/gnulib/gnulib/modules/mbuiter,v
retrieving revision 1.2
diff -u -r1.2 mbuiter
--- modules/mbuiter     26 Sep 2005 13:58:51 -0000      1.2
+++ modules/mbuiter     14 Feb 2007 16:59:55 -0000
@@ -18,9 +18,7 @@
 lib_SOURCES += mbuiter.h
 
 Include:
-#if HAVE_MBRTOWC
 #include "mbuiter.h"
-#endif
 
 License:
 LGPL

reply via email to

[Prev in Thread] Current Thread [Next in Thread]