bug-gnu-emacs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

bug#3745: 23.0.95; emacs-23.0.95: unibyte-display-via-language-environment


From: Kenichi Handa
Subject: bug#3745: 23.0.95; emacs-23.0.95: unibyte-display-via-language-environment
Date: Mon, 06 Jul 2009 15:50:58 +0900

In article <tl74otqk501.fsf@m17n.org>, Kenichi Handa <handa@m17n.org> writes:

> But, using unibyte_char_to_multibyte here is a clear bug.
> If the overhead of DECODE_CHAR is intolerable (I don't
> believe it), we can do this:

> (1) modify unibyte_char_to_multibyte to use BYTE8_TO_CHAR
>     instead of the table unibyte_to_multibyte_table.
> (2) Setup unibyte_to_multibyte_table for unibyte_charset.
> (3) Just lookup that table in x_produce_glyphs.

To minimize the changes, I made the attached patch.  It
doesn't touch unibyte_to_multibyte_table, but introduces
charset_unibyte_decoder[128].  I confirmed that it doesn't make
the display code slow.

---
Kenichi Handa
handa@m17n.org

Index: character.c
===================================================================
RCS file: /cvsroot/emacs/emacs/src/character.c,v
retrieving revision 1.24
diff -u -r1.24 character.c
--- character.c 5 Feb 2009 08:46:52 -0000       1.24
+++ character.c 6 Jul 2009 06:42:31 -0000
@@ -90,9 +90,9 @@
 /* Mapping table from unibyte chars to multibyte chars.  */
 int unibyte_to_multibyte_table[256];
 
-/* Nth element is 1 iff unibyte char N can be mapped to a multibyte
-   char.  */
-char unibyte_has_multibyte_table[256];
+/* Decoding table for 8-bit byte codes of the charset charset_unibyte.
+   Nth element is for the code (N-0x80).  */
+int charset_unibyte_decoder[128];
 
 
 
@@ -270,9 +270,8 @@
   return c;
 }
 
-/* Convert the multibyte character C to unibyte 8-bit character based
-   on the current value of charset_unibyte.  If dimension of
-   charset_unibyte is more than one, return (C & 0xFF).
+/* Convert ASCII or 8-bit character C to unibyte.  If C is none of
+   them, return (C & 0xFF).
 
    The argument REV_TBL is now ignored.  It will be removed in the
    future.  */
@@ -282,14 +281,11 @@
      int c;
      Lisp_Object rev_tbl;
 {
-  struct charset *charset;
-  unsigned c1;
-
+  if (c < 0x80)
+    return c;
   if (CHAR_BYTE8_P (c))
     return CHAR_TO_BYTE8 (c);
-  charset = CHARSET_FROM_ID (charset_unibyte);
-  c1 = ENCODE_CHAR (charset, c);
-  return ((c1 != CHARSET_INVALID_CODE (charset)) ? c1 : c & 0xFF);
+  return (c & 0xFF);
 }
 
 /* Like multibyte_char_to_unibyte, but return -1 if C is not supported
@@ -302,11 +298,11 @@
   struct charset *charset;
   unsigned c1;
 
+  if (c < 0x80)
+    return c;
   if (CHAR_BYTE8_P (c))
     return CHAR_TO_BYTE8 (c);
-  charset = CHARSET_FROM_ID (charset_unibyte);
-  c1 = ENCODE_CHAR (charset, c);
-  return ((c1 != CHARSET_INVALID_CODE (charset)) ? c1 : -1);
+  return -1;
 }
 
 DEFUN ("characterp", Fcharacterp, Scharacterp, 1, 2, 0,
@@ -337,10 +333,8 @@
   c = XFASTINT (ch);
   if (c >= 0400)
     error ("Invalid unibyte character: %d", c);
-  charset = CHARSET_FROM_ID (charset_unibyte);
-  c = DECODE_CHAR (charset, c);
-  if (c < 0)
-    c = BYTE8_TO_CHAR (XFASTINT (ch));
+  if (c >= 0x80)
+    c = BYTE8_TO_CHAR (c);
   return make_number (c);
 }
 
Index: character.h
===================================================================
RCS file: /cvsroot/emacs/emacs/src/character.h,v
retrieving revision 1.15
diff -u -r1.15 character.h
--- character.h 8 Jan 2009 03:15:27 -0000       1.15
+++ character.h 6 Jul 2009 06:42:31 -0000
@@ -87,11 +87,15 @@
 #define unibyte_char_to_multibyte(c)   \
   ((c) < 256 ? unibyte_to_multibyte_table[(c)] : (c))
 
-/* Nth element is 1 iff unibyte char N can be mapped to a multibyte
-   char.  */
-extern char unibyte_has_multibyte_table[256];
-
-#define UNIBYTE_CHAR_HAS_MULTIBYTE_P(c) (unibyte_has_multibyte_table[(c)])
+/* Decoding table for 8-bit byte codes of the charset charset_unibyte.
+   Nth element is for the code (N-0x80).  */
+extern int charset_unibyte_decoder[128];
+
+/* Return a character corresponding to the code BYTE of
+   charset_unibyte.  BYTE must be a byte; i.e. less than 0x100.  If
+   BYTE is not a valid code of charset_unibyte, return -1.  */
+#define DECODE_UNIBYTE(BYTE)   \
+  ((BYTE) < 0x80 ? (int) (BYTE) : charset_unibyte_decoder[(BYTE) - 0x80])
 
 /* If C is not ASCII, make it unibyte. */
 #define MAKE_CHAR_UNIBYTE(c)   \
Index: charset.c
===================================================================
RCS file: /cvsroot/emacs/emacs/src/charset.c,v
retrieving revision 1.179
diff -u -r1.179 charset.c
--- charset.c   9 Jun 2009 02:53:07 -0000       1.179
+++ charset.c   6 Jul 2009 06:42:32 -0000
@@ -2260,6 +2260,7 @@
   Vcharset_ordered_list = Fnconc (2, arglist);
   charset_ordered_list_tick++;
 
+  charset_unibyte = -1;
   for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
        CONSP (old_list); old_list = XCDR (old_list))
     {
@@ -2267,9 +2268,25 @@
        list_2022 = Fcons (XCAR (old_list), list_2022);
       if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
        list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
+      if (charset_unibyte < 0)
+       {
+         struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (old_list)));
+
+         if (CHARSET_DIMENSION (charset) == 1
+             && CHARSET_ASCII_COMPATIBLE_P (charset)
+             && CHARSET_MAX_CHAR (charset) >= 0x80)
+           charset_unibyte = CHARSET_ID (charset);
+       }
     }
   Viso_2022_charset_list = Fnreverse (list_2022);
   Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
+  if (charset_unibyte < 0)
+    charset_unibyte = charset_iso_8859_1;
+  {
+    struct charset *charset = CHARSET_FROM_ID (charset_unibyte);
+    for (i = 128; i < 256; i++)
+      charset_unibyte_decoder[i - 128] = DECODE_CHAR (charset, i);
+  }
 
   return Qnil;
 }
@@ -2328,6 +2345,10 @@
     unibyte_to_multibyte_table[i] = i;
   for (; i < 256; i++)
     unibyte_to_multibyte_table[i] = BYTE8_TO_CHAR (i);
+  for (i = 0; i < 32; i++)
+    charset_unibyte_decoder[i] = -1;
+  for (; i < 128; i++)
+    charset_unibyte_decoder[i] = 128 + i;
 }
 
 #ifdef emacs
@@ -2429,6 +2450,7 @@
     = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
                               128, 255, -1, 0, -1, 0, 1,
                               MAX_5_BYTE_CHAR + 1);
+  charset_unibyte = charset_iso_8859_1;
 }
 
 #endif /* emacs */
Index: xdisp.c
===================================================================
RCS file: /cvsroot/emacs/emacs/src/xdisp.c,v
retrieving revision 1.1288
diff -u -r1.1288 xdisp.c
--- xdisp.c     18 Jun 2009 09:49:07 -0000      1.1288
+++ xdisp.c     6 Jul 2009 06:42:34 -0000
@@ -5743,7 +5743,7 @@
                                  || it->c == 0xAD /* SOFT HYPHEN */)))
                       : (it->c >= 127
                          && (! unibyte_display_via_language_environment
-                             || (UNIBYTE_CHAR_HAS_MULTIBYTE_P (it->c)))))))
+                             || (DECODE_UNIBYTE (it->c) <= 0xA0))))))
            {
              /* IT->c is a control character which must be displayed
                 either as '\003' or as `^C' where the '\\' and '^'
@@ -21196,9 +21196,8 @@
        {
          if (SINGLE_BYTE_CHAR_P (it->c)
              && unibyte_display_via_language_environment)
-           it->char_to_display = unibyte_char_to_multibyte (it->c);
-         if (! SINGLE_BYTE_CHAR_P (it->char_to_display))
            {
+             it->char_to_display = DECODE_UNIBYTE (it->c);
              it->multibyte_p = 1;
              it->face_id = FACE_FOR_CHAR (it->f, face, it->char_to_display,
                                           -1, Qnil);





reply via email to

[Prev in Thread] Current Thread [Next in Thread]