emacs-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: Editing the 0x80..0x90 characters


From: Stefan Monnier
Subject: Re: Editing the 0x80..0x90 characters
Date: Wed, 08 May 2002 20:15:04 -0400

> > I'm more interested in the fundamental idea of using
> > the mule-unicode charset instead of the eight-bit-(graphic|control)
> > charset to encode the non-iso-8859-5 characters.
> 
> IMHO, there's nothing wrong with that idea.  Of course, users who use such
> code will have to make sure their preferences are set up correctly because
> saving the resulting buffer with anything but the same cpNNN encoding will
> be, well, tricky (due to mixed character sets).

I don't understand what you mean.  Currently those coding systems
decode into (or encode from) ascii + latin-iso8859-5 + eight-bit-control
+ eight-bit-graphic.  The idea is to change it to decode into
ascii + latin-iso8859-5 + mule-unicode.  So I don't see how the problem
of mixed character sets is changed.

I don't understand anything about cpXXX charsets, so I'm not sure
how to fix the problems you mentioned earlier.
What do you think of the patch below ?
I'm not sure what the koi8-u stuff is about.  I suspect it's also
meant for Ukrainian, so the new language environment should maybe
be "Ukrainian" rather than "Cyrillic-KOI8-U" since that's what
mule-cmds.el seems to expect for the "uk" locale.


        Stefan


Index: cyrillic.el
===================================================================
RCS file: /cvsroot/emacs/emacs/lisp/language/cyrillic.el,v
retrieving revision 1.30
diff -u -u -b -r1.30 cyrillic.el
--- cyrillic.el 18 Dec 2001 17:50:12 -0000      1.30
+++ cyrillic.el 9 May 2002 00:13:53 -0000
@@ -25,8 +25,10 @@
 ;;; Commentary:
 
 ;; The character set ISO8859-5 is supported.  See
-;; http://www.ecma.ch/ecma1/STAND/ECMA-113.HTM.  KOI-8 and
+;; <URL:http://www.ecma.ch/ecma1/STAND/ECMA-113.HTM>.  KOI-8 and
 ;; ALTERNATIVNYJ are converted to ISO8859-5 internally.
+;; For more info on Cyrillic charsets, see
+;; <URL:http://czyborra.com/charsets/cyrillic.html>.  
 
 ;;; Code:
 
@@ -56,8 +58,11 @@
                  (documentation . "Support for Cyrillic ISO-8859-5."))
  '("Cyrillic"))
 
-;; KOI-8 staff
+;; KOI-8 stuff
 
+;; The mule-unicode portion of this is from
+;; http://www.unicode.org/Public/MAPPINGS/VENDORS/MISC/KOI8-R.TXT,
+;; which references RFC 1489.
 (defvar cyrillic-koi8-r-decode-table
   [
    0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
@@ -68,10 +73,10 @@
    80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
    96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
    112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
-   128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
-   144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159
-   160 161 162 ?164 165 166 167 168 169 170 171 172 173 174 175  176 177 178 
?180 181 182 183 184 185 186 187 188 189 190 191  ? ? ¢ ? ¬ ? ° ? ´ ? ¸ ? ¼ ? Ä 
? Ì ? Ô ? Ü ?¡À ?¡Ä ?¡È ?¡Ì ?¡Ð  ? ?¡Ò ?¡Ó ? ? ? ?øº ?øè ?ù¤ ?ù¥ ? ?² ?· ?÷
+   ? ? ñ ? ò ? ? ô ? õ ? ö ? ÷ ? ø ? ù ? ú ? û ? ü ? ý ? þ  ? ?¡  ?¡¡ ? ?¡£ 
?¡¤ ?¡¥ ?¡¦ ?¡§ ?¡¨ ?¡© ?¡ª ?¡« ?¡¬ ?   ??Ð  ?Ñ  ?æ  ?Ô  ?Õ  ?ä  ?Ó  ?å  ?Ø  ?Ù 
 ?Ú  ?Û  ?Ü  ?Ý  ?Þ   ??ï  ?à  ?á  ?â  ?ã  ?Ö  ?Ò  ?ì  ?ë  ?×  ?è  ?í  ?é  ?ç  
?ê   ??°  ?±  ?Æ  ?´  ?µ  ?Ä  ?³  ?Å  ?¸  ?¹  ?º  ?»  ?¼  ?½  ?¾  -94,16 +99,15 
@@
        ((translate-character cyrillic-koi8-r-nonascii-translation-table r0 r1)
         (write-multibyte-character r0 r1)
         (repeat))))))
-  "CCL program to decode KOI8.")
+  "CCL program to decode KOI8-R.")
 
 (define-ccl-program ccl-encode-koi8
   `(1
     ((loop
       (read-multibyte-character r0 r1)
-      (if (r0 == ,(charset-id 'cyrillic-iso8859-5))
-         (translate-character cyrillic-koi8-r-encode-table r0 r1))
+      (translate-character cyrillic-koi8-r-encode-table r0 r1)
       (write-repeat r1))))
-  "CCL program to encode KOI8.")
+  "CCL program to encode KOI8-R.")
             
 (make-coding-system
  'cyrillic-koi8 4
@@ -127,6 +131,7 @@
 
 (define-coding-system-alias 'koi8-r 'cyrillic-koi8)
 (define-coding-system-alias 'koi8 'cyrillic-koi8)
+;; (define-coding-system-alias 'cp878 'cyrillic-koi8)
 
 (define-ccl-program ccl-encode-koi8-font
   `(0
@@ -150,6 +155,90 @@
                   (documentation . "Support for Cyrillic KOI8-R."))
  '("Cyrillic"))
 
+
+(defvar cyrillic-koi8-u-decode-table
+  [
+   0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+   16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+   32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
+   48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
+   64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79
+   80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
+   96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
+   112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
+   ? ? ¢ ? ¬ ? ° ? ´ ? ¸ ? ¼ ? Ä ? Ì ? Ô ? Ü ?¡À ?¡Ä ?¡È ?¡Ì ?¡Ð  ? ?¡Ò ?¡Ó ? 
? ? ?øº ?øè ?ù¤ ?ù¥ ? ?² ?· ?÷
+   ? ? ñ ? ò ?ô ? ?÷ ? ? ø ? ù ? ú ? û ? ? ? þ  ? ?¡  ?¡¡ ?¤ ? ?§ ? ?¡§ ?¡¨ 
?¡© ?¡ª ? ? ?   ??Ð  ?Ñ  ?æ  ?Ô  ?Õ  ?ä  ?Ó  ?å  ?Ø  ?Ù  ?Ú  ?Û  ?Ü  ?Ý  ?Þ   
??ï  ?à  ?á  ?â  ?ã  ?Ö  ?Ò  ?ì  ?ë  ?×  ?è  ?í  ?é  ?ç  ?ê   ??°  ?±  ?Æ  ?´  
?µ  ?Ä  ?³  ?Å  ?¸  ?¹  ?º  ?»  ?¼  ?½  ?¾   ??Ï  ?À  ?Á  ?  ?à  ?¶  ?²  ?Ì  
?Ë  ?·  ?È  ?Í  ?É  ?Ç  ?Ê ] "Cyrillic KOI8-U decoding table.")
+
+(let ((table (make-translation-table-from-vector
+             cyrillic-koi8-u-decode-table)))
+  (define-translation-table 'cyrillic-koi8-u-nonascii-translation-table table)
+  (define-translation-table 'cyrillic-koi8-u-encode-table
+    (char-table-extra-slot table 0)))
+
+(define-ccl-program ccl-decode-koi8-u
+  `(3
+    ((loop
+      (r0 = 0)
+      (read r1)
+      (if (r1 < 128)
+         (write-repeat r1)
+       ((translate-character cyrillic-koi8-u-nonascii-translation-table r0 r1)
+        (write-multibyte-character r0 r1)
+        (repeat))))))
+  "CCL program to decode KOI8-U.")
+
+(define-ccl-program ccl-encode-koi8-u
+  `(1
+    ((loop
+      (read-multibyte-character r0 r1)
+      (translate-character cyrillic-koi8-u-encode-table r0 r1)
+      (write-repeat r1))))
+  "CCL program to encode KOI8-U.")
+
+(make-coding-system
+ 'koi8-u 4
+ ?U "KOI8 8-bit encoding for Cyrillic (MIME: KOI8-U)"
+ '(ccl-decode-koi8-u . ccl-encode-koi8-u)
+ `((safe-chars . ,(let ((table (make-char-table 'safe-chars))
+                       (i 0))
+                   (while (< i 256)
+                     (aset table (aref cyrillic-koi8-u-decode-table i) t)
+                     (setq i (1+ i)))
+                   table))
+   (mime-charset . koi8-u)
+   (valid-codes (0 . 127) 163 179 (192 . 255))
+   (charset-origin-alist (cyrillic-iso8859-5 "KOI8-U"
+                                            cyrillic-encode-koi8-u-char))))
+
+(define-ccl-program ccl-encode-koi8-u-font
+  `(0
+    ((translate-character cyrillic-koi8-u-encode-table r0 r1)))
+  "CCL program to encode Cyrillic chars to KOI8-U font.")
+
+(setq font-ccl-encoder-alist
+      (cons '("koi8-u" . ccl-encode-koi8-u-font) font-ccl-encoder-alist))
+
+(set-language-info-alist
+ "Cyrillic-KOI8-U" `((charset cyrillic-iso8859-5)
+                  (nonascii-translation
+                   . ,(get 'cyrillic-koi8-u-nonascii-translation-table
+                           'translation-table))
+                  (coding-system cyrillic-koi8-u)
+                  (coding-priority cyrillic-koi8-u)
+                  (input-method . "cyrillic-jcuken")
+                  (features cyril-util)
+                  (unibyte-display . cyrillic-koi8-u)
+                  (sample-text . "Russian (áÚØÙ)L·ÔàÐÒáâÒãÙâÕ!")          
(documentation . "Support for Cyrillic KOI8-U."))
+ '("Cyrillic"))
+
 ;;; ALTERNATIVNYJ staff
 
 (defvar cyrillic-alternativnyj-decode-table
@@ -165,11 +254,11 @@
    ??±  ?²  ?³  ?´  ?µ  ?¶  ?·  ?¸  ?¹  ?º  ?»  ?¼  ?½  ?¾  ?¿  ??Á  ?  ?à  
?Ä  ?Å  ?Æ  ?Ç  ?È  ?É  ?Ê  ?Ë  ?Ì  ?Í  ?Î  ?Ï  ??Ñ  ?Ò  ?Ó  ?Ô  ?Õ  ?Ö  ?×  ?Ø 
 ?Ù  ?Ú  ?Û  ?Ü  ?Ý  ?Þ  ?ß  176 177 178 179 180 181 182 183 184 185 186 187 
188 189 190 191
-   192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207
-   208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223
+   ?  ?¡Ò  ?¡Ó  ? ¢  ? Ä  ?¡¡  ?¡¢  ? ö  ? õ  ?¡£  ? ñ  ? ÷  ? ý  ? ü  ? û  ? 
°  ?  ? Ô  ? Ì  ? ¼  ?    ? Ü  ? þ  ? ÿ  ? ú  ? ô  ?¡©  ?¡¦  ?¡   ? ð  ?¡¬  ?¡§ 
 ?  ?¡¤  ?¡¥  ? ù  ? ø  ? ò  ? ó  ?¡«  ?¡ª  ? ¸  ? ¬  ?¡È  ?¡Ä  ?¡Ì  ?¡Ð  ?¡À  
??á  ?â  ?ã  ?ä  ?å  ?æ  ?ç  ?è  ?é  ?ê  ?ë  ?ì  ?í  ?î  ?ï  ??ñ  242 243 244 
245 246 247 248 249 250 251 252 253 254 ?ð]  ??ñ  ?  ?¨ô  ?¨§  ?¨÷  ?¨®  ?¨þ  
??  ??  ???  ?A]
   "Cyrillic ALTERNATIVNYJ decoding table.")
 
 (let ((table (make-translation-table-from-vector
@@ -213,11 +302,13 @@
                      (setq i (1+ i)))
                    table))
    (valid-codes (0 . 175) (224 . 241) 255)
+   ;; (mime-charset . cp866)
    (charset-origin-alist (cyrillic-iso8859-5 "ALTERNATIVNYJ"
                                             cyrillic-encode-koi8-r-char))))
 
 
 (define-coding-system-alias 'alternativnyj 'cyrillic-alternativnyj)
+;; (define-coding-system-alias 'cp866 'cyrillic-alternativnyj)
 
 (define-ccl-program ccl-encode-alternativnyj-font
   '(0




reply via email to

[Prev in Thread] Current Thread [Next in Thread]