Line data Source code
1 : ;;; mule-conf.el --- configure multilingual environment
2 :
3 : ;; Copyright (C) 1997-2017 Free Software Foundation, Inc.
4 : ;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
5 : ;; National Institute of Advanced Industrial Science and Technology (AIST)
6 : ;; Registration Number H14PRO021
7 : ;; Copyright (C) 2003
8 : ;; National Institute of Advanced Industrial Science and Technology (AIST)
9 : ;; Registration Number H13PRO009
10 :
11 : ;; Keywords: i18n, mule, multilingual, character set, coding system
12 :
13 : ;; This file is part of GNU Emacs.
14 :
15 : ;; GNU Emacs is free software: you can redistribute it and/or modify
16 : ;; it under the terms of the GNU General Public License as published by
17 : ;; the Free Software Foundation, either version 3 of the License, or
18 : ;; (at your option) any later version.
19 :
20 : ;; GNU Emacs is distributed in the hope that it will be useful,
21 : ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
22 : ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 : ;; GNU General Public License for more details.
24 :
25 : ;; You should have received a copy of the GNU General Public License
26 : ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
27 :
28 : ;;; Commentary:
29 :
30 : ;; This file defines the Emacs charsets and some basic coding systems.
31 : ;; Other coding systems are defined in the files in directory
32 : ;; lisp/language.
33 :
34 : ;;; Code:
35 :
36 : ;;; Remarks
37 :
38 : ;; The ISO-IR registry is maintained by the Information Processing
39 : ;; Society of Japan/Information Technology Standards Commission of
40 : ;; Japan (IPSJ/ITSCJ) at https://www.itscj.ipsj.or.jp/itscj_english/.
41 : ;; Standards docs equivalent to iso-2022 and iso-8859 are at
42 : ;; http://www.ecma.ch/.
43 :
44 : ;; FWIW, http://www.microsoft.com/globaldev/ lists the following for
45 : ;; MS Windows, which are presumably the only charsets we really need
46 : ;; to worry about on such systems:
47 : ;; `OEM codepages': 437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866
48 : ;; `Windows codepages': 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257,
49 : ;; 1258, 874, 932, 936, 949, 950
50 :
51 : ;;; Definitions of character sets.
52 :
53 : ;; The charsets `ascii', `unicode', `emacs' and `eight-bit' are already
54 : ;; defined in charset.c as below:
55 : ;;
56 : ;; (define-charset 'ascii
57 : ;; ""
58 : ;; :dimension 1
59 : ;; :code-space [0 127]
60 : ;; :iso-final-char ?B
61 : ;; :ascii-compatible-p t
62 : ;; :emacs-mule-id 0
63 : ;; :code-offset 0)
64 : ;;
65 : ;; (define-charset 'unicode
66 : ;; ""
67 : ;; :dimension 3
68 : ;; :code-space [0 255 0 255 0 16]
69 : ;; :ascii-compatible-p t
70 : ;; :code-offset 0)
71 : ;;
72 : ;; (define-charset 'emacs
73 : ;; ""
74 : ;; :dimension 3
75 : ;; :code-space [0 255 0 255 0 63]
76 : ;; :ascii-compatible-p t
77 : ;; :supplementary-p t
78 : ;; :code-offset 0)
79 : ;;
80 : ;; (define-charset 'eight-bit
81 : ;; ""
82 : ;; :dimension 1
83 : ;; :code-space [128 255]
84 : ;; :code-offset #x3FFF80)
85 : ;;
86 : ;; We now set :docstring, :short-name, and :long-name properties.
87 :
88 : (put-charset-property
89 : 'ascii :docstring "ASCII (ISO646 IRV)")
90 : (put-charset-property
91 : 'ascii :short-name "ASCII")
92 : (put-charset-property
93 : 'ascii :long-name "ASCII (ISO646 IRV)")
94 : (put-charset-property
95 : 'iso-8859-1 :docstring "Latin-1 (ISO/IEC 8859-1)")
96 : (put-charset-property
97 : 'iso-8859-1 :short-name "Latin-1")
98 : (put-charset-property
99 : 'iso-8859-1 :long-name "Latin-1")
100 : (put-charset-property
101 : 'unicode :docstring "Unicode (ISO10646)")
102 : (put-charset-property
103 : 'unicode :short-name "Unicode")
104 : (put-charset-property
105 : 'unicode :long-name "Unicode (ISO10646)")
106 : (put-charset-property
107 : 'emacs :docstring "Full Emacs charset (excluding eight bit chars)")
108 : (put-charset-property
109 : 'emacs :short-name "Emacs")
110 : (put-charset-property
111 : 'emacs :long-name "Emacs")
112 :
113 : (put-charset-property 'eight-bit :docstring "Raw bytes 128-255")
114 : (put-charset-property 'eight-bit :short-name "Raw bytes")
115 :
116 : (define-charset-alias 'ucs 'unicode)
117 :
118 : (define-charset 'latin-iso8859-1
119 : "Right-Hand Part of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
120 : :short-name "RHP of Latin-1"
121 : :long-name "RHP of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
122 : :iso-final-char ?A
123 : :emacs-mule-id 129
124 : :code-space [32 127]
125 : :code-offset 160)
126 :
127 : ;; Name perhaps not ideal, but is XEmacs-compatible.
128 : (define-charset 'control-1
129 : "8-bit control code (0x80..0x9F)"
130 : :short-name "8-bit control code"
131 : :code-space [128 159]
132 : :code-offset 128)
133 :
134 : (define-charset 'eight-bit-control
135 : "Raw bytes in the range 0x80..0x9F (usually produced from invalid encodings)"
136 : :short-name "Raw bytes 0x80..0x9F"
137 : :supplementary-p t
138 : :code-space [128 159]
139 : :code-offset #x3FFF80) ; see character.h
140 :
141 : (define-charset 'eight-bit-graphic
142 : "Raw bytes in the range 0xA0..0xFF (usually produced from invalid encodings)"
143 : :short-name "Raw bytes 0xA0..0xFF"
144 : :supplementary-p t
145 : :code-space [160 255]
146 : :code-offset #x3FFFA0) ; see character.h
147 :
148 : (defmacro define-iso-single-byte-charset (symbol iso-symbol name nickname
149 : iso-ir iso-final
150 : emacs-mule-id map) ; expands to one or two `define-charset' forms
151 14 : `(progn
152 14 : (define-charset ,symbol ; the full 8-bit charset, using MAP as its :map
153 14 : ,name
154 14 : :short-name ,nickname
155 14 : :long-name ,name
156 : :ascii-compatible-p t
157 : :code-space [0 255]
158 14 : :map ,map)
159 14 : (if ,iso-symbol ; the 7-bit companion is defined only when ISO-SYMBOL is non-nil
160 14 : (define-charset ,iso-symbol
161 14 : (if ,iso-ir ; mention the ISO-IR number in the docstring only when one is given
162 : (format "Right-Hand Part of %s (%s): ISO-IR-%d"
163 14 : ,name ,nickname ,iso-ir)
164 14 : (format "Right-Hand Part of %s (%s)" ,name ,nickname))
165 14 : :short-name (format "RHP of %s" ,name)
166 14 : :long-name (format "RHP of %s (%s)" ,name ,nickname)
167 14 : :iso-final-char ,iso-final
168 14 : :emacs-mule-id ,emacs-mule-id
169 : :code-space [32 127]
170 14 : :subset (list ,symbol 160 255 -128))))) ; codes 160..255 of SYMBOL, shifted by -128 to 32..127
171 :
172 : (define-iso-single-byte-charset 'iso-8859-2 'latin-iso8859-2
173 : "ISO/IEC 8859/2" "Latin-2" 101 ?B 130 "8859-2")
174 :
175 : (define-iso-single-byte-charset 'iso-8859-3 'latin-iso8859-3
176 : "ISO/IEC 8859/3" "Latin-3" 109 ?C 131 "8859-3")
177 :
178 : (define-iso-single-byte-charset 'iso-8859-4 'latin-iso8859-4
179 : "ISO/IEC 8859/4" "Latin-4" 110 ?D 132 "8859-4")
180 :
181 : (define-iso-single-byte-charset 'iso-8859-5 'cyrillic-iso8859-5
182 : "ISO/IEC 8859/5" "Latin/Cyrillic" 144 ?L 140 "8859-5")
183 :
184 : (define-iso-single-byte-charset 'iso-8859-6 'arabic-iso8859-6
185 : "ISO/IEC 8859/6" "Latin/Arabic" 127 ?G 135 "8859-6")
186 :
187 : (define-iso-single-byte-charset 'iso-8859-7 'greek-iso8859-7
188 : "ISO/IEC 8859/7" "Latin/Greek" 126 ?F 134 "8859-7")
189 :
190 : (define-iso-single-byte-charset 'iso-8859-8 'hebrew-iso8859-8
191 : "ISO/IEC 8859/8" "Latin/Hebrew" 138 ?H 136 "8859-8")
192 :
193 : (define-iso-single-byte-charset 'iso-8859-9 'latin-iso8859-9
194 : "ISO/IEC 8859/9" "Latin-5" 148 ?M 141 "8859-9")
195 :
196 : (define-iso-single-byte-charset 'iso-8859-10 'latin-iso8859-10
197 : "ISO/IEC 8859/10" "Latin-6" 157 ?V nil "8859-10")
198 :
199 : ;; http://www.nectec.or.th/it-standards/iso8859-11/
200 : ;; http://www.cwi.nl/~dik/english/codes/8859.html says this is tis-620
201 : ;; plus nbsp
202 : (define-iso-single-byte-charset 'iso-8859-11 'thai-iso8859-11
203 : "ISO/IEC 8859/11" "Latin/Thai" 166 ?T nil "8859-11")
204 :
205 : ;; 8859-12 doesn't (yet?) exist.
206 :
207 : (define-iso-single-byte-charset 'iso-8859-13 'latin-iso8859-13
208 : "ISO/IEC 8859/13" "Latin-7" 179 ?Y nil "8859-13")
209 :
210 : (define-iso-single-byte-charset 'iso-8859-14 'latin-iso8859-14
211 : "ISO/IEC 8859/14" "Latin-8" 199 ?_ 143 "8859-14")
212 :
213 : (define-iso-single-byte-charset 'iso-8859-15 'latin-iso8859-15
214 : "ISO/IEC 8859/15" "Latin-9" 203 ?b 142 "8859-15")
215 :
216 : (define-iso-single-byte-charset 'iso-8859-16 'latin-iso8859-16
217 : "ISO/IEC 8859/16" "Latin-10" 226 ?f nil "8859-16")
218 :
219 : ;; No point in keeping it around.
220 : (fmakunbound 'define-iso-single-byte-charset)
221 :
222 : ;; Can this be shared with 8859-11?
223 : ;; N.b. not all of these are defined in Unicode.
224 : (define-charset 'thai-tis620
225 : "TIS620.2533"
226 : :short-name "TIS620.2533"
227 : :iso-final-char ?T
228 : :emacs-mule-id 133
229 : :code-space [32 127]
230 : :code-offset #x0E00)
231 :
232 : ;; Fixme: document this charset, cf. `thai-tis620' above
233 : (define-charset 'tis620-2533
234 : "TIS620.2533"
235 : :short-name "TIS620.2533"
236 : :ascii-compatible-p t
237 : :code-space [0 255]
238 : :superset '(ascii eight-bit-control (thai-tis620 . 128)))
239 :
240 : (define-charset 'jisx0201
241 : "JISX0201"
242 : :short-name "JISX0201"
243 : :code-space [0 #xDF]
244 : :map "JISX0201")
245 :
246 : (define-charset 'latin-jisx0201
247 : "Roman Part of JISX0201.1976"
248 : :short-name "JISX0201 Roman"
249 : :long-name "Japanese Roman (JISX0201.1976)"
250 : :iso-final-char ?J
251 : :emacs-mule-id 138
252 : :supplementary-p t
253 : :code-space [33 126]
254 : :subset '(jisx0201 33 126 0))
255 :
256 : (define-charset 'katakana-jisx0201
257 : "Katakana Part of JISX0201.1976"
258 : :short-name "JISX0201 Katakana"
259 : :long-name "Japanese Katakana (JISX0201.1976)"
260 : :iso-final-char ?I
261 : :emacs-mule-id 137
262 : :supplementary-p t
263 : :code-space [33 126]
264 : :subset '(jisx0201 161 254 -128))
265 :
266 : (define-charset 'chinese-gb2312
267 : "GB2312 Chinese simplified: ISO-IR-58"
268 : :short-name "GB2312"
269 : :long-name "GB2312: ISO-IR-58"
270 : :iso-final-char ?A
271 : :emacs-mule-id 145
272 : :code-space [33 126 33 126]
273 : :code-offset #x110000
274 : :unify-map "GB2312")
275 :
276 : (define-charset 'chinese-gbk
277 : "GBK Chinese simplified."
278 : :short-name "GBK"
279 : :code-space [#x40 #xFE #x81 #xFE]
280 : :code-offset #x160000
281 : :unify-map "GBK")
282 : (define-charset-alias 'cp936 'chinese-gbk)
283 : (define-charset-alias 'windows-936 'chinese-gbk)
284 :
285 : (define-charset 'chinese-cns11643-1
286 : "CNS11643 Plane 1 Chinese traditional: ISO-IR-171"
287 : :short-name "CNS11643-1"
288 : :long-name "CNS11643-1 (Chinese traditional): ISO-IR-171"
289 : :iso-final-char ?G
290 : :emacs-mule-id 149
291 : :code-space [33 126 33 126]
292 : :code-offset #x114000
293 : :unify-map "CNS-1")
294 :
295 : (define-charset 'chinese-cns11643-2
296 : "CNS11643 Plane 2 Chinese traditional: ISO-IR-172"
297 : :short-name "CNS11643-2"
298 : :long-name "CNS11643-2 (Chinese traditional): ISO-IR-172"
299 : :iso-final-char ?H
300 : :emacs-mule-id 150
301 : :code-space [33 126 33 126]
302 : :code-offset #x118000
303 : :unify-map "CNS-2")
304 :
305 : (define-charset 'chinese-cns11643-3
306 : "CNS11643 Plane 3 Chinese Traditional: ISO-IR-183"
307 : :short-name "CNS11643-3"
308 : :long-name "CNS11643-3 (Chinese traditional): ISO-IR-183"
309 : :iso-final-char ?I
310 : :code-space [33 126 33 126]
311 : :emacs-mule-id 246
312 : :code-offset #x11C000
313 : :unify-map "CNS-3")
314 :
315 : (define-charset 'chinese-cns11643-4
316 : "CNS11643 Plane 4 Chinese Traditional: ISO-IR-184"
317 : :short-name "CNS11643-4"
318 : :long-name "CNS11643-4 (Chinese traditional): ISO-IR-184"
319 : :iso-final-char ?J
320 : :emacs-mule-id 247
321 : :code-space [33 126 33 126]
322 : :code-offset #x120000
323 : :unify-map "CNS-4")
324 :
325 : (define-charset 'chinese-cns11643-5
326 : "CNS11643 Plane 5 Chinese Traditional: ISO-IR-185"
327 : :short-name "CNS11643-5"
328 : :long-name "CNS11643-5 (Chinese traditional): ISO-IR-185"
329 : :iso-final-char ?K
330 : :emacs-mule-id 248
331 : :code-space [33 126 33 126]
332 : :code-offset #x124000
333 : :unify-map "CNS-5")
334 :
335 : (define-charset 'chinese-cns11643-6
336 : "CNS11643 Plane 6 Chinese Traditional: ISO-IR-186"
337 : :short-name "CNS11643-6"
338 : :long-name "CNS11643-6 (Chinese traditional): ISO-IR-186"
339 : :iso-final-char ?L
340 : :emacs-mule-id 249
341 : :code-space [33 126 33 126]
342 : :code-offset #x128000
343 : :unify-map "CNS-6")
344 :
345 : (define-charset 'chinese-cns11643-7
346 : "CNS11643 Plane 7 Chinese Traditional: ISO-IR-187"
347 : :short-name "CNS11643-7"
348 : :long-name "CNS11643-7 (Chinese traditional): ISO-IR-187"
349 : :iso-final-char ?M
350 : :emacs-mule-id 250
351 : :code-space [33 126 33 126]
352 : :code-offset #x12C000
353 : :unify-map "CNS-7")
354 :
355 : (define-charset 'big5
356 : "Big5 (Chinese traditional)"
357 : :short-name "Big5"
358 : :code-space [#x40 #xFE #xA1 #xFE]
359 : :code-offset #x130000
360 : :unify-map "BIG5")
361 : ;; Fixme: AKA cp950 according to
362 : ;; <URL:http://www.microsoft.com/globaldev/reference/WinCP.asp>. Is
363 : ;; that correct?
364 :
365 : (define-charset 'chinese-big5-1
366 : "Frequently used part (A141-C67E) of Big5 (Chinese traditional)"
367 : :short-name "Big5 (Level-1)"
368 : :long-name "Big5 (Level-1) A141-C67E" ; same range as the docstring; 0x7F is not a Big5 trail byte
369 : :iso-final-char ?0
370 : :emacs-mule-id 152
371 : :supplementary-p t
372 : :code-space [#x21 #x7E #x21 #x7E]
373 : :code-offset #x135000
374 : :unify-map "BIG5-1")
375 :
376 : (define-charset 'chinese-big5-2
377 : "Less frequently used part (C940-FEFE) of Big5 (Chinese traditional)"
378 : :short-name "Big5 (Level-2)"
379 : :long-name "Big5 (Level-2) C940-FEFE"
380 : :iso-final-char ?1
381 : :emacs-mule-id 153
382 : :supplementary-p t
383 : :code-space [#x21 #x7E #x21 #x7E]
384 : :code-offset #x137800
385 : :unify-map "BIG5-2")
386 :
387 : (define-charset 'japanese-jisx0208
388 : "JISX0208.1983/1990 Japanese Kanji: ISO-IR-87"
389 : :short-name "JISX0208"
390 : :long-name "JISX0208.1983/1990 (Japanese): ISO-IR-87"
391 : :iso-final-char ?B
392 : :emacs-mule-id 146
393 : :code-space [33 126 33 126]
394 : :code-offset #x140000
395 : :unify-map "JISX0208")
396 :
397 : (define-charset 'japanese-jisx0208-1978
398 : "JISX0208.1978 Japanese Kanji (so called \"old JIS\"): ISO-IR-42"
399 : :short-name "JISX0208.1978"
400 : :long-name "JISX0208.1978 (JISC6226.1978): ISO-IR-42"
401 : :iso-final-char ?@
402 : :emacs-mule-id 144
403 : :code-space [33 126 33 126]
404 : :code-offset #x144000
405 : :unify-map "JISC6226")
406 :
407 : (define-charset 'japanese-jisx0212
408 : "JISX0212 Japanese supplement: ISO-IR-159"
409 : :short-name "JISX0212"
410 : :long-name "JISX0212 (Japanese): ISO-IR-159"
411 : :iso-final-char ?D
412 : :emacs-mule-id 148
413 : :code-space [33 126 33 126]
414 : :code-offset #x148000
415 : :unify-map "JISX0212")
416 :
417 : ;; Note that jisx0213 contains characters not in Unicode (3.2?). It's
418 : ;; arguable whether it should have a unify-map.
419 : (define-charset 'japanese-jisx0213-1
420 : "JISX0213.2000 Plane 1 (Japanese)"
421 : :short-name "JISX0213-1"
422 : :iso-final-char ?O
423 : :emacs-mule-id 151
424 : :unify-map "JISX2131"
425 : :code-space [33 126 33 126]
426 : :code-offset #x14C000)
427 :
428 : (define-charset 'japanese-jisx0213-2
429 : "JISX0213.2000 Plane 2 (Japanese)"
430 : :short-name "JISX0213-2"
431 : :iso-final-char ?P
432 : :emacs-mule-id 254
433 : :unify-map "JISX2132"
434 : :code-space [33 126 33 126]
435 : :code-offset #x150000)
436 :
437 : (define-charset 'japanese-jisx0213-a
438 : "JISX0213.2004 adds these characters to JISX0213.2000."
439 : :short-name "JISX0213A"
440 : :dimension 2
441 : :code-space [33 126 33 126]
442 : :supplementary-p t
443 : :map "JISX213A")
444 :
445 : (define-charset 'japanese-jisx0213.2004-1
446 : "JISX0213.2004 Plane1 (Japanese)"
447 : :short-name "JISX0213.2004-1"
448 : :dimension 2
449 : :code-space [33 126 33 126]
450 : :iso-final-char ?Q
451 : :superset '(japanese-jisx0213-a japanese-jisx0213-1))
452 :
453 : (define-charset 'katakana-sjis
454 : "Katakana part of Shift-JIS"
455 : :dimension 1
456 : :code-space [#xA1 #xDF]
457 : :subset '(jisx0201 #xA1 #xDF 0)
458 : :supplementary-p t)
459 :
460 : (define-charset 'cp932-2-byte
461 : "2-byte part of CP932"
462 : :dimension 2
463 : :map "CP932-2BYTE"
464 : :code-space [#x40 #xFC #x81 #xFC]
465 : :supplementary-p t)
466 :
467 : (define-charset 'cp932
468 : "CP932 (Microsoft shift-jis)"
469 : :code-space [#x00 #xFF #x00 #xFE]
470 : :short-name "CP932"
471 : :superset '(ascii katakana-sjis cp932-2-byte))
472 :
473 : (define-charset 'korean-ksc5601
474 : "KSC5601 Korean Hangul and Hanja: ISO-IR-149"
475 : :short-name "KSC5601"
476 : :long-name "KSC5601 (Korean): ISO-IR-149"
477 : :iso-final-char ?C
478 : :emacs-mule-id 147
479 : :code-space [33 126 33 126]
480 : :code-offset #x279f94 ; ... #x27c217
481 : :unify-map "KSC5601")
482 :
483 : (define-charset 'big5-hkscs
484 : "Big5-HKSCS (Chinese traditional, Hong Kong supplement)"
485 : :short-name "Big5"
486 : :code-space [#x40 #xFE #xA1 #xFE]
487 : :code-offset #x27c218 ; ... #x280839
488 : :unify-map "BIG5-HKSCS")
489 :
490 : (define-charset 'cp949-2-byte
491 : "2-byte part of CP949"
492 : :dimension 2
493 : :map "CP949-2BYTE"
494 : :code-space [#x41 #xFE #x81 #xFD]
495 : :supplementary-p t)
496 :
497 : (define-charset 'cp949
498 : "CP949 (Korean)"
499 : :short-name "CP949"
500 : :long-name "CP949 (Korean)"
501 : :code-space [#x00 #xFE #x00 #xFD]
502 : :superset '(ascii cp949-2-byte))
503 :
504 : (define-charset 'chinese-sisheng
505 : "SiSheng characters for PinYin/ZhuYin"
506 : :short-name "SiSheng"
507 : :long-name "SiSheng (PinYin/ZhuYin)"
508 : :iso-final-char ?0
509 : :emacs-mule-id 160
510 : :code-space [33 126]
511 : :unify-map "MULE-sisheng"
512 : :supplementary-p t
513 : :code-offset #x200000)
514 :
515 : ;; A subset of the 1989 version of IPA. It consists of the consonant
516 : ;; signs used in English, French, German and Italian, and all vowels
517 : ;; signs in the table. [says old MULE doc]
518 : (define-charset 'ipa
519 : "IPA (International Phonetic Association)"
520 : :short-name "IPA"
521 : :iso-final-char ?0
522 : :emacs-mule-id 161
523 : :unify-map "MULE-ipa"
524 : :code-space [32 127]
525 : :supplementary-p t
526 : :code-offset #x200080)
527 :
528 : (define-charset 'viscii
529 : "VISCII1.1"
530 : :short-name "VISCII"
531 : :long-name "VISCII 1.1"
532 : :code-space [0 255]
533 : :map "VISCII")
534 :
535 : (define-charset 'vietnamese-viscii-lower
536 : "VISCII1.1 lower-case"
537 : :short-name "VISCII lower"
538 : :long-name "VISCII lower-case"
539 : :iso-final-char ?1
540 : :emacs-mule-id 162
541 : :code-space [32 127]
542 : :code-offset #x200200
543 : :supplementary-p t
544 : :unify-map "MULE-lviscii")
545 :
546 : (define-charset 'vietnamese-viscii-upper
547 : "VISCII1.1 upper-case"
548 : :short-name "VISCII upper"
549 : :long-name "VISCII upper-case"
550 : :iso-final-char ?2
551 : :emacs-mule-id 163
552 : :code-space [32 127]
553 : :code-offset #x200280
554 : :supplementary-p t
555 : :unify-map "MULE-uviscii")
556 :
557 : (define-charset 'vscii
558 : "VSCII1.1 (TCVN-5712 VN1)"
559 : :short-name "VSCII"
560 : :code-space [0 255]
561 : :map "VSCII")
562 :
563 : (define-charset-alias 'tcvn-5712 'vscii)
564 :
565 : ;; Fixme: see note in tcvn.map about combining characters
566 : (define-charset 'vscii-2
567 : "VSCII-2 (TCVN-5712 VN2)"
568 : :code-space [0 255]
569 : :map "VSCII-2")
570 :
571 : (define-charset 'koi8-r
572 : "KOI8-R"
573 : :short-name "KOI8-R"
574 : :ascii-compatible-p t
575 : :code-space [0 255]
576 : :map "KOI8-R")
577 :
578 : (define-charset-alias 'koi8 'koi8-r)
579 :
580 : (define-charset 'alternativnyj
581 : "ALTERNATIVNYJ"
582 : :short-name "alternativnyj"
583 : :ascii-compatible-p t
584 : :code-space [0 255]
585 : :map "ALTERNATIVNYJ")
586 :
587 : (define-charset 'cp866
588 : "CP866"
589 : :short-name "cp866"
590 : :ascii-compatible-p t
591 : :code-space [0 255]
592 : :map "IBM866")
593 : (define-charset-alias 'ibm866 'cp866)
594 :
595 : (define-charset 'koi8-u
596 : "KOI8-U"
597 : :short-name "KOI8-U"
598 : :ascii-compatible-p t
599 : :code-space [0 255]
600 : :map "KOI8-U")
601 :
602 : (define-charset 'koi8-t
603 : "KOI8-T"
604 : :short-name "KOI8-T"
605 : :ascii-compatible-p t
606 : :code-space [0 255]
607 : :map "KOI8-T")
608 :
609 : (define-charset 'georgian-ps
610 : "GEORGIAN-PS"
611 : :short-name "GEORGIAN-PS"
612 : :ascii-compatible-p t
613 : :code-space [0 255]
614 : :map "KA-PS")
615 :
616 : (define-charset 'georgian-academy
617 : "GEORGIAN-ACADEMY"
618 : :short-name "GEORGIAN-ACADEMY"
619 : :ascii-compatible-p t
620 : :code-space [0 255]
621 : :map "KA-ACADEMY")
622 :
623 : (define-charset 'windows-1250
624 : "WINDOWS-1250 (Central Europe)"
625 : :short-name "WINDOWS-1250"
626 : :ascii-compatible-p t
627 : :code-space [0 255]
628 : :map "CP1250")
629 : (define-charset-alias 'cp1250 'windows-1250)
630 :
631 : (define-charset 'windows-1251
632 : "WINDOWS-1251 (Cyrillic)"
633 : :short-name "WINDOWS-1251"
634 : :ascii-compatible-p t
635 : :code-space [0 255]
636 : :map "CP1251")
637 : (define-charset-alias 'cp1251 'windows-1251)
638 :
639 : (define-charset 'windows-1252
640 : "WINDOWS-1252 (Latin I)"
641 : :short-name "WINDOWS-1252"
642 : :ascii-compatible-p t
643 : :code-space [0 255]
644 : :map "CP1252")
645 : (define-charset-alias 'cp1252 'windows-1252)
646 :
647 : (define-charset 'windows-1253
648 : "WINDOWS-1253 (Greek)"
649 : :short-name "WINDOWS-1253"
650 : :ascii-compatible-p t
651 : :code-space [0 255]
652 : :map "CP1253")
653 : (define-charset-alias 'cp1253 'windows-1253)
654 :
655 : (define-charset 'windows-1254
656 : "WINDOWS-1254 (Turkish)"
657 : :short-name "WINDOWS-1254"
658 : :ascii-compatible-p t
659 : :code-space [0 255]
660 : :map "CP1254")
661 : (define-charset-alias 'cp1254 'windows-1254)
662 :
663 : (define-charset 'windows-1255
664 : "WINDOWS-1255 (Hebrew)"
665 : :short-name "WINDOWS-1255"
666 : :ascii-compatible-p t
667 : :code-space [0 255]
668 : :map "CP1255")
669 : (define-charset-alias 'cp1255 'windows-1255)
670 :
671 : (define-charset 'windows-1256
672 : "WINDOWS-1256 (Arabic)"
673 : :short-name "WINDOWS-1256"
674 : :ascii-compatible-p t
675 : :code-space [0 255]
676 : :map "CP1256")
677 : (define-charset-alias 'cp1256 'windows-1256)
678 :
679 : (define-charset 'windows-1257
680 : "WINDOWS-1257 (Baltic)"
681 : :short-name "WINDOWS-1257"
682 : :ascii-compatible-p t
683 : :code-space [0 255]
684 : :map "CP1257")
685 : (define-charset-alias 'cp1257 'windows-1257)
686 :
687 : (define-charset 'windows-1258
688 : "WINDOWS-1258 (Viet Nam)"
689 : :short-name "WINDOWS-1258"
690 : :ascii-compatible-p t
691 : :code-space [0 255]
692 : :map "CP1258")
693 : (define-charset-alias 'cp1258 'windows-1258)
694 :
695 : (define-charset 'next
696 : "NEXT"
697 : :short-name "NEXT"
698 : :ascii-compatible-p t
699 : :code-space [0 255]
700 : :map "NEXTSTEP")
701 :
702 : (define-charset 'cp1125
703 : "CP1125"
704 : :short-name "CP1125"
705 : :code-space [0 255]
706 : :ascii-compatible-p t
707 : :map "CP1125")
708 : (define-charset-alias 'ruscii 'cp1125)
709 : ;; Original name for cp1125, says Serhii Hlodin <hlodin@lutsk.bank.gov.ua>
710 : (define-charset-alias 'cp866u 'cp1125)
711 :
712 : ;; Fixme: Cf. iconv; http://czyborra.com/charsets/codepages.html
713 : ;; shows this as not ASCII compatible, with various graphics in
714 : ;; 0x01-0x1F.
715 : (define-charset 'cp437
716 : "CP437 (MS-DOS United States, Australia, New Zealand, South Africa)"
717 : :short-name "CP437"
718 : :code-space [0 255]
719 : :ascii-compatible-p t
720 : :map "IBM437")
721 :
722 : (define-charset 'cp720
723 : "CP720 (Arabic)"
724 : :short-name "CP720"
725 : :code-space [0 255]
726 : :ascii-compatible-p t
727 : :map "CP720")
728 :
729 : (define-charset 'cp737
730 : "CP737 (PC Greek)"
731 : :short-name "CP737"
732 : :code-space [0 255]
733 : :ascii-compatible-p t
734 : :map "CP737")
735 :
736 : (define-charset 'cp775
737 : "CP775 (PC Baltic)"
738 : :short-name "CP775"
739 : :code-space [0 255]
740 : :ascii-compatible-p t
741 : :map "CP775")
742 :
743 : (define-charset 'cp851
744 : "CP851 (Greek)"
745 : :short-name "CP851"
746 : :code-space [0 255]
747 : :ascii-compatible-p t
748 : :map "IBM851")
749 :
750 : (define-charset 'cp852
751 : "CP852 (MS-DOS Latin-2)"
752 : :short-name "CP852"
753 : :code-space [0 255]
754 : :ascii-compatible-p t
755 : :map "IBM852")
756 :
757 : (define-charset 'cp855
758 : "CP855 (IBM Cyrillic)"
759 : :short-name "CP855"
760 : :code-space [0 255]
761 : :ascii-compatible-p t
762 : :map "IBM855")
763 :
764 : (define-charset 'cp857
765 : "CP857 (IBM Turkish)"
766 : :short-name "CP857"
767 : :code-space [0 255]
768 : :ascii-compatible-p t
769 : :map "IBM857")
770 :
771 : (define-charset 'cp858
772 : "CP858 (Multilingual Latin I + Euro)"
773 : :short-name "CP858"
774 : :code-space [0 255]
775 : :ascii-compatible-p t
776 : :map "CP858")
777 : (define-charset-alias 'cp00858 'cp858) ; IANA has IBM00858/CP00858
778 :
779 : (define-charset 'cp860
780 : "CP860 (MS-DOS Portuguese)"
781 : :short-name "CP860"
782 : :code-space [0 255]
783 : :ascii-compatible-p t
784 : :map "IBM860")
785 :
786 : (define-charset 'cp861
787 : "CP861 (MS-DOS Icelandic)"
788 : :short-name "CP861"
789 : :code-space [0 255]
790 : :ascii-compatible-p t
791 : :map "IBM861")
792 :
793 : (define-charset 'cp862
794 : "CP862 (PC Hebrew)"
795 : :short-name "CP862"
796 : :code-space [0 255]
797 : :ascii-compatible-p t
798 : :map "IBM862")
799 :
800 : (define-charset 'cp863
801 : "CP863 (MS-DOS Canadian French)"
802 : :short-name "CP863"
803 : :code-space [0 255]
804 : :ascii-compatible-p t
805 : :map "IBM863")
806 :
807 : (define-charset 'cp864
808 : "CP864 (PC Arabic)"
809 : :short-name "CP864"
810 : :code-space [0 255]
811 : :ascii-compatible-p t
812 : :map "IBM864")
813 :
814 : (define-charset 'cp865
815 : "CP865 (MS-DOS Nordic)"
816 : :short-name "CP865"
817 : :code-space [0 255]
818 : :ascii-compatible-p t
819 : :map "IBM865")
820 :
821 : (define-charset 'cp869
822 : "CP869 (IBM Modern Greek)"
823 : :short-name "CP869"
824 : :code-space [0 255]
825 : :ascii-compatible-p t
826 : :map "IBM869")
827 :
828 : (define-charset 'cp874
829 : "CP874 (IBM Thai)"
830 : :short-name "CP874"
831 : :code-space [0 255]
832 : :ascii-compatible-p t
833 : :map "IBM874")
834 :
835 : ;; For Arabic, we need three different types of character sets.
836 : ;; Digits are of direction left-to-right and of width 1-column.
837 : ;; Others are of direction right-to-left and of width 1-column or
838 : ;; 2-column.
839 : (define-charset 'arabic-digit
840 : "Arabic digit"
841 : :short-name "Arabic digit"
842 : :iso-final-char ?2
843 : :emacs-mule-id 164
844 : :supplementary-p t
845 : :code-space [34 42]
846 : :code-offset #x0600)
847 :
848 : (define-charset 'arabic-1-column
849 : "Arabic 1-column"
850 : :short-name "Arabic 1-col"
851 : :long-name "Arabic 1-column"
852 : :iso-final-char ?3
853 : :emacs-mule-id 165
854 : :supplementary-p t
855 : :code-space [33 126]
856 : :code-offset #x200100)
857 :
858 : (define-charset 'arabic-2-column
859 : "Arabic 2-column"
860 : :short-name "Arabic 2-col"
861 : :long-name "Arabic 2-column"
862 : :iso-final-char ?4
863 : :emacs-mule-id 224
864 : :supplementary-p t
865 : :code-space [33 126]
866 : :code-offset #x200180)
867 :
868 : ;; Lao script.
869 : ;; Codes 0x21..0x7E are mapped to Unicode U+0E81..U+0EDF.
870 : ;; Not all of them are defined in Unicode.
871 : (define-charset 'lao
872 : "Lao characters (ISO10646 0E81..0EDF)"
873 : :short-name "Lao"
874 : :iso-final-char ?1
875 : :emacs-mule-id 167
876 : :supplementary-p t
877 : :code-space [33 126]
878 : :code-offset #x0E81)
879 :
880 : (define-charset 'mule-lao
881 : "Lao characters (ISO10646 0E81..0EDF)"
882 : :short-name "Lao"
883 : :code-space [0 255]
884 : :supplementary-p t
885 : :superset '(ascii eight-bit-control (lao . 128)))
886 :
887 :
888 : ;; Indian scripts. Symbolic charset for data exchange. Glyphs are
889 : ;; not assigned. They are automatically converted to each Indian
890 : ;; script which IS-13194 supports.
891 :
892 : (define-charset 'indian-is13194
893 : "7-bit representation of IS 13194 (ISCII) for Devanagari"
894 : :short-name "IS 13194 (DEV)"
895 : :long-name "Indian IS 13194 (DEV)"
896 : :iso-final-char ?5
897 : :emacs-mule-id 225
898 : :supplementary-p t
899 : :code-space [33 126]
900 : :code-offset #x180000
901 : :unify-map "MULE-is13194")
902 :
903 : (let ((code-offset #x180100)) ; running offset; same start value as `indian-glyph''s :code-offset
904 : (dolist (script '(devanagari sanskrit bengali tamil telugu assamese
905 : oriya kannada malayalam gujarati punjabi))
906 : (define-charset (intern (format "%s-cdac" script)) ; e.g. `devanagari-cdac'
907 : (format
908 : "Glyphs of %s script for CDAC font. Subset of `indian-glyph'."
909 : (capitalize (symbol-name script)))
910 : :short-name (format "CDAC %s glyphs" (capitalize (symbol-name script)))
911 : :supplementary-p t
912 : :code-space [0 255]
913 : :code-offset code-offset)
914 : (setq code-offset (+ code-offset #x100))) ; advance by one 256-code slot per charset
915 :
916 : (dolist (script '(devanagari bengali punjabi gujarati ; code-offset is not reset: AKRUTI slots follow the CDAC ones
917 : oriya tamil telugu kannada malayalam))
918 : (define-charset (intern (format "%s-akruti" script)) ; e.g. `devanagari-akruti'
919 : (format
920 : "Glyphs of %s script for AKRUTI font. Subset of `indian-glyph'."
921 : (capitalize (symbol-name script)))
922 : :short-name (format "AKRUTI %s glyphs" (capitalize (symbol-name script)))
923 : :supplementary-p t
924 : :code-space [0 255]
925 : :code-offset code-offset)
926 : (setq code-offset (+ code-offset #x100)))) ; advance by one 256-code slot per charset
927 :
928 : (define-charset 'indian-glyph
929 : "Glyphs for Indian characters."
930 : :short-name "Indian glyph"
931 : :iso-final-char ?4
932 : :emacs-mule-id 240
933 : :supplementary-p t
934 : :code-space [32 127 32 127]
935 : :code-offset #x180100)
936 :
937 : ;; Actual glyphs of 1-column width (emacs-mule id 240; 251 is the 2-column set).
938 : (define-charset 'indian-1-column
939 : "Indian charset for 1-column width glyphs."
940 : :short-name "Indian 1-col"
941 : :long-name "Indian 1 Column"
942 : :iso-final-char ?6
943 : :emacs-mule-id 240 ; must not reuse 251, the id of `indian-2-column'
944 : :supplementary-p t
945 : :code-space [33 126 33 126]
946 : :code-offset #x184000)
947 :
948 : ;; Actual Glyph for 2-column width.
949 : (define-charset 'indian-2-column
950 : "Indian charset for 2-column width glyphs."
951 : :short-name "Indian 2-col"
952 : :long-name "Indian 2 Column"
953 : :iso-final-char ?5
954 : :emacs-mule-id 251
955 : :supplementary-p t
956 : :code-space [33 126 33 126]
957 : :code-offset #x184000)
958 :
959 : (define-charset 'tibetan
960 : "Tibetan characters"
962 : :short-name "Tibetan 2-col"
963 : :long-name "Tibetan 2 column"
964 : :iso-final-char ?7 ; given once; the original listed this property twice
965 : :emacs-mule-id 252
966 : :unify-map "MULE-tibetan"
967 : :supplementary-p t
968 : :code-space [33 126 33 37]
969 : :code-offset #x190000)
970 :
971 : (define-charset 'tibetan-1-column
972 : "Tibetan 1 column glyph"
973 : :short-name "Tibetan 1-col"
974 : :long-name "Tibetan 1 column"
975 : :iso-final-char ?8
976 : :emacs-mule-id 241
977 : :supplementary-p t
978 : :code-space [33 126 33 37]
979 : :code-offset #x190000)
980 :
981 : ;; Subsets of Unicode.
982 : (define-charset 'mule-unicode-2500-33ff
983 : "Unicode characters of the range U+2500..U+33FF."
984 : :short-name "Unicode subset 2"
985 : :long-name "Unicode subset (U+2500..U+33FF)"
986 : :iso-final-char ?2
987 : :emacs-mule-id 242
988 : :supplementary-p t
989 : :code-space [#x20 #x7f #x20 #x47]
990 : :code-offset #x2500)
991 :
992 : (define-charset 'mule-unicode-e000-ffff
993 : "Unicode characters of the range U+E000..U+FFFF."
994 : :short-name "Unicode subset 3"
995 : :long-name "Unicode subset (U+E000..U+FFFF)" ; range written like the sibling mule-unicode-* charsets
996 : :iso-final-char ?3
997 : :emacs-mule-id 243
998 : :supplementary-p t
999 : :code-space [#x20 #x7F #x20 #x75]
1000 : :code-offset #xE000
1001 : :max-code 30015) ; U+FFFF
1002 :
1003 : (define-charset 'mule-unicode-0100-24ff
1004 : "Unicode characters of the range U+0100..U+24FF."
1005 : :short-name "Unicode subset"
1006 : :long-name "Unicode subset (U+0100..U+24FF)"
1007 : :iso-final-char ?1
1008 : :emacs-mule-id 244
1009 : :supplementary-p t
1010 : :code-space [#x20 #x7F #x20 #x7F]
1011 : :code-offset #x100)
1012 :
1013 : (define-charset 'unicode-bmp
1014 : "Unicode Basic Multilingual Plane (U+0000..U+FFFF)"
1015 : :short-name "Unicode BMP"
1016 : :code-space [0 255 0 255] ; 256 x 256 = #x10000 code points
1017 : :code-offset 0)
1018 :
1019 : (define-charset 'unicode-smp
1020 : "Unicode Supplementary Multilingual Plane (U+10000..U+1FFFF)"
1021 : :short-name "Unicode SMP" ; no trailing blank, like the other planes
1022 : :code-space [0 255 0 255] ; 256 x 256 = #x10000 code points
1023 : :code-offset #x10000)
1024 :
1025 : (define-charset 'unicode-sip
1026 : "Unicode Supplementary Ideographic Plane (U+20000..U+2FFFF)"
1027 : :short-name "Unicode SIP"
1028 : :code-space [0 255 0 255] ; 256 x 256 = #x10000 code points
1029 : :code-offset #x20000)
1030 :
1031 : (define-charset 'unicode-ssp
1032 : "Unicode Supplementary Special-purpose Plane (U+E0000..U+EFFFF)"
1033 : :short-name "Unicode SSP"
1034 : :code-space [0 255 0 255] ; 256 x 256 = #x10000 code points
1035 : :code-offset #xE0000)
1036 :
1037 : (define-charset 'ethiopic
1038 : "Ethiopic characters for Amharic and Tigrigna."
1039 : :short-name "Ethiopic"
1040 : :long-name "Ethiopic characters"
1041 : :iso-final-char ?3
1042 : :emacs-mule-id 245
1043 : :supplementary-p t
1044 : :unify-map "MULE-ethiopic" ; used by the `unify-charset' call further down
1045 : :code-space [33 126 33 126] ; 94 x 94 code points
1046 : :code-offset #x1A0000)
1047 :
1048 : (define-charset 'mac-roman
1049 : "Mac Roman charset"
1050 : :short-name "Mac Roman"
1051 : :ascii-compatible-p t
1052 : :code-space [0 255] ; one dimension covering all 256 byte values
1053 : :map "MACINTOSH")
1054 :
1055 : ;; Fixme: modern EBCDIC variants, e.g. IBM00924?
1056 : (define-charset 'ebcdic-us
1057 : "US version of EBCDIC"
1058 : :short-name "EBCDIC-US"
1059 : :code-space [0 255] ; no :ascii-compatible-p here, unlike `mac-roman'
1060 : :mime-charset 'ebcdic-us
1061 : :map "EBCDICUS")
1062 :
1063 : (define-charset 'ebcdic-uk
1064 : "UK version of EBCDIC"
1065 : :short-name "EBCDIC-UK"
1066 : :code-space [0 255] ; no :ascii-compatible-p here either
1067 : :mime-charset 'ebcdic-uk
1068 : :map "EBCDICUK")
1069 :
1070 : (define-charset 'ibm1047
1071 : ;; The description below is quoted from groff:
1072 : "IBM1047, `EBCDIC Latin 1/Open Systems' used by OS/390 Unix."
1073 : :short-name "IBM1047"
1074 : :code-space [0 255]
1075 : :mime-charset 'ibm1047
1076 : :map "IBM1047")
1077 : (define-charset-alias 'cp1047 'ibm1047) ; `cp1047' is a second name for `ibm1047'
1078 :
1079 : (define-charset 'hp-roman8
1080 : "Encoding used by Hewlett-Packard printer software"
1081 : :short-name "HP-ROMAN8"
1082 : :ascii-compatible-p t
1083 : :code-space [0 255] ; one dimension covering all 256 byte values
1084 : :map "HP-ROMAN8")
1085 :
1086 : ;; To make a coding system with this, a pre-write-conversion should
1087 : ;; account for the commented-out multi-valued code points in
1088 : ;; stdenc.map.
1089 : (define-charset 'adobe-standard-encoding
1090 : "Adobe `standard encoding' used in PostScript"
1091 : :short-name "ADOBE-STANDARD-ENCODING"
1092 : :code-space [#x20 255] ; starts at #x20 (32), not at 0
1093 : :map "stdenc")
1094 :
1095 : (define-charset 'symbol
1096 : "Adobe symbol encoding used in PostScript"
1097 : :short-name "ADOBE-SYMBOL"
1098 : :code-space [#x20 255] ; same code space as `adobe-standard-encoding'
1099 : :map "symbol")
1100 :
1101 : (define-charset 'ibm850
1102 : "DOS codepage 850 (Latin-1)"
1103 : :short-name "IBM850"
1104 : :ascii-compatible-p t
1105 : :code-space [0 255]
1106 : :map "IBM850")
1107 : (define-charset-alias 'cp850 'ibm850) ; `cp850' is a second name for `ibm850'
1108 :
1109 : (define-charset 'mik
1110 : "Bulgarian DOS codepage"
1111 : :short-name "MIK"
1112 : :ascii-compatible-p t
1113 : :code-space [0 255]
1114 : :map "MIK") ; no alias is defined for this charset here
1115 :
1116 : (define-charset 'ptcp154
1117 : "ParaType codepage (Asian Cyrillic)"
1118 : :short-name "PT154"
1119 : :ascii-compatible-p t
1120 : :code-space [0 255]
1121 : :mime-charset 'pt154 ; `pt154' is also made a charset alias below
1122 : :map "PTCP154")
1123 : (define-charset-alias 'pt154 'ptcp154)
1124 : (define-charset-alias 'cp154 'ptcp154)
1125 :
1126 : (define-charset 'gb18030-2-byte
1127 : "GB18030 2-byte (0x814E..0xFEFE)"
1128 : :code-space [#x40 #xFE #x81 #xFE] ; low byte range first, then high byte
1129 : :supplementary-p t
1130 : :map "GB180302")
1131 :
1132 : (define-charset 'gb18030-4-byte-bmp
1133 : "GB18030 4-byte for BMP (0x81308130-0x8431A439)"
1134 : :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x81 #x84] ; lowest byte first
1135 : :supplementary-p t
1136 : :map "GB180304") ; table-driven, unlike the :code-offset charsets below
1137 :
1138 : (define-charset 'gb18030-4-byte-smp
1139 : "GB18030 4-byte for SMP (0x90308130-0xE3329A35)"
1140 : :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x90 #xE3]
1141 : :min-code '(#x9030 . #x8130) ; (HIGH . LOW) 16-bit halves of 0x90308130
1142 : :max-code '(#xE332 . #x9A35) ; (HIGH . LOW) 16-bit halves of 0xE3329A35
1143 : :supplementary-p t
1144 : :code-offset #x10000)
1145 :
1146 : (define-charset 'gb18030-4-byte-ext-1
1147 : "GB18030 4-byte (0x8431A530-0x8F39FE39)"
1148 : :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x84 #x8F]
1149 : :min-code '(#x8431 . #xA530) ; (HIGH . LOW) 16-bit halves of 0x8431A530
1150 : :max-code '(#x8F39 . #xFE39) ; (HIGH . LOW) 16-bit halves of 0x8F39FE39
1151 : :supplementary-p t
1152 : :code-offset #x200000 ; ... #x22484B
1153 : )
1154 :
1155 : (define-charset 'gb18030-4-byte-ext-2
1156 : "GB18030 4-byte (0xE3329A36-0xFE39FE39)"
1157 : :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #xE3 #xFE]
1158 : :min-code '(#xE332 . #x9A36) ; one past the :max-code of `gb18030-4-byte-smp'
1159 : :max-code '(#xFE39 . #xFE39)
1160 : :supplementary-p t
1161 : :code-offset #x22484C ; ... #x279f93; continues right after ext-1's #x22484B
1162 : )
1163 :
1164 : (define-charset 'gb18030
1165 : "GB18030"
1166 : :code-space [#x00 #xFF #x00 #xFE #x00 #xFE #x00 #xFE]
1167 : :min-code 0
1168 : :max-code '(#xFE39 . #xFE39) ; same upper bound as `gb18030-4-byte-ext-2'
1169 : :superset '(ascii gb18030-2-byte ; `ascii' plus the five GB18030 pieces above
1170 : gb18030-4-byte-bmp gb18030-4-byte-smp
1171 : gb18030-4-byte-ext-1 gb18030-4-byte-ext-2))
1172 :
1173 : (define-charset 'chinese-cns11643-15
1174 : "CNS11643 Plane 15 Chinese Traditional"
1175 : :short-name "CNS11643-15"
1176 : :long-name "CNS11643-15 (Chinese traditional)"
1177 : :code-space [33 126 33 126] ; 94 x 94 code points
1178 : :code-offset #x27A000
1179 : :unify-map "CNS-F") ; used by the `unify-charset' call below
1180 :
1181 : (mapc #'unify-charset '(chinese-gb2312
1182 : chinese-gbk
1183 : chinese-cns11643-1
1184 : chinese-cns11643-2
1185 : chinese-cns11643-3
1186 : chinese-cns11643-4
1187 : chinese-cns11643-5
1188 : chinese-cns11643-6
1189 : chinese-cns11643-7
1190 : chinese-cns11643-15
1191 : big5
1192 : chinese-big5-1
1193 : chinese-big5-2
1194 : big5-hkscs
1195 : korean-ksc5601
1196 : vietnamese-viscii-lower
1197 : vietnamese-viscii-upper
1198 : chinese-sisheng
1199 : ipa
1200 : tibetan
1201 : ethiopic
1202 : indian-is13194
1203 : japanese-jisx0208-1978
1204 : japanese-jisx0208
1205 : japanese-jisx0212
1206 : japanese-jisx0213-1
1207 : japanese-jisx0213-2))
1208 :
1209 :
1210 : ;; These are tables for translating characters on decoding and
1211 : ;; encoding.
1212 : ;; Fixme: these aren't used now -- should they be?
1213 : (setq standard-translation-table-for-decode nil
1214 : ;; The encode-side table is reset to nil as well.
1215 : standard-translation-table-for-encode nil)
1216 :
1217 : ;;; Make fundamental coding systems.
1218 :
1219 : ;; The coding systems `no-conversion' and `undecided' are already
1220 : ;; defined in coding.c as below:
1221 : ;;
1222 : ;; (define-coding-system 'no-conversion
1223 : ;; "..."
1224 : ;; :coding-type 'raw-text
1225 : ;; ...)
1226 : ;; (define-coding-system 'undecided
1227 : ;; "..."
1228 : ;; :coding-type 'undecided
1229 : ;; ...)
1230 :
1231 : (dolist (pair '((binary . no-conversion) (unix . undecided-unix)
1232 : (dos . undecided-dos) (mac . undecided-mac)))
1233 : ;; The car of each pair is the new alias, the cdr the existing coding system.
1234 : (define-coding-system-alias (car pair) (cdr pair)))
1235 :
1236 : (define-coding-system 'prefer-utf-8
1237 : "Like `undecided' but prefer UTF-8 when appropriate.
1238 : On decoding, if the source contains 8-bit codes and they all
1239 : are valid UTF-8 sequences, detect the source as UTF-8 encoding
1240 : regardless of the coding priority.
1241 : On encoding, if the source contains non-ASCII characters, encode them
1242 : by UTF-8."
1243 : :coding-type 'undecided
1244 : :mnemonic ?- ; `us-ascii' below uses the same mnemonic
1245 : :charset-list '(emacs) ; same charset list as `utf-8-emacs'
1246 : :prefer-utf-8 t) ; the property behind the UTF-8 preference described above
1247 :
1248 : (define-coding-system 'raw-text
1249 : "Raw text, which means text contains random 8-bit codes.
1250 : Encoding text with this coding system produces the actual byte
1251 : sequence of the text in buffers and strings. An exception is made for
1252 : characters from the `eight-bit' character set. Each of them is encoded
1253 : into a single byte.
1254 :
1255 : When you visit a file with this coding, the file is read into a
1256 : unibyte buffer as is (except for EOL format), thus each byte of a file
1257 : is treated as a character."
1258 : :coding-type 'raw-text
1259 : :for-unibyte t ; `no-conversion-multibyte' below omits this property
1260 : :mnemonic ?t)
1261 :
1262 : (define-coding-system 'no-conversion-multibyte
1263 : "Like `no-conversion' but don't read a file into a unibyte buffer."
1264 : :coding-type 'raw-text ; same coding type as `raw-text', without :for-unibyte
1265 : :eol-type 'unix ; the only definition in this part of the file that fixes :eol-type
1266 : :mnemonic ?=)
1267 :
1268 : (define-coding-system 'iso-latin-1
1269 : "ISO 2022 based 8-bit encoding for Latin-1 (MIME:ISO-8859-1)."
1270 : :coding-type 'charset
1271 : :mnemonic ?1
1272 : :charset-list '(iso-8859-1)
1273 : :mime-charset 'iso-8859-1) ; the MIME name is also made an alias just below
1274 :
1275 : (define-coding-system-alias 'iso-8859-1 'iso-latin-1)
1276 : (define-coding-system-alias 'latin-1 'iso-latin-1)
1277 :
1278 : ;; Coding systems not specific to each language environment.
1279 :
1280 : (define-coding-system 'emacs-mule
1281 : "Emacs 21 internal format used in buffer and string."
1282 : :coding-type 'emacs-mule
1283 : :charset-list 'emacs-mule ; a bare symbol, not a list of charsets as in `utf-8'
1284 : :mnemonic ?M)
1285 :
1286 : (define-coding-system 'utf-8
1287 : "UTF-8 (no signature (BOM))"
1288 : :coding-type 'utf-8
1289 : :mnemonic ?U
1290 : :charset-list '(unicode)
1291 : :mime-charset 'utf-8) ; the only UTF-8 variant here that sets :mime-charset
1292 :
1293 : (define-coding-system 'utf-8-with-signature
1294 : "UTF-8 (with signature (BOM))"
1295 : :coding-type 'utf-8
1296 : :mnemonic ?U
1297 : :charset-list '(unicode)
1298 : :bom t) ; the only difference from `utf-8' besides the missing :mime-charset
1299 :
1300 : (define-coding-system 'utf-8-auto
1301 : "UTF-8 (auto-detect signature (BOM))"
1302 : :coding-type 'utf-8
1303 : :mnemonic ?U
1304 : :charset-list '(unicode)
1305 : :bom '(utf-8-with-signature . utf-8)) ; pair of the with-BOM and no-BOM variants
1306 :
1307 : (define-coding-system-alias 'mule-utf-8 'utf-8)
1308 :
1309 : (define-coding-system 'utf-8-emacs
1310 : "Support for all Emacs characters (including non-Unicode characters)."
1311 : :coding-type 'utf-8
1312 : :mnemonic ?U
1313 : :charset-list '(emacs)) ; `emacs' rather than `unicode' as in `utf-8'
1314 :
1315 : ;; The encoding used internally. This encoding is meant to be able to save
1316 : ;; any multibyte buffer without losing information. It can change between
1317 : ;; Emacs releases, though, so it should only be used for internal files.
1318 : (define-coding-system-alias 'emacs-internal 'utf-8-emacs-unix)
1319 :
1320 : (define-coding-system 'utf-16le
1321 : "UTF-16LE (little endian, no signature (BOM))."
1322 : :coding-type 'utf-16
1323 : :mnemonic ?U
1324 : :charset-list '(unicode)
1325 : :endian 'little
1326 : :mime-text-unsuitable t ; set on every UTF-16 variant defined here
1327 : :mime-charset 'utf-16le)
1328 :
1329 : (define-coding-system 'utf-16be
1330 : "UTF-16BE (big endian, no signature (BOM))."
1331 : :coding-type 'utf-16
1332 : :mnemonic ?U
1333 : :charset-list '(unicode)
1334 : :endian 'big ; the only property value, besides the MIME name, differing from `utf-16le'
1335 : :mime-text-unsuitable t
1336 : :mime-charset 'utf-16be)
1337 :
1338 : (define-coding-system 'utf-16le-with-signature
1339 : "UTF-16 (little endian, with signature (BOM))."
1340 : :coding-type 'utf-16
1341 : :mnemonic ?U
1342 : :charset-list '(unicode)
1343 : :bom t
1344 : :endian 'little
1345 : :mime-text-unsuitable t
1346 : :mime-charset 'utf-16) ; plain `utf-16', not `utf-16le'
1347 :
1348 : (define-coding-system 'utf-16be-with-signature
1349 : "UTF-16 (big endian, with signature (BOM))."
1350 : :coding-type 'utf-16
1351 : :mnemonic ?U
1352 : :charset-list '(unicode)
1353 : :bom t
1354 : :endian 'big
1355 : :mime-text-unsuitable t
1356 : :mime-charset 'utf-16) ; plain `utf-16', not `utf-16be'
1357 :
1358 : (define-coding-system 'utf-16
1359 : "UTF-16 (detect endian on decoding, use big endian on encoding with BOM)."
1360 : :coding-type 'utf-16
1361 : :mnemonic ?U
1362 : :charset-list '(unicode)
1363 : :bom '(utf-16le-with-signature . utf-16be-with-signature) ; pair of the two BOM variants
1364 : :endian 'big
1365 : :mime-text-unsuitable t
1366 : :mime-charset 'utf-16)
1367 :
1368 : ;; Backwards compatibility (old names, also used by Mule-UCS). We
1369 : ;; prefer the MIME names.
1370 : (define-coding-system-alias 'utf-16-le 'utf-16le-with-signature) ; BOM variant, not `utf-16le'
1371 : (define-coding-system-alias 'utf-16-be 'utf-16be-with-signature) ; BOM variant, not `utf-16be'
1372 :
1373 :
1374 : (define-coding-system 'iso-2022-7bit
1375 : "ISO 2022 based 7-bit encoding using only G0."
1376 : :coding-type 'iso-2022
1377 : :mnemonic ?J
1378 : :charset-list 'iso-2022
1379 : :designation [(ascii t) nil nil nil] ; only the first of the four slots is used
1380 : :flags '(short ascii-at-eol ascii-at-cntl 7-bit designation composition))
1381 :
1382 : (define-coding-system 'iso-2022-7bit-ss2
1383 : "ISO 2022 based 7-bit encoding using SS2 for 96-charset."
1384 : :coding-type 'iso-2022
1385 : :mnemonic ?$
1386 : :charset-list 'iso-2022
1387 : :designation [(ascii 94) nil (nil 96) nil] ; first and third slots are used
1388 : :flags '(short ascii-at-eol ascii-at-cntl 7-bit
1389 : designation single-shift composition)) ; `iso-2022-7bit' flags plus `single-shift'
1390 :
1391 : (define-coding-system 'iso-2022-7bit-lock
1392 : "ISO-2022 coding system using Locking-Shift for 96-charset."
1393 : :coding-type 'iso-2022
1394 : :mnemonic ?&
1395 : :charset-list 'iso-2022
1396 : :designation [(ascii 94) (nil 96) nil nil] ; first and second slots are used
1397 : :flags '(ascii-at-eol ascii-at-cntl 7-bit ; no `short' flag in this variant
1398 : designation locking-shift composition))
1399 :
1400 : (define-coding-system-alias 'iso-2022-int-1 'iso-2022-7bit-lock)
1401 :
1402 : (define-coding-system 'iso-2022-7bit-lock-ss2
1403 : "Mixture of ISO-2022-JP, ISO-2022-KR, and ISO-2022-CN."
1404 : :coding-type 'iso-2022
1405 : :mnemonic ?i
1406 : :charset-list '(ascii ; an explicit list here, not the symbol `iso-2022'
1407 : japanese-jisx0208 japanese-jisx0208-1978 latin-jisx0201
1408 : korean-ksc5601
1409 : chinese-gb2312
1410 : chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3
1411 : chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6
1412 : chinese-cns11643-7)
1413 : :designation [(ascii 94) ; all four slots are filled in this coding system
1414 : (nil korean-ksc5601 chinese-gb2312 chinese-cns11643-1 96)
1415 : (nil chinese-cns11643-2)
1416 : (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5
1417 : chinese-cns11643-6 chinese-cns11643-7)]
1418 : :flags '(short ascii-at-eol ascii-at-cntl 7-bit locking-shift
1419 : single-shift init-bol)) ; no `designation' or `composition' flag here
1420 :
1421 : (define-coding-system-alias 'iso-2022-cjk 'iso-2022-7bit-lock-ss2)
1422 :
1423 : (define-coding-system 'iso-2022-8bit-ss2
1424 : "ISO 2022 based 8-bit encoding using SS2 for 96-charset."
1425 : :coding-type 'iso-2022
1426 : :mnemonic ?@
1427 : :charset-list 'iso-2022
1428 : :designation [(ascii 94) nil (nil 96) nil] ; same designation as `iso-2022-7bit-ss2'
1429 : :flags '(ascii-at-eol ascii-at-cntl designation single-shift composition)) ; no `short', no `7-bit'
1430 :
1431 : (define-coding-system 'compound-text
1432 : "Compound text based generic encoding.
1433 : This coding system is an extension of X's \"Compound Text Encoding\".
1434 : It encodes many characters using the normal ISO-2022 designation sequences,
1435 : but it doesn't support extended segments of CTEXT."
1436 : :coding-type 'iso-2022
1437 : :mnemonic ?x
1438 : :charset-list 'iso-2022
1439 : :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
1440 : :flags '(ascii-at-eol ascii-at-cntl long-form
1441 : designation locking-shift single-shift composition)
1442 : ;; Fixme: this isn't a valid MIME charset and has to be
1443 : ;; special-cased elsewhere -- fx
1444 : :mime-charset 'x-ctext) ; `compound-text-with-extensions' uses the same value
1445 :
1446 : (define-coding-system-alias 'x-ctext 'compound-text)
1447 : (define-coding-system-alias 'ctext 'compound-text)
1448 :
1449 : ;; Same as compound-text, but doesn't produce composition escape
1450 : ;; sequences. Used in post-read and pre-write conversions of
1451 : ;; compound-text-with-extensions, see mule.el. Note that this should
1452 : ;; not have a mime-charset property, to prevent it from showing up
1453 : ;; close to the beginning of coding systems ordered by priority.
1454 : (define-coding-system 'ctext-no-compositions
1455 : "Compound text based generic encoding.
1456 :
1457 : Like `compound-text', but does not produce escape sequences for compositions."
1458 : :coding-type 'iso-2022
1459 : :mnemonic ?x
1460 : :charset-list 'iso-2022
1461 : :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
1462 : :flags '(ascii-at-eol ascii-at-cntl ; vs. `compound-text': no `long-form', no `composition'
1463 : designation locking-shift single-shift)) ; and deliberately no :mime-charset
1464 :
1465 : (define-coding-system 'compound-text-with-extensions
1466 : "Compound text encoding with ICCCM Extended Segment extensions.
1467 :
1468 : See the variables `ctext-standard-encodings' and
1469 : `ctext-non-standard-encodings-alist' for the detail about how
1470 : extended segments are handled.
1471 :
1472 : This coding system should be used only for X selections. It is inappropriate
1473 : for decoding and encoding files, process I/O, etc."
1474 : :coding-type 'iso-2022
1475 : :mnemonic ?x
1476 : :charset-list 'iso-2022
1477 : :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
1478 : :flags '(ascii-at-eol ascii-at-cntl long-form ; `compound-text' flags minus `composition'
1479 : designation locking-shift single-shift)
1480 : :post-read-conversion 'ctext-post-read-conversion ; not defined in this part of the file
1481 : :pre-write-conversion 'ctext-pre-write-conversion ; not defined in this part of the file
1482 : :mime-charset 'x-ctext)
1483 :
1484 : (define-coding-system-alias
1485 : 'x-ctext-with-extensions 'compound-text-with-extensions)
1486 : (define-coding-system-alias
1487 : 'ctext-with-extensions 'compound-text-with-extensions)
1488 :
1489 : (define-coding-system 'us-ascii
1490 : "Encode ASCII as-is and encode non-ASCII characters to `?'."
1491 : :coding-type 'charset
1492 : :mnemonic ?-
1493 : :charset-list '(ascii)
1494 : :default-char ?? ; the `?' replacement character named in the docstring
1495 : :mime-charset 'us-ascii)
1496 :
1497 : (define-coding-system-alias 'iso-safe 'us-ascii)
1498 :
1499 : (define-coding-system 'utf-7
1500 : "UTF-7 encoding of Unicode (RFC 2152)."
1501 : :coding-type 'utf-8 ; declared with the `utf-8' type plus the two conversion hooks below
1502 : :mnemonic ?U
1503 : :mime-charset 'utf-7
1504 : :charset-list '(unicode)
1505 : :pre-write-conversion 'utf-7-pre-write-conversion
1506 : :post-read-conversion 'utf-7-post-read-conversion)
1507 :
1508 : (define-coding-system 'utf-7-imap
1509 : "UTF-7 encoding of Unicode, IMAP version (RFC 2060)"
1510 : :coding-type 'utf-8
1511 : :mnemonic ?u ; lower case, unlike the `?U' of `utf-7'
1512 : :charset-list '(unicode) ; no :mime-charset is set for this variant
1513 : :pre-write-conversion 'utf-7-imap-pre-write-conversion
1514 : :post-read-conversion 'utf-7-imap-post-read-conversion)
1515 :
1516 : ;; Use `us-ascii' (defined above) for terminal output if some other
1517 : ;; coding system is not specified explicitly.
1518 : (set-safe-terminal-coding-system-internal 'us-ascii)
1519 :
1520 : ;; The other coding-systems are defined in each language specific
1521 : ;; files under lisp/language.
1522 :
1523 : ;; Normally, set coding system to `undecided' before reading a file.
1524 : ;; Compiled Emacs Lisp files (*.elc) are not decoded at all,
1525 : ;; but we regard them as containing multibyte characters.
1526 : ;; Tar files are not decoded at all, but we treat them as raw bytes.
1527 :
1528 : (setq file-coding-system-alist
1529 : (mapcar (lambda (arg) (cons (purecopy (car arg)) (cdr arg))) ; purecopy only the regexp key
1530 : '(("\\.elc\\'" . utf-8-emacs)
1531 : ("\\.el\\'" . prefer-utf-8) ; NOTE(review): also matches "loaddefs.el" -- confirm which entry should win
1532 : ("\\.utf\\(-8\\)?\\'" . utf-8)
1533 : ("\\.xml\\'" . xml-find-file-coding-system)
1534 : ;; We use raw-text for reading loaddefs.el so that if it
1535 : ;; happens to have DOS or Mac EOLs, they are converted to
1536 : ;; newlines. This is required to make the special treatment
1537 : ;; of the "\ newline" combination in loaddefs.el, which marks
1538 : ;; the beginning of a doc string, work.
1539 : ("\\(\\`\\|/\\)loaddefs\\.el\\'" . (raw-text . raw-text-unix))
1540 : ("\\.tar\\'" . (no-conversion . no-conversion))
1541 : ("\\.po[tx]?\\'\\|\\.po\\." . po-find-file-coding-system)
1542 : ("\\.\\(tex\\|ltx\\|dtx\\|drv\\)\\'" . latexenc-find-file-coding-system)
1543 : ("" . (undecided . nil))))) ; the empty regexp matches any name, so this is the fallback
1544 :
1545 :
1546 : ;;; Setting coding categories and their priorities.
1547 :
1548 : ;; This setting is just for reading Emacs Lisp source files that
1549 : ;; contain multilingual text while dumping Emacs. More appropriate
1550 : ;; values are set by the command `set-language-environment' for each
1551 : ;; language environment.
1552 :
1553 : (set-coding-system-priority
1554 : ;; Arguments are listed from highest to lowest priority.
1555 : 'iso-latin-1
1556 : 'utf-8
1557 : 'iso-2022-7bit)
1558 :
1559 :
1560 : ;;; Miscellaneous settings.
1561 :
1562 : ;; Make all multibyte characters self-insert.
1563 : (let ((non-ascii (cons 128 (max-char))))
1564 : ;; The second element of `global-map' is the char-table being filled in.
1565 : (set-char-table-range (nth 1 global-map) non-ascii 'self-insert-command))
1566 :
1567 : ;; Flag the six consecutive codes ?\221..?\226 (decimal 145..150) in
1568 : ;; `latin-extra-code-table'.
1569 : (let ((code ?\221))
1570 : (while (<= code ?\226)
1571 : (aset latin-extra-code-table code t)
1572 : (setq code (1+ code))))
1573 :
1574 : ;; The old code-pages library is obsoleted by coding systems based on
1575 : ;; the charsets defined in this file, but user code might still
1576 : ;; `require' it, so the feature is provided here.
1577 : (provide 'code-pages)
1578 :
1579 : ;;; mule-conf.el ends here
|