[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Emacs-diffs] Changes to emacs/lisp/language/chinese.el,v
From: Miles Bader
Subject: [Emacs-diffs] Changes to emacs/lisp/language/chinese.el,v
Date: Fri, 01 Feb 2008 16:02:29 +0000
CVSROOT: /cvsroot/emacs
Module name: emacs
Changes by: Miles Bader <miles> 08/02/01 16:01:31
Index: lisp/language/chinese.el
===================================================================
RCS file: /cvsroot/emacs/emacs/lisp/language/chinese.el,v
retrieving revision 1.37
retrieving revision 1.38
diff -u -b -r1.37 -r1.38
--- lisp/language/chinese.el 8 Jan 2008 20:45:57 -0000 1.37
+++ lisp/language/chinese.el 1 Feb 2008 16:01:02 -0000 1.38
@@ -6,6 +6,9 @@
;; 2005, 2006, 2007, 2008
;; National Institute of Advanced Industrial Science and Technology (AIST)
;; Registration Number H14PRO021
+;; Copyright (C) 2003
+;; National Institute of Advanced Industrial Science and Technology (AIST)
+;; Registration Number H13PRO009
;; Keywords: multilingual, Chinese
@@ -37,85 +40,72 @@
;;; Chinese (general)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(make-coding-system
- 'iso-2022-cn 2 ?C
+
+(define-coding-system 'iso-2022-cn
"ISO 2022 based 7bit encoding for Chinese GB and CNS (MIME:ISO-2022-CN)."
- '(ascii
+ :coding-type 'iso-2022
+ :mnemonic ?C
+ :charset-list '(ascii chinese-gb2312 chinese-cns11643-1 chinese-cns11643-2)
+ :designation [ascii
(nil chinese-gb2312 chinese-cns11643-1)
(nil chinese-cns11643-2)
- nil
- nil ascii-eol ascii-cntl seven locking-shift single-shift nil nil nil
- init-bol)
- '((safe-charsets ascii chinese-gb2312 chinese-cns11643-1 chinese-cns11643-2)
- (mime-charset . iso-2022-cn)))
+ nil]
+ :flags '(ascii-at-eol ascii-at-cntl 7-bit
+ designation locking-shift single-shift init-at-bol)
+ :mime-charset 'iso-2022-cn
+ :suitable-for-keyboard t)
(define-coding-system-alias 'chinese-iso-7bit 'iso-2022-cn)
-(make-coding-system
- 'iso-2022-cn-ext 2 ?C
+(define-coding-system 'iso-2022-cn-ext
"ISO 2022 based 7bit encoding for Chinese GB and CNS (MIME:ISO-2022-CN-EXT)."
- '(ascii
+ :coding-type 'iso-2022
+ :mnemonic ?C
+ :charset-list '(ascii
+ chinese-gb2312 chinese-cns11643-1
+ chinese-cns11643-2 chinese-cns11643-3 chinese-cns11643-4
+ chinese-cns11643-5 chinese-cns11643-6 chinese-cns11643-7)
+ :designation '[ascii
(nil chinese-gb2312 chinese-cns11643-1)
(nil chinese-cns11643-2)
(nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5
- chinese-cns11643-6 chinese-cns11643-7)
- nil ascii-eol ascii-cntl seven locking-shift single-shift nil nil nil
- init-bol)
- '((safe-charsets ascii chinese-gb2312 chinese-cns11643-1 chinese-cns11643-2
- chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5
- chinese-cns11643-6 chinese-cns11643-7)
- (mime-charset . iso-2022-cn-ext)))
+ chinese-cns11643-6 chinese-cns11643-7)]
+ :flags '(ascii-at-eol ascii-at-cntl 7-bit
+ designation locking-shift single-shift init-at-bol)
+ :mime-charset 'iso-2022-cn-ext
+ :suitable-for-keyboard t)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Chinese GB2312 (simplified)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(make-coding-system
- 'chinese-iso-8bit 2 ?c
+(define-coding-system 'chinese-iso-8bit
"ISO 2022 based EUC encoding for Chinese GB2312 (MIME:GB2312)."
- '(ascii chinese-gb2312 nil nil
- nil ascii-eol ascii-cntl nil nil nil nil)
- '((safe-charsets ascii chinese-gb2312)
- (mime-charset . gb2312)))
+ :coding-type 'iso-2022
+ :mnemonic ?c
+ :charset-list '(ascii chinese-gb2312)
+ :designation [ascii chinese-gb2312 nil nil]
+ :mime-charset 'gb2312)
(define-coding-system-alias 'cn-gb-2312 'chinese-iso-8bit)
(define-coding-system-alias 'euc-china 'chinese-iso-8bit)
(define-coding-system-alias 'euc-cn 'chinese-iso-8bit)
(define-coding-system-alias 'cn-gb 'chinese-iso-8bit)
(define-coding-system-alias 'gb2312 'chinese-iso-8bit)
-(define-coding-system-alias 'cp936 'chinese-iso-8bit)
-(make-coding-system
- 'chinese-hz 0 ?z
+(define-coding-system 'chinese-hz
"Hz/ZW 7-bit encoding for Chinese GB2312 (MIME:HZ-GB-2312)."
- nil
- '((safe-charsets ascii chinese-gb2312)
- (mime-charset . hz-gb-2312)
- (post-read-conversion . post-read-decode-hz)
- (pre-write-conversion . pre-write-encode-hz)))
+ :coding-type 'utf-8
+ :mnemonic ?z
+ :charset-list '(ascii chinese-gb2312)
+ :mime-charset 'hz-gb-2312
+ :post-read-conversion 'post-read-decode-hz
+ :pre-write-conversion 'pre-write-encode-hz)
(define-coding-system-alias 'hz-gb-2312 'chinese-hz)
(define-coding-system-alias 'hz 'chinese-hz)
-(defun post-read-decode-hz (len)
- (let ((pos (point))
- (buffer-modified-p (buffer-modified-p))
- last-coding-system-used)
- (prog1
- (decode-hz-region pos (+ pos len))
- (set-buffer-modified-p buffer-modified-p))))
-
-(defun pre-write-encode-hz (from to)
- (let ((buf (current-buffer)))
- (set-buffer (generate-new-buffer " *temp*"))
- (if (stringp from)
- (insert from)
- (insert-buffer-substring buf from to))
- (let (last-coding-system-used)
- (encode-hz-region 1 (point-max)))
- nil))
-
(set-language-info-alist
"Chinese-GB" '((charset chinese-gb2312 chinese-sisheng)
(coding-system chinese-iso-8bit iso-2022-cn chinese-hz)
@@ -131,181 +121,47 @@
;; Chinese BIG5 (traditional)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(make-coding-system
- 'chinese-big5 3 ?B
- "BIG5 8-bit encoding for Chinese (MIME:Big5)."
- nil
- '((safe-charsets ascii chinese-big5-1 chinese-big5-2)
- (mime-charset . big5)
- (charset-origin-alist (chinese-big5-1 "BIG5" encode-big5-char)
- (chinese-big5-2 "BIG5" encode-big5-char))))
+(define-coding-system 'chinese-big5
+ "BIG5 8-bit encoding for Chinese (MIME:Big5)"
+ :coding-type 'charset
+ :mnemonic ?B
+ :charset-list '(ascii big5)
+ :mime-charset 'big5)
(define-coding-system-alias 'big5 'chinese-big5)
(define-coding-system-alias 'cn-big5 'chinese-big5)
(define-coding-system-alias 'cp950 'chinese-big5)
-;; Big5 font requires special encoding.
-(define-ccl-program ccl-encode-big5-font
- `(0
- ;; In: R0:chinese-big5-1 or chinese-big5-2
- ;; R1:position code 1
- ;; R2:position code 2
- ;; Out: R1:font code point 1
- ;; R2:font code point 2
- ((r2 = ((((r1 - ?\x21) * 94) + r2) - ?\x21))
- (if (r0 == ,(charset-id 'chinese-big5-2)) (r2 += 6280))
- (r1 = ((r2 / 157) + ?\xA1))
- (r2 %= 157)
- (if (r2 < ?\x3F) (r2 += ?\x40) (r2 += ?\x62))))
- "CCL program to encode a Big5 code to code point of Big5 font.")
-
-(setq font-ccl-encoder-alist
- (cons (cons "big5" ccl-encode-big5-font) font-ccl-encoder-alist))
-
(set-language-info-alist
"Chinese-BIG5" '((charset chinese-big5-1 chinese-big5-2)
(coding-system chinese-big5 chinese-iso-7bit)
(coding-priority chinese-big5 iso-2022-cn chinese-iso-8bit)
(input-method . "chinese-py-punct-b5")
+ (ctext-non-standard-encodings "big5-0")
(features china-util)
- (sample-text . "Cantonese ($(0GnM$(B,$(0N]0*Hd(B) $(0*/=((B, $(0+$)p(B")
+ (sample-text . "Cantonese ($(Gemk#(B,$(Gl]N)fc(B) $ATg3?(B, $ADc:C(B")
(documentation . "Support for Chinese Big5 character set.")
(tutorial . "TUTORIAL.zh"))
'("Chinese"))
+(define-coding-system 'chinese-big5-hkscs
+ "BIG5-HKSCS 8-bit encoding for Chinese, Hong Kong supplement (MIME:Big5-HKSCS)"
+ :coding-type 'charset
+ :mnemonic ?B
+ :charset-list '(ascii big5-hkscs)
+ :mime-charset 'big5-hkscs)
+(define-coding-system-alias 'big5-hkscs 'chinese-big5-hkscs)
+(define-coding-system-alias 'cn-big5-hkscs 'chinese-big5-hkscs)
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Chinese CNS11643 (traditional)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(defvar big5-to-cns (make-translation-table)
- "Translation table for encoding to `euc-tw'.")
-;; Could have been done by china-util loaded before.
-(unless (get 'big5-to-cns 'translation-table)
- (define-translation-table 'big5-to-cns big5-to-cns))
-
-(define-ccl-program ccl-decode-euc-tw
- ;; CNS plane 1 needs either two or four bytes in EUC-TW encoding;
- ;; CNS planes 2 to 7 always need four bytes. In internal encoding of
- ;; Emacs, CNS planes 1 and 2 need three bytes, and planes 3 to 7 need
- ;; four bytes. Thus a buffer magnification value of 2 (for both
- ;; encoding and decoding) is sufficient.
- `(2
- ;; we don't have enough registers to hold all charset-ids
- ((r4 = ,(charset-id 'chinese-cns11643-1))
- (r5 = ,(charset-id 'chinese-cns11643-2))
- (r6 = ,(charset-id 'chinese-cns11643-3))
- (loop
- (read-if (r0 < #x80)
- ;; ASCII
- (write-repeat r0)
- ;; not ASCII
- (if (r0 == #x8E)
- ;; single shift
- (read-if (r1 < #xA1)
- ;; invalid byte
- ((write r0)
- (write-repeat r1))
- (if (r1 > #xA7)
- ;; invalid plane
- ((write r0)
- (write-repeat r1))
- ;; OK, we have a plane
- (read-if (r2 < #xA1)
- ;; invalid first byte
- ((write r0 r1)
- (write-repeat r2))
- (read-if (r3 < #xA1)
- ;; invalid second byte
- ((write r0 r1 r2)
- (write-repeat r3))
- ;; CNS 1-7, finally
- ((branch (r1 - #xA1)
- (r1 = r4)
- (r1 = r5)
- (r1 = r6)
- (r1 = ,(charset-id 'chinese-cns11643-4))
- (r1 = ,(charset-id 'chinese-cns11643-5))
- (r1 = ,(charset-id 'chinese-cns11643-6))
- (r1 = ,(charset-id 'chinese-cns11643-7)))
- (r2 = ((((r2 - #x80) << 7) + r3) - #x80))
- (write-multibyte-character r1 r2)
- (repeat))))))
- ;; standard EUC
- (if (r0 < #xA1)
- ;; invalid first byte
- (write-repeat r0)
- (read-if (r1 < #xA1)
- ;; invalid second byte
- ((write r0)
- (write-repeat r1))
- ;; CNS 1, finally
- ((r1 = ((((r0 - #x80) << 7) + r1) - #x80))
- (write-multibyte-character r4 r1)
- (repeat)))))))))
- "CCL program to decode EUC-TW encoding."
-)
-
-(define-ccl-program ccl-encode-euc-tw
- `(2
- ;; we don't have enough registers to hold all charset-ids
- ((r2 = ,(charset-id 'ascii))
- (r3 = ,(charset-id 'chinese-big5-1))
- (r4 = ,(charset-id 'chinese-big5-2))
- (r5 = ,(charset-id 'chinese-cns11643-1))
- (r6 = ,(charset-id 'chinese-cns11643-2))
- (loop
- (read-multibyte-character r0 r1)
- (if (r0 == r2)
- (write-repeat r1)
- (;; Big 5 encoded characters are first translated to CNS
- (if (r0 == r3)
- (translate-character big5-to-cns r0 r1)
- (if (r0 == r4)
- (translate-character big5-to-cns r0 r1)))
- (if (r0 == r5)
- (r0 = #xA1)
- (if (r0 == r6)
- (r0 = #xA2)
- (if (r0 == ,(charset-id 'chinese-cns11643-3))
- (r0 = #xA3)
- (if (r0 == ,(charset-id 'chinese-cns11643-4))
- (r0 = #xA4)
- (if (r0 == ,(charset-id 'chinese-cns11643-5))
- (r0 = #xA5)
- (if (r0 == ,(charset-id 'chinese-cns11643-6))
- (r0 = #xA6)
- (if (r0 == ,(charset-id 'chinese-cns11643-7))
- (r0 = #xA7)
- ;; not CNS. We use a dummy character which
- ;; can't occur in EUC-TW encoding to indicate
- ;; this.
- (write-repeat #xFF))))))))))
- (if (r0 != #xA1)
- ;; single shift and CNS plane
- ((write #x8E)
- (write r0)))
- (write ((r1 >> 7) + #x80))
- (write ((r1 % #x80) + #x80))
- (repeat))))
- "CCL program to encode EUC-TW encoding."
-)
-
-(defun euc-tw-pre-write-conversion (beg end)
- "Semi-dummy pre-write function effectively to autoload china-util."
- ;; Ensure translation table is loaded.
- (require 'china-util)
- ;; Don't do this again.
- (coding-system-put 'euc-tw 'pre-write-conversion nil)
- nil)
-
-(make-coding-system
- 'euc-tw 4 ?Z
- "ISO 2022 based EUC encoding for Chinese CNS11643.
-Big5 encoding is accepted for input also (which is then converted to CNS)."
- '(ccl-decode-euc-tw . ccl-encode-euc-tw)
- '((safe-charsets ascii
- chinese-big5-1
- chinese-big5-2
+(define-coding-system 'euc-tw
+ "ISO 2022 based EUC encoding for Chinese CNS11643."
+ :coding-type 'iso-2022
+ :mnemonic ?Z
+ :charset-list '(ascii
chinese-cns11643-1
chinese-cns11643-2
chinese-cns11643-3
@@ -313,8 +169,14 @@
chinese-cns11643-5
chinese-cns11643-6
chinese-cns11643-7)
- (valid-codes (0 . 255))
- (pre-write-conversion . euc-tw-pre-write-conversion)))
+ :designation [ascii chinese-cns11643-1 (chinese-cns11643-1
+ chinese-cns11643-2
+ chinese-cns11643-3
+ chinese-cns11643-4
+ chinese-cns11643-5
+ chinese-cns11643-6
+ chinese-cns11643-7) nil]
+ :mime-charset 'euc-tw)
(define-coding-system-alias 'euc-taiwan 'euc-tw)
@@ -328,6 +190,7 @@
chinese-iso-8bit)
(features china-util)
(input-method . "chinese-cns-quick")
+ ;; Fixme: presumably it won't accept big5 now.
(documentation . "\
Support for Chinese CNS character sets. Note that the EUC-TW coding system
accepts Big5 for input also (which is then converted to CNS)."))
@@ -349,6 +212,60 @@
converted to CNS)."))
'("Chinese"))
+
+;;; Chinese GBK
+
+(define-coding-system 'chinese-gbk
+ "GBK encoding for Chinese (MIME:GBK)."
+ :coding-type 'charset
+ :mnemonic ?c
+ :charset-list '(ascii chinese-gbk)
+ :mime-charset 'gbk)
+(define-coding-system-alias 'gbk 'chinese-gbk)
+(define-coding-system-alias 'cp936 'chinese-gbk)
+(define-coding-system-alias 'windows-936 'chinese-gbk)
+
+(set-language-info-alist
+ "Chinese-GBK" '((charset chinese-gbk)
+ (coding-system chinese-gbk)
+ (coding-priority gbk iso-2022-cn chinese-big5
+ chinese-iso-8bit) ; fixme?
+ (ctext-non-standard-encodings "gbk-0")
+ (input-method . "chinese-py-punct") ; fixme?
+ (sample-text . "Chinese ($BCfJ8(B,$BIaDL$A;0(B,$A::So(B) $(D95$B9%(B")
+ (features china-util)
+ (documentation . "Support for Chinese GBK character set.")
+ (tutorial . "TUTORIAL.cn"))
+ '("Chinese"))
+
+;;; Chinese GB18030
+
+(define-coding-system 'chinese-gb18030
+ "GB18030 encoding for Chinese (MIME:GB18030)."
+ :coding-type 'charset
+ :mnemonic ?c
+ :charset-list '(ascii gb18030-2-byte
+ gb18030-4-byte-bmp gb18030-4-byte-smp
+ gb18030-4-byte-ext-1 gb18030-4-byte-ext-2)
+ :mime-charset 'gb18030)
+
+(define-coding-system-alias 'gb18030 'chinese-gb18030)
+
+(set-language-info-alist
+ "Chinese-GB18030" '((charset gb18030)
+ (coding-system chinese-gb18030)
+ (coding-priority gb18030 gbk iso-2022-cn chinese-big5
+ chinese-iso-8bit) ; fixme?
+ (input-method . "chinese-py-punct") ; fixme?
+ (sample-text . "Chinese ($BCfJ8(B,$BIaDL$A;0(B,$A::So(B) $(D0_$B9%(B")
+ (features china-util)
+ (documentation
+ . "Support for Chinese GB18030 character set.")
+ (tutorial . "TUTORIAL.cn"))
+ '("Chinese"))
+
+;; Fixme: add HKSCS
+
(provide 'chinese)
;;; arch-tag: b82fcf7a-84f6-4e0b-b38c-1742dac0e09f
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Emacs-diffs] Changes to emacs/lisp/language/chinese.el,v,
Miles Bader <=