[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Emacs-diffs] Changes to emacs/lisp/international/mule.el
From: |
Kenichi Handa |
Subject: |
[Emacs-diffs] Changes to emacs/lisp/international/mule.el |
Date: |
Wed, 03 Dec 2003 03:24:43 -0500 |
Index: emacs/lisp/international/mule.el
diff -c emacs/lisp/international/mule.el:1.195
emacs/lisp/international/mule.el:1.196
*** emacs/lisp/international/mule.el:1.195 Thu Nov 13 21:39:35 2003
--- emacs/lisp/international/mule.el Wed Dec 3 03:24:42 2003
***************
*** 1330,1341 ****
;;; X selections
(defvar ctext-non-standard-encodings-alist
! '(("ISO8859-15" . iso-8859-15)
! ("ISO8859-14" . iso-8859-14)
! ("KOI8-R" . koi8-r)
! ("BIG5-0" . big5))
! "Alist of non-standard encoding names vs Emacs coding systems.
! This alist is used to decode an extened segment of a compound text.")
(defvar ctext-non-standard-encodings-regexp
(string-to-multibyte
--- 1330,1371 ----
;;; X selections
(defvar ctext-non-standard-encodings-alist
! '(("big5-0" big5 2 (chinese-big5-1 chinese-big5-2))
! ("ISO8859-14" iso-8859-14 1 latin-iso8859-14)
! ("ISO8859-15" iso-8859-15 1 latin-iso8859-15))
! "Alist of non-standard encoding names vs the corresponding usages in CTEXT.
!
! It controls how extended segments of a compound text are handled
! by the coding system `compound-text-with-extensions'.
!
! Each element has the form (ENCODING-NAME CODING-SYSTEM N-OCTET CHARSET).
!
! ENCODING-NAME is an encoding name of an \"extended segments\".
!
! CODING-SYSTEM is the coding-system to encode (or decode) the
! characters into (or from) the extended segment.
!
! N-OCTET is the number of octets (bytes) that encodes a character
! in the segment. It can be 0 (meaning the number of octets per
! character is variable), 1, 2, 3, or 4.
!
! CHARSET is a charater set containing characters that are encoded
! in the segment. It can be a list of character sets. It can also
! be a char-table, in which case characters that have non-nil value
! in the char-table are the target.
!
! On decoding CTEXT, all encoding names listed here are recognized.
!
! On encoding CTEXT, encoding names in the variable
! `ctext-non-standard-encodings' (which see) and in the information
! listed for the current language environment under the key
! `ctext-non-standard-encodings' are used.")
!
! (defvar ctext-non-standard-encodings
! '("big5-0")
! "List of non-standard encoding names used in extended segments of CTEXT.
! Each element must be one of the names listed in the variable
! `ctext-non-standard-encodings-alist' (which see).")
(defvar ctext-non-standard-encodings-regexp
(string-to-multibyte
***************
*** 1347,1359 ****
"\\(address@hidden)")))
;; Functions to support "Non-Standard Character Set Encodings" defined
! ;; by the COMPOUND-TEXT spec.
! ;; We support that by decoding the whole data by `ctext' which just
! ;; pertains byte sequences belonging to ``extended segment'', then
! ;; decoding those byte sequences one by one in Lisp.
! ;; This function also supports "The UTF-8 encoding" described in the
! ;; section 7 of the documentation fo COMPOUND-TEXT distributed with
! ;; XFree86.
(defun ctext-post-read-conversion (len)
"Decode LEN characters encoded as Compound Text with Extended Segments."
--- 1377,1385 ----
"\\(address@hidden)")))
;; Functions to support "Non-Standard Character Set Encodings" defined
! ;; by the COMPOUND-TEXT spec. They also support "The UTF-8 encoding"
! ;; described in the section 7 of the documentation of COMPOUND-TEXT
! ;; distributed with XFree86.
(defun ctext-post-read-conversion (len)
"Decode LEN characters encoded as Compound Text with Extended Segments."
***************
*** 1365,1371 ****
pos bytes)
(or in-workbuf
(narrow-to-region (point) (+ (point) len)))
- (decode-coding-region (point-min) (point-max) 'ctext)
(if in-workbuf
(set-buffer-multibyte t))
(while (re-search-forward ctext-non-standard-encodings-regexp
--- 1391,1396 ----
***************
*** 1376,1386 ****
(let* ((M (char-after (+ pos 4)))
(L (char-after (+ pos 5)))
(encoding (match-string 2))
! (coding (or (cdr (assoc-ignore-case
! encoding
! ctext-non-standard-encodings-alist))
! (coding-system-p
! (intern (downcase encoding))))))
(setq bytes (- (+ (* (- M 128) 128) (- L 128))
(- (point) (+ pos 6))))
(when coding
--- 1401,1414 ----
(let* ((M (char-after (+ pos 4)))
(L (char-after (+ pos 5)))
(encoding (match-string 2))
! (encoding-info (assoc-ignore-case
! encoding
! ctext-non-standard-encodings-alist))
! (coding (if encoding-info
! (nth 1 encoding-info)
! (setq encoding (intern (downcase encoding)))
! (and (coding-system-p encoding)
! encoding))))
(setq bytes (- (+ (* (- M 128) 128) (- L 128))
(- (point) (+ pos 6))))
(when coding
***************
*** 1388,1453 ****
(forward-char bytes)
(decode-coding-region (- (point) bytes) (point) coding)))
;; ESC % G --UTF-8-BYTES-- ESC % @
! (setq bytes (- (point) pos))
! (decode-coding-region (- (point) bytes) (point) 'utf-8))))
(goto-char (point-min))
(- (point-max) (point)))))
! ;; From X registry 2001/06/01
! ;; 20. NON-STANDARD CHARACTER SET ENCODINGS
!
! ;; See Section 6 of the Compound Text standard.
!
! ;; Name Reference
! ;; ---- ---------
! ;; "DEC.CNS11643.1986-2" [53]
! ;; CNS11643 2-plane using the recommended
! ;; internal representation scheme
! ;; "DEC.DTSCS.1990-2" [54]
! ;; DEC Taiwan Supplemental Character Set
! ;; "fujitsu.u90x03" [87]
! ;; "ILA" [62]
! ;; registry prefix
! ;; "IPSYS" [59]
! ;; registry prefix
! ;; "omron_UDC" [45]
! ;; omron User Defined Charset
! ;; "omron_UDC_ja" [45]
! ;; omron User Defined Charset for Japanese
! ;; "omron_UDC_zh" [45]
! ;; omron User Defined Charset for Chinese(Main land)
! ;; "omron_UDC_tw" [45]
! ;; omron User Defined Charset for Chinese(Taiwan)
!
! ;; If you add charsets here, be sure to modify the regexp used by
! ;; ctext-pre-write-conversion to look up non-standard charsets.
! (defvar ctext-non-standard-designations-alist
! '(("$(0" . (big5 "big5-0" 2))
! ("$(1" . (big5 "big5-0" 2))
! ;; The following are actually standard; generating extended
! ;; segments for them is wrong and screws e.g. Latin-9 users.
! ;; 8859-{10,13,16} aren't Emacs charsets anyhow. -- fx
! ;; ("-V" . (t "iso8859-10" 1))
! ;; ("-Y" . (t "iso8859-13" 1))
! ;; ("-_" . (t "iso8859-14" 1))
! ;; ("-b" . (t "iso8859-15" 1))
! ;; ("-f" . (t "iso8859-16" 1))
! )
! "Alist of ctext control sequences that introduce character sets which
! are not in the list of approved encodings, and the corresponding
! coding system, identifier string, and number of octets per encoded
! character.
!
! Each element has the form (CTLSEQ . (ENCODING CHARSET NOCTETS)). CTLSEQ
! is the control sequence (sans the leading ESC) that introduces the character
! set in the text encoded by compound-text. ENCODING is a coding system
! symbol; if it is t, it means that the ctext coding system already encodes
! the text correctly, and only the leading control sequence needs to be altered.
! If ENCODING is a coding system, we need to re-encode the text with that
! coding system. CHARSET is the name of the charset we need to put into
! the leading control sequence. NOCTETS is the number of octets (bytes) that
! encode each character in this charset. NOCTETS can be 0 (meaning the number
! of octets per character is variable), 1, 2, 3, or 4.")
(defun ctext-pre-write-conversion (from to)
"Encode characters between FROM and TO as Compound Text w/Extended Segments.
--- 1416,1454 ----
(forward-char bytes)
(decode-coding-region (- (point) bytes) (point) coding)))
;; ESC % G --UTF-8-BYTES-- ESC % @
! (delete-char -3)
! (delete-region pos (+ pos 3))
! (decode-coding-region pos (point) 'utf-8))))
(goto-char (point-min))
(- (point-max) (point)))))
! ;; Return a char table of extended segment usage for each character.
! ;; Each value of the char table is nil, one of the elements of
! ;; `ctext-non-standard-encodings-alist', or the symbol `utf-8'.
!
! (defun ctext-non-standard-encodings-table ()
! (let ((table (make-char-table 'translation-table)))
! (aset table (make-char 'mule-unicode-0100-24ff) 'utf-8)
! (aset table (make-char 'mule-unicode-2500-33ff) 'utf-8)
! (aset table (make-char 'mule-unicode-e000-ffff) 'utf-8)
! (dolist (encoding (reverse
! (append
! (get-language-info current-language-environment
! 'ctext-non-standard-encodings)
! ctext-non-standard-encodings)))
! (let* ((slot (assoc encoding ctext-non-standard-encodings-alist))
! (charset (nth 3 slot)))
! (if charset
! (cond ((charsetp charset)
! (aset table (make-char charset) slot))
! ((listp charset)
! (dolist (elt charset)
! (aset table (make-char elt) slot)))
! ((char-table-p charset)
! (map-char-table #'(lambda (k v)
! (if (and v (> k 128)) (aset table k slot)))
! charset))))))
! table))
(defun ctext-pre-write-conversion (from to)
"Encode characters between FROM and TO as Compound Text w/Extended Segments.
***************
*** 1470,1516 ****
(insert-buffer-substring buf from to))))
;; Now we can encode the whole buffer.
! (let ((case-fold-search nil)
last-coding-system-used
! pos posend desig encode-info encoding chset noctets textlen)
! (goto-char (point-min))
! ;; At first encode the whole buffer.
! (encode-coding-region (point-min) (point-max) 'ctext-no-compositions)
! ;; Then replace ISO-2022 charset designations with extended
! ;; segments, for those charsets that are not part of the
! ;; official X registry. The regexp below finds the leading
! ;; sequences for big5.
! (while (re-search-forward "\e\\(\$([01]\\)" nil 'move)
! (setq pos (match-beginning 0)
! posend (point)
! desig (match-string 1)
! encode-info (cdr (assoc desig
! ctext-non-standard-designations-alist))
! encoding (car encode-info)
! chset (cadr encode-info)
! noctets (car (cddr encode-info)))
! (skip-chars-forward "^\e")
! (cond
! ((eq encoding t) ; only the leading sequence needs to be changed
! (setq textlen (+ (- (point) posend) (length chset) 1))
! ;; Generate the control sequence for an extended segment.
! (replace-match (format "\e%%/%d%c%c%s"
! noctets
! (+ (/ textlen 128) 128)
! (+ (% textlen 128) 128)
! chset)
! t t))
! ((coding-system-p encoding) ; need to recode the entire segment...
! (decode-coding-region pos (point) 'ctext-no-compositions)
! (encode-coding-region pos (point) encoding)
! (setq textlen (+ (- (point) pos) (length chset) 1))
! (save-excursion
! (goto-char pos)
! (insert (format "\e%%/%d%c%c%s"
! noctets
! (+ (/ textlen 128) 128)
! (+ (% textlen 128) 128)
! chset))))))
(goto-char (point-min))))
;; Must return nil, as build_annotations_2 expects that.
nil)
--- 1471,1526 ----
(insert-buffer-substring buf from to))))
;; Now we can encode the whole buffer.
! (let ((encoding-table (ctext-non-standard-encodings-table))
last-coding-system-used
! last-pos last-encoding-info
! encoding-info end-pos)
! (goto-char (setq last-pos (point-min)))
! (setq end-pos (point-marker))
! (while (re-search-forward "[^\000-\177]+" nil t)
! ;; Found a sequence of non-ASCII characters.
! (setq last-pos (match-beginning 0)
! last-encoding-info (aref encoding-table (char-after last-pos)))
! (set-marker end-pos (match-end 0))
! (goto-char (1+ last-pos))
! (catch 'tag
! (while t
! (setq encoding-info
! (if (< (point) end-pos)
! (aref encoding-table (following-char))))
! (unless (eq last-encoding-info encoding-info)
! (cond ((consp last-encoding-info)
! ;; Encode the previous range using an extended
! ;; segment.
! (let ((encoding-name (car last-encoding-info))
! (coding-system (nth 1 last-encoding-info))
! (noctets (nth 2 last-encoding-info))
! len)
! (encode-coding-region last-pos (point) coding-system)
! (setq len (+ (length encoding-name) 1
! (- (point) last-pos)))
! (save-excursion
! (goto-char last-pos)
! (insert (string-to-multibyte
! (format "\e%%/%d%c%c%s"
! noctets
! (+ (/ len 128) 128)
! (+ (% len 128) 128)
! encoding-name))))))
! ((eq last-encoding-info 'utf-8)
! ;; Encode the previous range using UTF-8 encoding
! ;; extention.
! (encode-coding-region last-pos (point) 'mule-utf-8)
! (save-excursion
! (goto-char last-pos)
! (insert "\e%G"))
! (insert "\e%@")))
! (setq last-pos (point)
! last-encoding-info encoding-info))
! (if (< (point) end-pos)
! (forward-char 1)
! (throw 'tag nil)))))
! (set-marker end-pos nil)
(goto-char (point-min))))
;; Must return nil, as build_annotations_2 expects that.
nil)
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Emacs-diffs] Changes to emacs/lisp/international/mule.el,
Kenichi Handa <=