emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Emacs-diffs] Changes to emacs/lisp/international/mule.el


From: Eli Zaretskii
Subject: [Emacs-diffs] Changes to emacs/lisp/international/mule.el
Date: Fri, 22 Feb 2002 08:44:22 -0500

Index: emacs/lisp/international/mule.el
diff -c emacs/lisp/international/mule.el:1.139 
emacs/lisp/international/mule.el:1.140
*** emacs/lisp/international/mule.el:1.139      Thu Dec 13 09:41:19 2001
--- emacs/lisp/international/mule.el    Fri Feb 22 08:44:21 2002
***************
*** 1284,1289 ****
--- 1284,1444 ----
      (setq coding-category-list (append arg current-list))
      (set-coding-priority-internal)))
  
+ ;;; X selections
+ 
+ (defvar non-standard-icccm-encodings-alist
+   '(("ISO8859-15" . latin-iso8859-15)
+     ("ISO8859-14" . latin-iso8859-14)
+     ("KOI8-R" . koi8-r)
+     ("BIG5-0" . big5))
+   "Alist of font charset names defined by XLFD, and the corresponding Emacs
+ charsets or coding systems.")
+ 
+ ;; Functions to support "Non-Standard Character Set Encodings" defined
+ ;; by the ICCCM spec.  We support that by converting the leading
+ ;; sequence of the ``extended segment'' to the corresponding ISO-2022
+ ;; sequences (if the leading sequence names an Emacs charset), or decode
+ ;; the segment (if it names a coding system).  Encoding does the reverse.
+ (defun ctext-post-read-conversion (len)
+   "Decode LEN characters encoded as Compound Text with Extended Segments."
+   (buffer-disable-undo)       ; minimize consing due to insertions and 
deletions
+   (narrow-to-region (point) (+ (point) len))
+   (save-match-data
+     (let ((pt (point-marker))
+         (oldpt (point-marker))
+         (newpt (make-marker))
+         (modified-p (buffer-modified-p))
+         (case-fold-search nil)
+         last-coding-system-used
+         encoding textlen chset)
+       (while (re-search-forward
+             "\\(\e\\)%/[0-4]\\([\200-\377][\200-\377]\\)\\([^\002]+\\)\002"
+             nil 'move)
+       (set-marker newpt (point))
+       (set-marker pt (match-beginning 0))
+       (setq encoding (match-string 3))
+       (setq textlen (- (+ (* (- (aref (match-string 2) 0) 128) 128)
+                           (- (aref (match-string 2) 1) 128))
+                        (1+ (length encoding))))
+       (setq
+        chset (cdr (assoc-ignore-case encoding
+                                      non-standard-icccm-encodings-alist)))
+       (cond ((null chset)
+              ;; This charset is not supported--leave this extended
+              ;; segment unaltered and skip over it.
+              (goto-char (+ (point) textlen)))
+             ((charsetp chset)
+            ;; If it's a charset, replace the leading escape sequence
+            ;; with a standard ISO-2022 sequence.  We will decode all
+             ;; such segments later, in one go, when we exit the loop
+              ;; or find an extended segment that names a coding
+              ;; system, not a charset.
+              (replace-match
+               (concat "\\1"
+                       (if (= 0 (charset-iso-graphic-plane chset))
+                           ;; GL charsets
+                           (if (= 1 (charset-dimension chset)) "(" "$(")
+                         ;; GR charsets
+                         (if (= 96 (charset-chars chset))
+                             "-"
+                           (if (= 1 (charset-dimension chset)) ")" "$)")))
+                       (string (charset-iso-final-char chset)))
+               t)
+              (goto-char (+ (point) textlen)))
+             ((coding-system-p chset)
+            ;; If it's a coding system, we need to decode the segment
+              ;; right away.  But first, decode what we've skipped
+              ;; across until now.
+              (when (> pt oldpt)
+                (decode-coding-region oldpt pt 'ctext-no-compositions))
+              (delete-region pt newpt)
+              (set-marker newpt (+ newpt textlen))
+              (decode-coding-region pt newpt chset)
+              (goto-char newpt)
+              (set-marker oldpt newpt))))
+       ;; Decode what's left.
+       (when (> (point) oldpt)
+       (decode-coding-region oldpt (point) 'ctext-no-compositions))
+      ;; This buffer started as unibyte, because the string we get from
+       ;; the X selection is a unibyte string.  We must now make it
+       ;; multibyte, so that the decoded text is inserted as multibyte
+       ;; into its buffer.
+       (set-buffer-multibyte t)
+       (set-buffer-modified-p modified-p)
+       (- (point-max) (point-min)))))
+ 
+ (defvar non-standard-designations-alist
+   '(("$(0" . (big5 "big5-0" 2))
+     ("$(1" . (big5 "big5-0" 2))
+     ("-V"  . (t "iso8859-10" 1))
+     ("-Y"  . (t "iso8859-13" 1))
+     ("-_"  . (t "iso8859-14" 1))
+     ("-b"  . (t "iso8859-15" 1))
+     ("-f"  . (t "iso8859-16" 1)))
+   "Alist of ctext control sequences that introduce character sets which
+ are not in the list of approved ICCCM encodings, and the corresponding
+ coding system, identifier string, and number of octets per encoded
+ character.
+ 
+ Each element has the form (CTLSEQ . (ENCODING CHARSET NOCTETS)).  CTLSEQ
+ is the control sequence (sans the leading ESC) that introduces the character
+ set in the text encoded by compound-text.  ENCODING is a coding system
+ symbol; if it is t, it means that the ctext coding system already encodes
+ the text correctly, and only the leading control sequence needs to be altered.
+ If ENCODING is a coding system, we need to re-encode the text with that
+ coding system.  CHARSET is the ICCCM name of the charset we need to put into
+ the leading control sequence.  NOCTETS is the number of octets (bytes) that
+ encode each character in this charset.  NOCTETS can be 0 (meaning the number
+ of octets per character is variable), 1, 2, 3, or 4.")
+ 
+ (defun ctext-pre-write-conversion (from to)
+   "Encode characters between FROM and TO as Compound Text w/Extended 
Segments."
+   (buffer-disable-undo)       ; minimize consing due to insertions and 
deletions
+   (narrow-to-region from to)
+   (encode-coding-region from to 'ctext-no-compositions)
+   ;; Replace ISO-2022 charset designations with extended segments, for
+   ;; those charsets that are not part of the official X registry.
+   (save-match-data
+     (goto-char (point-min))
+     (let ((newpt (make-marker))
+         (case-fold-search nil)
+         pt desig encode-info encoding chset noctets textlen)
+       (set-buffer-multibyte nil)
+       (while (re-search-forward "\e\\(\$([01]\\|-[VY_bf]\\)" nil 'move)
+       (setq desig (match-string 1)
+             pt (point-marker)
+             encode-info (cdr (assoc desig non-standard-designations-alist))
+             encoding (car encode-info)
+             chset (cadr encode-info)
+             noctets (car (cddr encode-info)))
+       (skip-chars-forward "^\e")
+       (set-marker newpt (point))
+       (cond
+        ((eq encoding t)  ; only the leading sequence needs to be changed
+         (setq textlen (+ (- newpt pt) (length chset) 1))
+         (replace-match (format "\e%%/%d%c%c%s"
+                                noctets
+                                (+ (/ textlen 128) 128)
+                                (+ (% textlen 128) 128)
+                                chset)
+                        t t))
+        ((coding-system-p encoding) ; need to recode the entire segment...
+         (set-marker pt (match-beginning 0))
+         (decode-coding-region pt newpt 'ctext-no-compositions)
+         (set-buffer-multibyte t)
+         (encode-coding-region pt newpt encoding)
+         (set-buffer-multibyte nil)
+         (setq textlen (+ (- newpt pt) (length chset) 1))
+         (goto-char pt)
+         (insert (format "\e%%/%d%c%c%s"
+                         noctets
+                         (+ (/ textlen 128) 128)
+                         (+ (% textlen 128) 128)
+                         chset))))
+       (goto-char newpt))))
+   (set-buffer-multibyte t)
+   nil)
+ 
  ;;; FILE I/O
  
  (defcustom auto-coding-alist



reply via email to

[Prev in Thread] Current Thread [Next in Thread]