emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Emacs-diffs] Changes to emacs/lisp/international/mule-diag.el,v


From: Miles Bader
Subject: [Emacs-diffs] Changes to emacs/lisp/international/mule-diag.el,v
Date: Fri, 01 Feb 2008 16:03:01 +0000

CVSROOT:        /cvsroot/emacs
Module name:    emacs
Changes by:     Miles Bader <miles>     08/02/01 16:01:31

Index: lisp/international/mule-diag.el
===================================================================
RCS file: /cvsroot/emacs/emacs/lisp/international/mule-diag.el,v
retrieving revision 1.105
retrieving revision 1.106
diff -u -b -r1.105 -r1.106
--- lisp/international/mule-diag.el     8 Jan 2008 20:46:09 -0000       1.105
+++ lisp/international/mule-diag.el     1 Feb 2008 16:01:18 -0000       1.106
@@ -6,6 +6,9 @@
 ;;   2005, 2006, 2007, 2008
 ;;   National Institute of Advanced Industrial Science and Technology (AIST)
 ;;   Registration Number H14PRO021
+;; Copyright (C) 2003
+;;   National Institute of Advanced Industrial Science and Technology (AIST)
+;;   Registration Number H13PRO009
 
 ;; Keywords: multilingual, charset, coding system, fontset, diagnosis, i18n
 
@@ -35,8 +38,8 @@
 
 ;;; General utility function
 
-;; Print all arguments with single space separator in one line.
 (defun print-list (&rest args)
+  "Print all arguments with single space separator in one line."
   (while (cdr args)
     (when (car args)
       (princ (car args))
@@ -45,12 +48,6 @@
   (princ (car args))
   (princ "\n"))
 
-;; Re-order the elements of charset-list.
-(defun sort-charset-list ()
-  (setq charset-list
-       (sort charset-list
-             (lambda (x y) (< (charset-id x) (charset-id y))))))
-
 ;;; CHARSET
 
 (define-button-type 'sort-listed-character-sets
@@ -65,85 +62,9 @@
   'help-echo "mouse-2, RET: show table of characters for this character set")
 
 ;;;###autoload
-(defvar non-iso-charset-alist
-  `((mac-roman
-     (ascii latin-iso8859-1 mule-unicode-2500-33ff
-           mule-unicode-0100-24ff mule-unicode-e000-ffff)
-     mac-roman-decoder
-     ((0 255)))
-    (viscii
-     (ascii vietnamese-viscii-lower vietnamese-viscii-upper)
-     viet-viscii-nonascii-translation-table
-     ((0 255)))
-    (vietnamese-tcvn
-     (ascii vietnamese-viscii-lower vietnamese-viscii-upper)
-     viet-tcvn-nonascii-translation-table
-     ((0 255)))
-    (koi8-r
-     (ascii cyrillic-iso8859-5)
-     cyrillic-koi8-r-nonascii-translation-table
-     ((32 255)))
-    (alternativnyj
-     (ascii cyrillic-iso8859-5)
-     cyrillic-alternativnyj-nonascii-translation-table
-     ((32 255)))
-    (koi8-u
-     (ascii cyrillic-iso8859-5 mule-unicode-0100-24ff)
-     cyrillic-koi8-u-nonascii-translation-table
-     ((32 255)))
-    (big5
-     (ascii chinese-big5-1 chinese-big5-2)
-     decode-big5-char
-     ((32 127)
-      ((?\xA1 ?\xFE) . (?\x40 ?\x7E ?\xA1 ?\xFE))))
-    (sjis
-     (ascii katakana-jisx0201 japanese-jisx0208)
-     decode-sjis-char
-     ((32 127 ?\xA1 ?\xDF)
-      ((?\x81 ?\x9F ?\xE0 ?\xEF) . (?\x40 ?\x7E ?\x80 ?\xFC)))))
-  "Alist of charset names vs the corresponding information.
-This is mis-named for historical reasons.  The charsets are actually
-non-built-in ones.  They correspond to Emacs coding systems, not Emacs
-charsets, i.e. what Emacs can read (or write) by mapping to (or
-from) Emacs internal charsets that typically correspond to a limited
-set of ISO charsets.
-
-Each element has the following format:
-  (CHARSET CHARSET-LIST TRANSLATION-METHOD [ CODE-RANGE ])
-
-CHARSET is the name (symbol) of the charset.
-
-CHARSET-LIST is a list of Emacs charsets into which characters of
-CHARSET are mapped.
-
-TRANSLATION-METHOD is a translation table (symbol) to translate a
-character code of CHARSET to the corresponding Emacs character
-code.  It can also be a function to call with one argument, a
-character code in CHARSET.
-
-CODE-RANGE specifies the valid code ranges of CHARSET.
-It is a list of RANGEs, where each RANGE is of the form:
-  (FROM1 TO1 FROM2 TO2 ...)
-or
-  ((FROM1-1 TO1-1 FROM1-2 TO1-2 ...) . (FROM2-1 TO2-1 FROM2-2 TO2-2 ...))
-In the first form, valid codes are between FROM1 and TO1, or FROM2 and
-TO2, or...
-The second form is used for 2-byte codes.  The car part is the ranges
-of the first byte, and the cdr part is the ranges of the second byte.")
-
-;;;###autoload
 (defun list-character-sets (arg)
   "Display a list of all character sets.
 
-The ID-NUM column contains a charset identification number for
-internal Emacs use.
-
-The MULTIBYTE-FORM column contains the format of the buffer and string
-multibyte sequence of characters in the charset using one to four
-hexadecimal digits.
-  `xx' stands for any byte in the range 0..127.
-  `XX' stands for any byte in the range 160..255.
-
 The D column contains the dimension of this character set.  The CH
 column contains the number of characters in a block of this character
 set.  The FINAL-CHAR column contains an ISO-2022 <final-char> to use
@@ -158,21 +79,19 @@
       (if arg
          (list-character-sets-2)
        ;; Insert header.
-       (insert "Indirectly supported character sets are shown below.\n")
+       (insert "Supplementary character sets are shown below.\n")
        (insert
         (substitute-command-keys
          (concat "Use "
                  (if (display-mouse-p) "\\[help-follow-mouse] or ")
                  "\\[help-follow]:\n")))
        (insert "  on a column title to sort by that title,")
-       (indent-to 56)
+       (indent-to 48)
        (insert "+----DIMENSION\n")
        (insert "  on a charset name to list characters.")
-       (indent-to 56)
+       (indent-to 48)
        (insert "| +--CHARS\n")
-       (let ((columns '(("ID-NUM" . id) "\t"
-                        ("CHARSET-NAME" . name) "\t\t\t"
-                        ("MULTIBYTE-FORM" . id) "\t"
+       (let ((columns '(("CHARSET-NAME" . name) "\t\t\t\t\t"
                         ("D CH FINAL-CHAR" . iso-spec)))
              pos)
          (while columns
@@ -184,127 +103,73 @@
              (goto-char (point-max)))
            (setq columns (cdr columns)))
          (insert "\n"))
-       (insert "------\t------------\t\t\t--------------\t- -- ----------\n")
+       (insert "------------\t\t\t\t\t- --- ----------\n")
 
        ;; Insert body sorted by charset IDs.
-       (list-character-sets-1 'id)
-
-       ;; Insert non-directly-supported charsets.
-       (insert-char ?- 72)
-       (insert "\n\nINDIRECTLY SUPPORTED CHARSETS SETS:\n\n"
-               (propertize "CHARSET NAME\tMAPPED TO" 'face 'bold)
-               "\n------------\t---------\n")
-       (dolist (elt non-iso-charset-alist)
-         (insert-text-button (symbol-name (car elt))
-                             :type 'list-charset-chars
-                             'help-args (list (car elt)))
-         (indent-to 16)
-         (dolist (e (nth 1 elt))
-           (when (>= (+ (current-column) 1 (string-width (symbol-name e)))
-                     ;; This is an approximate value.  We don't know
-                     ;; the correct window width of this buffer yet.
-                     78)
-             (insert "\n")
-             (indent-to 16))
-
-           (insert (format "%s " e)))
-         (insert "\n"))))))
+       (list-character-sets-1 'name)))))
 
 (defun sort-listed-character-sets (sort-key)
   (if sort-key
       (save-excursion
-       (help-setup-xref (list #'list-character-sets nil) t)
        (let ((buffer-read-only nil))
          (goto-char (point-min))
-         (re-search-forward "[0-9][0-9][0-9]")
-         (beginning-of-line)
-         (let ((pos (point)))
-           (search-forward "----------")
-           (beginning-of-line)
-           (save-restriction
-             (narrow-to-region pos (point))
-             (delete-region (point-min) (point-max))
-             (list-character-sets-1 sort-key)))))))
-
-(defun charset-multibyte-form-string (charset)
-  (let ((info (charset-info charset)))
-    (cond ((eq charset 'ascii)
-          "xx")
-         ((eq charset 'eight-bit-control)
-          (format "%2X Xx" (aref info 6)))
-         ((eq charset 'eight-bit-graphic)
-          "XX")
-         (t
-          (let ((str (format "%2X" (aref info 6))))
-            (if (> (aref info 7) 0)
-                (setq str (format "%s %2X"
-                                  str (aref info 7))))
-            (setq str (concat str " XX"))
-            (if (> (aref info 2) 1)
-                (setq str (concat str " XX")))
-            str)))))
-
-;; Insert a list of character sets sorted by SORT-KEY.  SORT-KEY
-;; should be one of `id', `name', and `iso-spec'.  If SORT-KEY is nil,
-;; it defaults to `id'.
+         (search-forward "\n-")
+         (forward-line 1)
+         (delete-region (point) (point-max))
+         (list-character-sets-1 sort-key)))))
 
 (defun list-character-sets-1 (sort-key)
+  "Insert a list of character sets sorted by SORT-KEY.
+SORT-KEY should be `name' or `iso-spec' (default `name')."
   (or sort-key
-      (setq sort-key 'id))
-  (let ((tail (charset-list))
-       charset-info-list elt charset info sort-func)
-    (while tail
-      (setq charset (car tail) tail (cdr tail)
-           info (charset-info charset))
-
+      (setq sort-key 'name))
+  (let ((tail charset-list)
+       charset-info-list supplementary-list charset sort-func)
+    (dolist (charset charset-list)
       ;; Generate a list that contains all information to display.
-      (setq charset-info-list
-           (cons (list (charset-id charset)    ; ID-NUM
-                       charset                 ; CHARSET-NAME
-                       (charset-multibyte-form-string charset); MULTIBYTE-FORM
-                       (aref info 2)           ; DIMENSION
-                       (aref info 3)           ; CHARS
-                       (aref info 8)           ; FINAL-CHAR
-                       )
-                 charset-info-list)))
+      (let ((elt (list charset
+                      (charset-dimension charset)
+                      (charset-chars charset)
+                      (charset-iso-final-char charset))))
+       (if (plist-get (charset-plist charset) :supplementary-p)
+           (push elt supplementary-list)
+         (push elt charset-info-list))))
 
     ;; Determine a predicate for `sort' by SORT-KEY.
     (setq sort-func
-         (cond ((eq sort-key 'id)
-                (lambda (x y) (< (car x) (car y))))
-
-               ((eq sort-key 'name)
-                (lambda (x y) (string< (nth 1 x) (nth 1 y))))
+         (cond ((eq sort-key 'name)
+                (lambda (x y) (string< (car x) (car y))))
 
                ((eq sort-key 'iso-spec)
                 ;; Sort by DIMENSION CHARS FINAL-CHAR
+                (function
                 (lambda (x y)
-                  (or (< (nth 3 x) (nth 3 y))
-                      (and (= (nth 3 x) (nth 3 y))
-                           (or (< (nth 4 x) (nth 4 y))
-                               (and (= (nth 4 x) (nth 4 y))
-                                    (< (nth 5 x) (nth 5 y))))))))
+                   (or (< (nth 1 x) (nth 1 y))
+                       (and (= (nth 1 x) (nth 1 y))
+                            (or (< (nth 2 x) (nth 2 y))
+                                (and (= (nth 2 x) (nth 2 y))
+                                     (< (nth 3 x) (nth 3 y)))))))))
                (t
                 (error "Invalid charset sort key: %s" sort-key))))
 
     (setq charset-info-list (sort charset-info-list sort-func))
+    (setq supplementary-list (sort supplementary-list sort-func))
 
     ;; Insert information of character sets.
-    (while charset-info-list
-      (setq elt (car charset-info-list)
-           charset-info-list (cdr charset-info-list))
-      (insert (format "%03d(%02X)" (car elt) (car elt))) ; ID-NUM
-      (indent-to 8)
-      (insert-text-button (symbol-name (nth 1 elt))
+    (dolist (elt (append charset-info-list (list t) supplementary-list))
+      (if (eq elt t)
+         (insert "-------------- Supplementary Character Sets --------------")
+       (insert-text-button (symbol-name (car elt)) ; NAME
                          :type 'list-charset-chars
-                         'help-args (list (nth 1 elt)))
+                           'help-args (list (car elt)))
       (goto-char (point-max))
       (insert "\t")
-      (indent-to 40)
-      (insert (nth 2 elt))             ; MULTIBYTE-FORM
-      (indent-to 56)
-      (insert (format "%d %2d " (nth 3 elt) (nth 4 elt)) ; DIMENSION and CHARS
-             (if (< (nth 5 elt) 0) "none" (nth 5 elt))) ; FINAL-CHAR
+       (indent-to 48)
+       (insert (format "%d %3d "
+                       (nth 1 elt) (nth 2 elt)) ; DIMENSION and CHARS
+               (if (< (nth 3 elt) 0)
+                   "none"
+                 (nth 3 elt))))        ; FINAL-CHAR
       (insert "\n"))))
 
 
@@ -316,42 +181,37 @@
 ## Each line corresponds to one charset.
 ## The following attributes are listed in this order
 ## separated by a colon `:' in one line.
-##     CHARSET-ID,
 ##     CHARSET-SYMBOL-NAME,
 ##     DIMENSION (1 or 2)
 ##     CHARS (94 or 96)
-##     BYTES (of multibyte form: 1, 2, 3, or 4),
-##     WIDTH (occupied column numbers: 1 or 2),
-##     DIRECTION (0:left-to-right, 1:right-to-left),
 ##     ISO-FINAL-CHAR (character code of ISO-2022's final character)
-##     ISO-GRAPHIC-PLANE (ISO-2022's graphic plane, 0:GL, 1:GR)
+##             -1 means that no final character is assigned.
 ##     DESCRIPTION (describing string of the charset)
 ")
   (let ((l charset-list)
        charset)
     (while l
       (setq charset (car l) l (cdr l))
-      (princ (format "%03d:%s:%d:%d:%d:%d:%d:%d:%d:%s\n"
-                    (charset-id charset)
+      (princ (format "%s:%d:%d:%d:%s\n"
                     charset
                     (charset-dimension charset)
                     (charset-chars charset)
-                    (charset-bytes charset)
-                    (charset-width charset)
-                    (charset-direction charset)
+;;;                 (char-width (make-char charset))
+;;;                 (charset-direction charset)
                     (charset-iso-final-char charset)
-                    (charset-iso-graphic-plane charset)
+;;;                 (charset-iso-graphic-plane charset)
                     (charset-description charset))))))
 
+(defvar non-iso-charset-alist nil
+  "Obsolete.")
+(make-obsolete-variable 'non-iso-charset-alist "no longer relevant" "23.1")
+
 (defun decode-codepage-char (codepage code)
   "Decode a character that has code CODE in CODEPAGE.
 Return a decoded character string.  Each CODEPAGE corresponds to a
-coding system cpCODEPAGE."
-  (let ((coding-system (intern (format "cp%d" codepage))))
-    (or (coding-system-p coding-system)
-       (codepage-setup codepage))
-    (string-to-char
-     (decode-coding-string (char-to-string code) coding-system))))
+coding system cpCODEPAGE.  This function is obsolete."
+  (decode-char (intern (format "cp%d" codepage)) code))
+(make-obsolete 'decode-codepage-char 'decode-char "23.1")
 
 ;; A variable to hold charset input history.
 (defvar charset-history nil)
@@ -360,210 +220,172 @@
 ;;;###autoload
 (defun read-charset (prompt &optional default-value initial-input)
   "Read a character set from the minibuffer, prompting with string PROMPT.
-It must be an Emacs character set listed in the variable `charset-list'
-or a non-ISO character set listed in the variable
-`non-iso-charset-alist'.
+It must be an Emacs character set listed in the variable `charset-list'.
 
 Optional arguments are DEFAULT-VALUE and INITIAL-INPUT.
 DEFAULT-VALUE, if non-nil, is the default value.
 INITIAL-INPUT, if non-nil, is a string inserted in the minibuffer initially.
 See the documentation of the function `completing-read' for the
 detailed meanings of these arguments."
-  (let* ((table (append (mapcar (lambda (x) (list (symbol-name x)))
-                               charset-list)
-                       (mapcar (lambda (x) (list (symbol-name (car x))))
-                               non-iso-charset-alist)))
+  (let* ((table (mapcar (lambda (x) (list (symbol-name x))) charset-list))
         (charset (completing-read prompt table
                                   nil t initial-input 'charset-history
                                   default-value)))
     (if (> (length charset) 0)
        (intern charset))))
 
-
 ;; List characters of the range MIN and MAX of CHARSET.  If dimension
 ;; of CHARSET is two (i.e. 2-byte charset), ROW is the first byte
 ;; (block index) of the characters, and MIN and MAX are the second
 ;; bytes of the characters.  If the dimension is one, ROW should be 0.
-;; For a non-ISO charset, CHARSET is a translation table (symbol) or a
-;; function to get Emacs' character codes that corresponds to the
-;; characters to list.
 
 (defun list-block-of-chars (charset row min max)
   (let (i ch)
-    (insert-char ?- (+ 4 (* 3 16)))
+    (insert-char ?- (+ 7 (* 4 16)))
     (insert "\n    ")
     (setq i 0)
     (while (< i 16)
-      (insert (format "%3X" i))
+      (insert (format "%4X" i))
       (setq i (1+ i)))
     (setq i (* (/ min 16) 16))
     (while (<= i max)
       (if (= (% i 16) 0)
-         (insert (format "\n%3Xx" (/ (+ (* row 256) i) 16))))
-      (setq ch (cond ((< i min)
-                     32)
-                    ((charsetp charset)
-                     (if (= row 0)
-                         (make-char charset i)
-                       (make-char charset row i)))
-                    ((and (symbolp charset) (get charset 'translation-table))
-                     (aref (get charset 'translation-table) i))
-                    (t (funcall charset (+ (* row 256) i)))))
-      (if (and (char-table-p charset)
-              (or (< ch 32) (and (>= ch 127) (<= ch 255))))
-         ;; Don't insert a control code.
-         (setq ch 32))
-      (unless ch (setq ch 32))
-      (if (eq ch ?\t)
-         ;; Make it visible.
-         (setq ch (propertize "\t" 'display "^I")))
-      ;; This doesn't DTRT.  Maybe it's better to insert "^J" and not
-      ;; worry about the buffer contents not being correct.
-;;;       (if (eq ch ?\n)
-;;;    (setq ch (propertize "\n" 'display "^J")))
-      (indent-to (+ (* (% i 16) 3) 6))
-      (insert ch)
+         (insert (format "\n%6Xx" (/ (+ (* row 256) i) 16))))
+      (setq ch (if (< i min)
+                  32
+                (or (decode-char charset (+ (* row 256) i))
+                    32)))              ; gap in mapping
+      ;; Don't insert control codes, non-Unicode characters.
+      (if (or (< ch 32) (= ch 127))
+         (setq ch (single-key-description ch))
+       (if (and (>= ch 128) (< ch 160))
+           (setq ch (format "%02Xh" ch))
+         (if (> ch #x10FFFF)
+             (setq ch 32))))
+      (insert "\t" ch)
       (setq i (1+ i))))
   (insert "\n"))
 
-(defun list-iso-charset-chars (charset)
-  (let ((dim (charset-dimension charset))
-       (chars (charset-chars charset))
-       (plane (charset-iso-graphic-plane charset))
-       min max)
-    (insert (format "Characters in the coded character set %s.\n" charset))
-
-    (cond ((eq charset 'eight-bit-control)
-          (setq min 128 max 159))
-         ((eq charset 'eight-bit-graphic)
-          (setq min 160 max 255))
-         (t
-          (if (= chars 94)
-              (setq min 33 max 126)
-            (setq min 32 max 127))
-          (or (= plane 0)
-              (setq min (+ min 128) max (+ max 128)))))
-
-    (if (= dim 1)
-       (list-block-of-chars charset 0 min max)
-      (let ((i min))
-       (while (<= i max)
-         (list-block-of-chars charset i min max)
-         (setq i (1+ i)))))))
-
-(defun list-non-iso-charset-chars (charset)
-  "List all characters in non-built-in coded character set CHARSET."
-  (let* ((slot (assq charset non-iso-charset-alist))
-        (charsets (nth 1 slot))
-        (translate-method (nth 2 slot))
-        (ranges (nth 3 slot))
-        range)
-    (or slot
-       (error "Unknown character set: %s" charset))
-    (insert (format "Characters in the coded character set %s.\n" charset))
-    (if charsets
-       (insert "They are mapped to: "
-               (mapconcat #'symbol-name charsets ", ")
-               "\n"))
-    (while ranges
-      (setq range (pop ranges))
-      (if (integerp (car range))
-         ;; The form of RANGES is (FROM1 TO1 FROM2 TO2 ...).
-         (if (and (not (functionp translate-method))
-                  (< (car (last range)) 256))
-             ;; Do it all in one block to avoid the listing being
-             ;; broken up at gaps in the range.  Don't do that for
-             ;; function translate-method, since not all codes in
-             ;; that range may be valid.
-             (list-block-of-chars translate-method
-                                  0 (car range) (car (last range)))
-           (while range
-             (list-block-of-chars translate-method
-                                  0 (car range) (nth 1 range))
-             (setq range (nthcdr 2 range))))
-       ;; The form of RANGES is ((FROM1-1 TO1-1 ...) . (FROM2-1 TO2-1 ...)).
-       (let ((row-range (car range))
-             row row-max
-             col-range col col-max)
-         (while row-range
-           (setq row (car row-range) row-max (nth 1 row-range)
-                 row-range (nthcdr 2 row-range))
-           (while (<= row row-max)
-             (setq col-range (cdr range))
-             (while col-range
-               (setq col (car col-range) col-max (nth 1 col-range)
-                     col-range (nthcdr 2 col-range))
-               (list-block-of-chars translate-method row col col-max))
-             (setq row (1+ row)))))))))
-
-
 ;;;###autoload
 (defun list-charset-chars (charset)
-  "Display a list of characters in the specified character set.
-This can list both Emacs `official' (ISO standard) charsets and the
-characters encoded by various Emacs coding systems which correspond to
-PC `codepages' and other coded character sets.  See `non-iso-charset-alist'."
+  "Display a list of characters in character set CHARSET."
   (interactive (list (read-charset "Character set: ")))
+  (or (charsetp charset)
+      (error "Invalid character set: %s" charset))
   (with-output-to-temp-buffer "*Character List*"
     (with-current-buffer standard-output
+      (if (coding-system-p charset)
+         ;; Useful to be able to do C-u C-x = to find file code, for
+         ;; instance:
+         (set-buffer-file-coding-system charset))
       (setq mode-line-format (copy-sequence mode-line-format))
       (let ((slot (memq 'mode-line-buffer-identification mode-line-format)))
        (if slot
            (setcdr slot
                    (cons (format " (%s)" charset)
                          (cdr slot)))))
-      (setq indent-tabs-mode nil)
+      (setq tab-width 4)
       (set-buffer-multibyte t)
-      (cond ((charsetp charset)
-            (list-iso-charset-chars charset))
-           ((assq charset non-iso-charset-alist)
-            (list-non-iso-charset-chars charset))
-           (t
-            (error "Invalid character set %s" charset))))))
+      (let ((dim (charset-dimension charset))
+           (chars (charset-chars charset))
+           ;;  (plane (charset-iso-graphic-plane charset))
+           (plane 1)
+           (range (plist-get (charset-plist charset) :code-space))
+           min max min2 max2)
+       (if (> dim 2)
+           (error "Can only list 1- and 2-dimensional charsets"))
+       (insert (format "Characters in the coded character set %s.\n" charset))
+       (narrow-to-region (point) (point))
+       (setq min (aref range 0)
+             max (aref range 1))
+       (if (= dim 1)
+           (list-block-of-chars charset 0 min max)
+         (setq min2 (aref range 2)
+               max2 (aref range 3))
+         (let ((i min2))
+           (while (<= i max2)
+             (list-block-of-chars charset i min max)
+             (setq i (1+ i)))))
+       (put-text-property (point-min) (point-max) 'charset charset)
+       (widen)))))
 
 
 ;;;###autoload
 (defun describe-character-set (charset)
   "Display information about built-in character set CHARSET."
-  (interactive (list (let ((non-iso-charset-alist nil))
-                      (read-charset "Charset: "))))
+  (interactive (list (read-charset "Charset: ")))
   (or (charsetp charset)
       (error "Invalid charset: %S" charset))
-  (let ((info (charset-info charset)))
     (help-setup-xref (list #'describe-character-set charset) (interactive-p))
     (with-output-to-temp-buffer (help-buffer)
       (with-current-buffer standard-output
-       (insert "Character set: " (symbol-name charset)
-               (format " (ID:%d)\n\n" (aref info 0)))
-       (insert (aref info 13) "\n\n")  ; description
-       (insert "Number of contained characters: "
-               (if (= (aref info 2) 1)
-                   (format "%d\n" (aref info 3))
-                 (format "%dx%d\n" (aref info 3) (aref info 3))))
+      (insert "Character set: " (symbol-name charset))
+      (let ((name (get-charset-property charset :name)))
+       (if (not (eq name charset))
+           (insert " (alias of " (symbol-name name) ?\))))
+      (insert "\n\n" (charset-description charset) "\n\n")
+      (insert "Number of contained characters: ")
+      (dotimes (i (charset-dimension charset))
+       (unless (= i 0)
+         (insert ?x))
+       (insert (format "%d" (charset-chars charset (1+ i)))))
+      (insert ?\n)
+      (let ((char (charset-iso-final-char charset)))
+       (when (> char 0)
        (insert "Final char of ISO2022 designation sequence: ")
-       (if (>= (aref info 8) 0)
-           (insert (format "`%c'\n" (aref info 8)))
-         (insert "not assigned\n"))
+         (insert (format "`%c'\n" char))))
        (insert (format "Width (how many columns on screen): %d\n"
-                       (aref info 4)))
-       (insert (format "Internal multibyte sequence: %s\n"
-                       (charset-multibyte-form-string charset)))
-       (let ((coding (plist-get (aref info 14) 'preferred-coding-system)))
-         (when coding
-           (insert (format "Preferred coding system: %s\n" coding))
-           (search-backward (symbol-name coding))
-           (help-xref-button 0 'help-coding-system coding)))))))
+                     (aref char-width-table (make-char charset))))
+      (let (aliases)
+       (dolist (c charset-list)
+         (if (and (not (eq c charset))
+                  (eq charset (get-charset-property c :name)))
+             (push c aliases)))
+       (if aliases
+           (insert "Aliases: " (mapconcat #'symbol-name aliases ", ") ?\n)))
+      
+      (dolist (elt `((:ascii-compatible-p "ASCII compatible." nil)
+                    (:map "Map file: " identity)
+                    (:unify-map "Unification map file: " identity)
+                    (:invalid-code
+                     nil
+                     ,(lambda (c)
+                        (format "Invalid character: %c (code %d)" c c)))
+                    (:emacs-mule-id "Id in emacs-mule coding system: "
+                                    number-to-string)
+                    (:parents "Parents: "
+                              (lambda (parents)
+                                (mapconcat ,(lambda (elt)
+                                              (format "%s" elt))
+                                           parents
+                                           ", ")))
+                    (:code-space "Code space: " ,(lambda (c)
+                                                   (format "%s" c)))
+                    (:code-offset "Code offset: " number-to-string)
+                    (:iso-revision-number "ISO revision number: "
+                                          number-to-string)
+                    (:supplementary-p
+                     "Used only as a parent of some other charset." nil)))
+       (let ((val (get-charset-property charset (car elt))))
+         (when val
+           (if (cadr elt) (insert (cadr elt)))
+           (if (nth 2 elt)
+               (insert (funcall (nth 2 elt) val)))
+           (insert ?\n)))))))
 
 ;;; CODING-SYSTEM
 
-;; Print information of designation of each graphic register in FLAGS
-;; in human readable format.  See the documentation of
-;; `make-coding-system' for the meaning of FLAGS.
-(defun print-designation (flags)
-  (let ((graphic-register 0)
-       charset)
-    (while (< graphic-register 4)
-      (setq charset (aref flags graphic-register))
+(eval-when-compile                     ; dynamic bondage
+  (defvar graphic-register))
+
+;; Print information about designation of each graphic register in
+;; DESIGNATIONS in human readable format.  See the documentation of
+;; `define-coding-system' for the meaning of DESIGNATIONS
+;; (`:designation' property).
+(defun print-designation (designations)
+  (let (charset)
+    (dotimes (graphic-register 4)
+      (setq charset (aref designations graphic-register))
       (princ (format
              "  G%d -- %s\n"
              graphic-register
@@ -593,8 +415,7 @@
                                (charset-description (car charset)))))
                (t
                 "invalid designation information"))
-         (setq charset (cdr charset))))
-      (setq graphic-register (1+ graphic-register)))))
+         (setq charset (cdr charset)))))))
 
 ;;;###autoload
 (defun describe-coding-system (coding-system)
@@ -606,55 +427,39 @@
                     (interactive-p))
     (with-output-to-temp-buffer (help-buffer)
       (print-coding-system-briefly coding-system 'doc-string)
-      (princ "\n")
-      (let ((vars (coding-system-get coding-system 'dependency)))
-       (when vars
-         (princ "See also the documentation of these customizable variables
-which alter the behavior of this coding system.\n")
-         (dolist (v vars)
-           (princ "  `")
-           (princ v)
-           (princ "'\n"))
-         (princ "\n")))
-
-      (princ "Type: ")
       (let ((type (coding-system-type coding-system))
-           (flags (coding-system-flags coding-system)))
+           ;; Fixme: use this
+           (extra-spec (coding-system-plist coding-system)))
+       (princ "Type: ")
        (princ type)
-       (cond ((eq type nil)
-              (princ " (do no conversion)"))
-             ((eq type t)
+       (cond ((eq type 'undecided)
               (princ " (do automatic conversion)"))
-             ((eq type 0)
-              (princ " (Emacs internal multibyte form)"))
-             ((eq type 1)
+             ((eq type 'utf-8)
+              (princ " (UTF-8: Emacs internal multibyte form)"))
+             ((eq type 'utf-16)
+              ;; (princ " (UTF-16)")
+              )
+             ((eq type 'shift-jis)
               (princ " (Shift-JIS, MS-KANJI)"))
-             ((eq type 2)
+             ((eq type 'iso-2022)
               (princ " (variant of ISO-2022)\n")
               (princ "Initial designations:\n")
-              (print-designation flags)
-              (princ "Other Form: \n  ")
-              (princ (if (aref flags 4) "short-form" "long-form"))
-              (if (aref flags 5) (princ ", ASCII@EOL"))
-              (if (aref flags 6) (princ ", ASCII@CNTL"))
-              (princ (if (aref flags 7) ", 7-bit" ", 8-bit"))
-              (if (aref flags 8) (princ ", use-locking-shift"))
-              (if (aref flags 9) (princ ", use-single-shift"))
-              (if (aref flags 10) (princ ", use-roman"))
-              (if (aref flags 11) (princ ", use-old-jis"))
-              (if (aref flags 12) (princ ", no-ISO6429"))
-              (if (aref flags 13) (princ ", init-bol"))
-              (if (aref flags 14) (princ ", designation-bol"))
-              (if (aref flags 15) (princ ", convert-unsafe"))
-              (if (aref flags 16) (princ ", accept-latin-extra-code"))
-              (princ "."))
-             ((eq type 3)
-              (princ " (Big5)"))
-             ((eq type 4)
+              (print-designation (coding-system-get coding-system
+                                                    :designation))
+
+              (when (coding-system-get coding-system :flags)
+                (princ "Other specifications: \n  ")
+                (apply #'print-list
+                       (coding-system-get coding-system :flags))))
+             ((eq type 'charset)
+              (princ " (charset)"))
+             ((eq type 'ccl)
               (princ " (do conversion by CCL program)"))
-             ((eq type 5)
+             ((eq type 'raw-text)
               (princ " (text with random binary characters)"))
-             (t (princ ": invalid coding-system."))))
+             ((eq type 'emacs-mule)
+              (princ " (Emacs 21 internal encoding)"))
+             (t (princ ": invalid coding-system.")))
       (princ "\nEOL type: ")
       (let ((eol-type (coding-system-eol-type coding-system)))
        (cond ((vectorp eol-type)
@@ -664,15 +469,15 @@
              ((or (null eol-type) (eq eol-type 0)) (princ "LF\n"))
              ((eq eol-type 1) (princ "CRLF\n"))
              ((eq eol-type 2) (princ "CR\n"))
-             (t (princ "invalid\n"))))
-      (let ((postread (coding-system-get coding-system 'post-read-conversion)))
+               (t (princ "invalid\n")))))
+      (let ((postread (coding-system-get coding-system :post-read-conversion)))
        (when postread
          (princ "After decoding text normally,")
          (princ " perform post-conversion using the function: ")
          (princ "\n  ")
          (princ postread)
          (princ "\n")))
-      (let ((prewrite (coding-system-get coding-system 'pre-write-conversion)))
+      (let ((prewrite (coding-system-get coding-system :pre-write-conversion)))
        (when prewrite
          (princ "Before encoding text normally,")
          (princ " perform pre-conversion using the function: ")
@@ -680,21 +485,23 @@
          (princ prewrite)
          (princ "\n")))
       (with-current-buffer standard-output
-       (let ((charsets (coding-system-get coding-system 'safe-charsets)))
-         (when (and (not (memq (coding-system-base coding-system)
-                               '(raw-text emacs-mule)))
+       (let ((charsets (coding-system-charset-list coding-system)))
+         (when (and (not (eq (coding-system-base coding-system) 'raw-text))
                     charsets)
-           (if (eq charsets t)
-               (insert "This coding system can encode all charsets except for
-eight-bit-control and eight-bit-graphic.\n")
+           (cond
+            ((eq charsets 'iso-2022)
+             (insert "This coding system can encode all ISO 2022 charsets."))
+            ((eq charsets 'emacs-mule)
+             (insert "This coding system can encode all emacs-mule charsets\
+."""))
+            (t
              (insert "This coding system encodes the following charsets:\n ")
              (while charsets
                (insert " " (symbol-name (car charsets)))
                (search-backward (symbol-name (car charsets)))
                (help-xref-button 0 'help-character-set (car charsets))
                (goto-char (point-max))
-               (setq charsets (cdr charsets))))))))))
-
+               (setq charsets (cdr charsets)))))))))))
 
 ;;;###autoload
 (defun describe-current-coding-system-briefly ()
@@ -742,18 +549,17 @@
      (coding-system-eol-type-mnemonic (cdr default-process-coding-system))
      )))
 
-;; Print symbol name and mnemonic letter of CODING-SYSTEM with `princ'.
-;; If DOC-STRING is non-nil, print also the docstring of CODING-SYSTEM.
-;; If DOC-STRING is `tightly', don't print an empty line before the
-;; docstring, and print only the first line of the docstring.
-
 (defun print-coding-system-briefly (coding-system &optional doc-string)
+  "Print symbol name and mnemonic letter of CODING-SYSTEM with `princ'.
+If DOC-STRING is non-nil, print also the docstring of CODING-SYSTEM.
+If DOC-STRING is `tightly', don't print an empty line before the
+docstring, and print only the first line of the docstring."
   (if (not coding-system)
       (princ "nil\n")
     (princ (format "%c -- %s"
                   (coding-system-mnemonic coding-system)
                   coding-system))
-    (let ((aliases (coding-system-get coding-system 'alias-coding-systems)))
+    (let ((aliases (coding-system-aliases coding-system)))
       (cond ((eq coding-system (car aliases))
             (if (cdr aliases)
                 (princ (format " %S" (cons 'alias: (cdr aliases))))))
@@ -795,6 +601,8 @@
       (print-coding-system-briefly (keyboard-coding-system))
       (princ "Coding system for terminal output:\n  ")
       (print-coding-system-briefly (terminal-coding-system))
+      (princ "Coding system for inter-client cut and paste:\n  ")
+      (print-coding-system-briefly selection-coding-system)
       (when (get-buffer-process (current-buffer))
        (princ "Coding systems for process I/O:\n")
        (princ "  encoding input to the process: ")
@@ -811,30 +619,23 @@
 
       (princ "
 Priority order for recognizing coding systems when reading files:\n")
-      (let ((l coding-category-list)
-           (i 1)
-           (coding-list nil)
-           coding aliases)
-       (while l
-         (setq coding (symbol-value (car l)))
-         ;; Do not list up the same coding system twice.
-         (when (and coding (not (memq coding coding-list)))
-           (setq coding-list (cons coding coding-list))
-           (princ (format "  %d. %s " i coding))
-           (setq aliases (coding-system-get coding 'alias-coding-systems))
-           (if (eq coding (car aliases))
+      (let ((i 1))
+       (dolist (elt (coding-system-priority-list))
+         (princ (format "  %d. %s " i elt))
+         (let ((aliases (coding-system-aliases elt)))
+           (if (eq elt (car aliases))
                (if (cdr aliases)
                    (princ (cons 'alias: (cdr aliases))))
-             (if (memq coding aliases)
-                 (princ (list 'alias 'of (car aliases)))))
+             (princ (list 'alias 'of (car aliases))))
            (terpri)
-           (setq i (1+ i)))
-         (setq l (cdr l))))
+           (setq i (1+ i)))))
 
       (princ "\n  Other coding systems cannot be distinguished automatically
   from these, and therefore cannot be recognized automatically
   with the present coding system priorities.\n\n")
 
+      ;; Fixme: should this be replaced or junked?
+      (if nil
       (let ((categories '(coding-category-iso-7 coding-category-iso-7-else))
            coding-system codings)
        (while categories
@@ -842,9 +643,9 @@
          (mapc
           (lambda (x)
             (if (and (not (eq x coding-system))
-                     (coding-system-get x 'no-initial-designation)
-                     (let ((flags (coding-system-flags x)))
-                       (not (or (aref flags 10) (aref flags 11)))))
+                      (let ((flags (coding-system-get :flags)))
+                        (not (or (memq 'use-roman flags)
+                                 (memq 'use-oldjis flags)))))
                 (setq codings (cons x codings))))
           (get (car categories) 'coding-systems))
          (if codings
@@ -862,7 +663,7 @@
                    (goto-char (point-max)))
                  (setq codings (cdr codings)))
                (insert "\n\n")))
-         (setq categories (cdr categories))))
+         (setq categories (cdr categories)))))
 
       (princ "Particular coding systems specified for certain file names:\n")
       (terpri)
@@ -887,12 +688,12 @@
        (funcall func "Network I/O" network-coding-system-alist))
       (help-mode))))
 
-;; Print detailed information on CODING-SYSTEM.
 (defun print-coding-system (coding-system)
+  "Print detailed information on CODING-SYSTEM."
   (let ((type (coding-system-type coding-system))
        (eol-type (coding-system-eol-type coding-system))
-       (flags (coding-system-flags coding-system))
-       (aliases (coding-system-get coding-system 'alias-coding-systems)))
+       (flags (coding-system-get coding-system :flags))
+       (aliases (coding-system-aliases coding-system)))
     (if (not (eq (car aliases) coding-system))
        (princ (format "%s (alias of %s)\n" coding-system (car aliases)))
       (princ coding-system)
@@ -905,7 +706,7 @@
                     type
                     (coding-system-mnemonic coding-system)
                     (if (integerp eol-type) eol-type 3)))
-      (cond ((eq type 2)               ; ISO-2022
+      (cond ((eq type 'iso2022)
             (let ((idx 0)
                   charset)
               (while (< idx 4)
@@ -932,7 +733,7 @@
                 (princ ",")
                 (setq idx (1+ idx)))
               (princ (if (aref flags idx) 1 0))))
-           ((eq type 4)                ; CCL
+           ((eq type 'ccl)
             (let (i len)
               (if (symbolp (car flags))
                   (princ (format " %s" (car flags)))
@@ -999,19 +800,9 @@
   (dolist (coding-system (sort-coding-systems (coding-system-list 'base-only)))
     (if (null arg)
        (print-coding-system-briefly coding-system 'tightly)
-      (print-coding-system coding-system)))
-  (let ((first t))
-    (dolist (elt coding-system-alist)
-      (unless (memq (intern (car elt)) coding-system-list)
-       (when first
-         (princ "\
-####################################################
-# The following coding systems are not yet loaded. #
-####################################################
-")
-         (setq first nil))
-       (princ-list (car elt))))))
+      (print-coding-system coding-system))))
 
+;; Fixme: delete?
 ;;;###autoload
 (defun list-coding-categories ()
   "Display a list of all coding categories."
@@ -1029,8 +820,8 @@
 
 ;;; FONT
 
-;; Print information of a font in FONTINFO.
 (defun describe-font-internal (font-info &optional verbose)
+  "Print information about a font in FONT-INFO."
   (print-list "name (opened by):" (aref font-info 0))
   (print-list "       full name:" (aref font-info 1))
   (print-list "            size:" (format "%2d" (aref font-info 2)))
@@ -1065,76 +856,84 @@
       (with-output-to-temp-buffer "*Help*"
        (describe-font-internal font-info 'verbose)))))
 
-(defun print-fontset (fontset &optional print-fonts)
+(defun print-fontset-element (val)
+  "Insert font information for fontset element VAL on the current line.
+VAL has this format:
+  ((REQUESTED-FONT-NAME OPENED-FONT-NAME ...) ...)
+The CHAR RANGE must already be inserted on the current line; the
+character codes are read back from that text.  For each element of
+VAL the requested font name is inserted on a line of its own.  The
+opened font names are inserted too, but only when the variable
+`print-opened' is bound and non-nil at the time of the call."
+  (beginning-of-line)
+  (let ((from (following-char))
+        ;; TO stays nil unless the line matches the range pattern.
+        (to (if (looking-at "[^.]*[.]* ")
+                (char-after (match-end 0)))))
+    ;; Delete trailing whitespace on the line.
+    (if (re-search-forward "[ \t]*$" nil t)
+        (delete-region (match-beginning 0) (match-end 0)))
+
+    ;; For non-ASCII characters, insert also CODE RANGE.
+    (when (or (>= from 128) (and to (>= to 128)))
+      (insert (if to
+                  (format " (#x%02X .. #x%02X)" from to)
+                (format " (#x%02X)" from))))
+
+    (dolist (elt val)
+      ;; Insert a requested font name.
+      (let ((requested (car elt)))
+        (if (stringp requested)
+            (insert "\n    " requested)
+          (let (family registry weight slant width adstyle)
+            ;; REQUESTED is either a font object (when `fontp' exists
+            ;; and accepts it) or something indexable with `aref'.
+            (if (and (fboundp 'fontp) (fontp requested))
+                (setq family (font-get requested :family)
+                      registry (font-get requested :registry)
+                      weight (font-get requested :weight)
+                      slant (font-get requested :slant)
+                      width (font-get requested :width)
+                      adstyle (font-get requested :adstyle))
+              (setq family (aref requested 0)
+                    registry (aref requested 5)
+                    weight (aref requested 1)
+                    slant (aref requested 2)
+                    width (aref requested 3)
+                    adstyle (aref requested 4)))
+            (if (not family)
+                (setq family "*-*")
+              (if (symbolp family)
+                  (setq family (symbol-name family)))
+              (or (string-match "-" family)
+                  (setq family (concat "*-" family))))
+            (if (not registry)
+                (setq registry "*-*")
+              (if (symbolp registry)
+                  (setq registry (symbol-name registry)))
+              ;; Test for a trailing `*' with a regexp rather than with
+              ;; `aref', which signals an error on an empty string.
+              (or (string-match "-" registry)
+                  (string-match "[*]\\'" registry)
+                  (setq registry (concat registry "*"))))
+            (insert (format "\n    -%s-%s-%s-%s-%s-*-*-*-*-*-*-%s"
+                            family (or weight "*") (or slant "*") (or width "*")
+                            (or adstyle "*") registry)))))
+
+      ;; Insert opened font names (if any).
+      (if (and (boundp 'print-opened) (symbol-value 'print-opened))
+          (dolist (opened (cdr elt))
+            (insert "\n\t[" opened "]"))))))
+
+(defun print-fontset (fontset &optional print-opened)
   "Print information about FONTSET.
 If FONTSET is nil, print information about the default fontset.
-If optional arg PRINT-FONTS is non-nil, also print names of all opened
+If optional arg PRINT-OPENED is non-nil, also print names of all opened
 fonts for FONTSET.  This function actually inserts the information in
 the current buffer."
   (or fontset
       (setq fontset (query-fontset "fontset-default")))
-  (let ((tail (aref (fontset-info fontset) 2))
-       elt chars font-spec opened prev-charset charset from to)
     (beginning-of-line)
     (insert "Fontset: " fontset "\n")
-    (insert "CHARSET or CHAR RANGE")
-    (indent-to 24)
-    (insert "FONT NAME\n")
-    (insert "---------------------")
-    (indent-to 24)
-    (insert "---------")
-    (insert "\n")
-    (while tail
-      (setq elt (car tail) tail (cdr tail))
-      (setq chars (car elt) font-spec (car (cdr elt)) opened (cdr (cdr elt)))
-      (if (symbolp chars)
-         (setq charset chars from nil to nil)
-       (if (integerp chars)
-           (setq charset (char-charset chars) from chars to chars)
-         (setq charset (char-charset (car chars))
-               from (car chars) to (cdr chars))))
-      (unless (eq charset prev-charset)
-       (insert (symbol-name charset))
-       (if from
-           (insert "\n")))
-      (when from
-       (let ((split (split-char from)))
-         (if (and (= (charset-dimension charset) 2)
-                  (= (nth 2 split) 0))
-             (setq from
-                   (make-char charset (nth 1 split)
-                              (if (= (charset-chars charset) 94) 33 32))))
-         (insert "  " from))
-       (when (/= from to)
-         (insert "-")
-         (let ((split (split-char to)))
-           (if (and (= (charset-dimension charset) 2)
-                    (= (nth 2 split) 0))
-               (setq to
-                     (make-char charset (nth 1 split)
-                                (if (= (charset-chars charset) 94) 126 127))))
-           (insert to))))
-      (indent-to 24)
-      (if (stringp font-spec)
-         (insert font-spec)
-       (if (car font-spec)
-           (if (string-match "-" (car font-spec))
-               (insert "-" (car font-spec) "-*-")
-             (insert "-*-" (car font-spec) "-*-"))
-         (insert "-*-"))
-       (if (cdr font-spec)
-           (if (string-match "-" (cdr font-spec))
-               (insert (cdr font-spec))
-             (insert (cdr font-spec) "-*"))
-         (insert "*")))
-      (insert "\n")
-      (when print-fonts
-       (while opened
-         (indent-to 5)
-         (insert "[" (car opened) "]\n")
-         (setq opened (cdr opened))))
-      (setq prev-charset charset)
-      )))
+  ;; Column legend; each caption is underlined with the `underline' face.
+  (insert (propertize "CHAR RANGE" 'face 'underline)
+          " (" (propertize "CODE RANGE" 'face 'underline) ")\n")
+  (insert "    " (propertize "FONT NAME" 'face 'underline)
+         " (" (propertize "REQUESTED" 'face 'underline)
+         " and [" (propertize "OPENED" 'face 'underline) "])")
+  ;; INFO is walked twice with `describe-vector': first INFO itself,
+  ;; then, under a separator line, the contents of its extra slot 0.
+  ;; `print-fontset-element' formats each entry; it reads PRINT-OPENED
+  ;; through `boundp' and `symbol-value', not through an argument.
+  (let ((info (fontset-info fontset)))
+    (describe-vector info 'print-fontset-element)
+    (insert "\n  ---<fallback to the default fontset>---")
+    (describe-vector (char-table-extra-slot info 0) 'print-fontset-element)))
 
 ;;;###autoload
 (defun describe-fontset (fontset)
@@ -1291,16 +1090,6 @@
 
       (insert-section 4 "Coding systems")
       (list-coding-systems-1 t)
-      (princ "\
-############################
-## LIST OF CODING CATEGORIES (ordered by priority)
-## CATEGORY:CODING-SYSTEM
-##
-")
-      (let ((l coding-category-list))
-       (while l
-         (princ (format "%s:%s\n" (car l) (symbol-value (car l))))
-         (setq l (cdr l))))
       (insert "\n")
 
       (insert-section 5 "Character sets")
@@ -1318,6 +1107,192 @@
            (setq fontsets (cdr fontsets)))))
       (print-help-return-message))))
 
+;;;###autoload
+(defcustom unicodedata-file nil
+  "File name of the UnicodeData file, or nil if there is none.
+This should be the UnicodeData.txt file distributed by the Unicode
+consortium; it is used for diagnostics.  When the value is non-nil,
+`describe-char-after' prints data looked up from that file."
+  :type '(choice (const :tag "None" nil)
+                 file)
+  :group 'mule)
+
+;; We could convert the unidata file into a Lispy form once-for-all
+;; and distribute it for loading on demand.  It might be made more
+;; space-efficient by splitting strings word-wise and replacing them
+;; with lists of symbols interned in a private obarray, e.g.
+;; "LATIN SMALL LETTER A" => '(LATIN SMALL LETTER A).
+
+;;;###autoload
+(defun unicode-data (char)
+  "Return a list of Unicode data for unicode CHAR.
+Each element is a list of a property description and the property value.
+The list is null if CHAR isn't found in `unicodedata-file', or if
+`unicodedata-file' is nil.  Signal an error if that file does not
+exist, or if the line found for CHAR does not have the expected
+number of fields."
+  (when unicodedata-file
+    (unless (file-exists-p unicodedata-file)
+      (error "`unicodedata-file' %s not found" unicodedata-file))
+    (with-current-buffer (find-file-noselect unicodedata-file t t)
+      (goto-char (point-min))
+      (let ((hex (format "%04X" char))
+            found)
+        ;; Include the field separator in the search, so that a short
+        ;; code cannot match the leading digits of a longer one.
+        (if (re-search-forward (concat "^" hex ";") nil t)
+            (setq found t)
+          ;; It's not listed explicitly.  Look for ranges, e.g. CJK
+          ;; ideographs, and check whether it's in one of them.  A
+          ;; range is a `First>' line followed by a `Last>' line; on
+          ;; success point is left on the `Last>' line.
+          (while (and (not found)
+                      (re-search-forward "^\\([^;]+\\);[^;]+First>;" nil t))
+            (let ((range-first (string-to-number (match-string 1) 16)))
+              (forward-line 1)
+              ;; Use the match data only if the next line really is
+              ;; the `Last>' entry; otherwise it would be stale.
+              (when (looking-at "^\\([^;]+\\);[^;]+Last>;")
+                (let ((range-last (string-to-number (match-string 1) 16)))
+                  (if (and (>= char range-first)
+                           (<= char range-last))
+                      (setq found t)))))))
+        (if found
+            ;; Split the current line into fields, dropping the code
+            ;; point itself and turning empty fields into nil.
+            (let ((fields (mapcar (lambda (elt)
+                                    (if (> (length elt) 0)
+                                        elt))
+                                  (cdr (split-string
+                                        (buffer-substring
+                                         (line-beginning-position)
+                                         (line-end-position))
+                                        ";")))))
+              ;; The length depends on whether the last field was empty.
+              (unless (or (= 13 (length fields))
+                          (= 14 (length fields)))
+                (error "Invalid contents in %s" unicodedata-file))
+              ;; The field names and values lists are slightly
+              ;; modified from Mule-UCS unidata.el.
+              (list
+               (list "Name" (let ((name (nth 0 fields)))
+                              ;; Check for <..., First>, <..., Last>
+                              (if (string-match "\\`\\(<[^,]+\\)," name)
+                                  (concat (match-string 1 name) ">")
+                                name)))
+               (list "Category"
+                     (cdr (assoc
+                           (nth 1 fields)
+                           '(("Lu" . "uppercase letter")
+                             ("Ll" . "lowercase letter")
+                             ("Lt" . "titlecase letter")
+                             ("Mn" . "non-spacing mark")
+                             ("Mc" . "spacing-combining mark")
+                             ("Me" . "enclosing mark")
+                             ("Nd" . "decimal digit")
+                             ("Nl" . "letter number")
+                             ("No" . "other number")
+                             ("Zs" . "space separator")
+                             ("Zl" . "line separator")
+                             ("Zp" . "paragraph separator")
+                             ("Cc" . "other control")
+                             ("Cf" . "other format")
+                             ("Cs" . "surrogate")
+                             ("Co" . "private use")
+                             ("Cn" . "not assigned")
+                             ("Lm" . "modifier letter")
+                             ("Lo" . "other letter")
+                             ("Pc" . "connector punctuation")
+                             ("Pd" . "dash punctuation")
+                             ("Ps" . "open punctuation")
+                             ("Pe" . "close punctuation")
+                             ("Pi" . "initial-quotation punctuation")
+                             ("Pf" . "final-quotation punctuation")
+                             ("Po" . "other punctuation")
+                             ("Sm" . "math symbol")
+                             ("Sc" . "currency symbol")
+                             ("Sk" . "modifier symbol")
+                             ("So" . "other symbol")))))
+               (list "Combining class"
+                     (cdr (assoc
+                           (string-to-number (nth 2 fields))
+                           '((0 . "Spacing")
+                             (1 . "Overlays and interior")
+                             (7 . "Nuktas")
+                             (8 . "Hiragana/Katakana voicing marks")
+                             (9 . "Viramas")
+                             (10 . "Start of fixed position classes")
+                             (199 . "End of fixed position classes")
+                             (200 . "Below left attached")
+                             (202 . "Below attached")
+                             (204 . "Below right attached")
+                             (208 . "Left attached (reordrant around \
+single base character)")
+                             (210 . "Right attached")
+                             (212 . "Above left attached")
+                             (214 . "Above attached")
+                             (216 . "Above right attached")
+                             (218 . "Below left")
+                             (220 . "Below")
+                             (222 . "Below right")
+                             (224 . "Left (reordrant around single base \
+character)")
+                             (226 . "Right")
+                             (228 . "Above left")
+                             (230 . "Above")
+                             (232 . "Above right")
+                             (233 . "Double below")
+                             (234 . "Double above")
+                             (240 . "Below (iota subscript)")))))
+               (list "Bidi category"
+                     (cdr (assoc
+                           (nth 3 fields)
+                           '(("L" . "Left-to-Right")
+                             ("LRE" . "Left-to-Right Embedding")
+                             ("LRO" . "Left-to-Right Override")
+                             ("R" . "Right-to-Left")
+                             ("AL" . "Right-to-Left Arabic")
+                             ("RLE" . "Right-to-Left Embedding")
+                             ("RLO" . "Right-to-Left Override")
+                             ("PDF" . "Pop Directional Format")
+                             ("EN" . "European Number")
+                             ("ES" . "European Number Separator")
+                             ("ET" . "European Number Terminator")
+                             ("AN" . "Arabic Number")
+                             ("CS" . "Common Number Separator")
+                             ("NSM" . "Non-Spacing Mark")
+                             ("BN" . "Boundary Neutral")
+                             ("B" . "Paragraph Separator")
+                             ("S" . "Segment Separator")
+                             ("WS" . "Whitespace")
+                             ("ON" . "Other Neutrals")))))
+               (list "Decomposition"
+                     (if (nth 4 fields)
+                         (let* ((parts (split-string (nth 4 fields)))
+                                (info (car parts)))
+                           ;; A leading <TAG> is kept (without the
+                           ;; brackets) and removed from PARTS; the rest
+                           ;; are hex codes converted to characters.
+                           (if (string-match "\\`<\\(.+\\)>\\'" info)
+                               (setq info (match-string 1 info))
+                             (setq info nil))
+                           (if info (setq parts (cdr parts)))
+                           (setq parts (mapconcat
+                                        (lambda (arg)
+                                          (string (string-to-number arg 16)))
+                                        parts " "))
+                           (concat info parts))))
+               (list "Decimal digit value"
+                     (nth 5 fields))
+               (list "Digit value"
+                     (nth 6 fields))
+               (list "Numeric value"
+                     (nth 7 fields))
+               (list "Mirrored"
+                     (if (equal "Y" (nth 8 fields))
+                         "yes"))
+               (list "Old name" (nth 9 fields))
+               (list "ISO 10646 comment" (nth 10 fields))
+               (list "Uppercase" (and (nth 11 fields)
+                                      (string (string-to-number
+                                               (nth 11 fields) 16))))
+               (list "Lowercase" (and (nth 12 fields)
+                                      (string (string-to-number
+                                               (nth 12 fields) 16))))
+               (list "Titlecase" (and (nth 13 fields)
+                                      (string (string-to-number
+                                               (nth 13 fields) 16)))))))))))
+
 (provide 'mule-diag)
 
 ;;; arch-tag: cd3b607c-2893-45a0-a4fa-a6535754dbee




reply via email to

[Prev in Thread] Current Thread [Next in Thread]