[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Emacs-diffs] Changes to emacs/lisp/descr-text.el
From: |
Richard M. Stallman |
Subject: |
[Emacs-diffs] Changes to emacs/lisp/descr-text.el |
Date: |
Wed, 28 May 2003 07:14:07 -0400 |
Index: emacs/lisp/descr-text.el
diff -c emacs/lisp/descr-text.el:1.11 emacs/lisp/descr-text.el:1.12
*** emacs/lisp/descr-text.el:1.11 Wed May 21 18:00:01 2003
--- emacs/lisp/descr-text.el Wed May 28 07:14:07 2003
***************
*** 218,433 ****
(newline)
(widget-insert "There are text properties here:\n")
(describe-property-list properties)))))
! (defcustom unicodedata-file nil
! "Location of Unicode data file.
! This is the UnicodeData.txt file from the Unicode consortium, used for
! diagnostics. If it is non-nil `describe-char-after' will print data
! looked up from it. This facility is mostly of use to people doing
! multilingual development.
! This is a fairly large file, not typically present on GNU systems. At
! the time of writing it is at
! <URL:ftp://www.unicode.org/Public/UNIDATA/UnicodeData.txt>."
! :group 'mule
! :version "21.5"
! :type '(choice (const :tag "None" nil)
! file))
! ;; We could convert the unidata file into a Lispy form once-for-all
! ;; and distribute it for loading on demand. It might be made more
! ;; space-efficient by splitting strings word-wise and replacing them
! ;; with lists of symbols interned in a private obarray, e.g.
! ;; "LATIN SMALL LETTER A" => '(LATIN SMALL LETTER A).
!
! ;; Fixme: Check whether this needs updating for Unicode 4.
! (defun unicode-data (char)
! "Return a list of Unicode data for unicode CHAR.
! Each element is a list of a property description and the property value.
! The list is null if CHAR isn't found in `unicodedata-file'."
! (when unicodedata-file
! (unless (file-exists-p unicodedata-file)
! (error "`unicodedata-file' %s not found" unicodedata-file))
! (save-excursion
! ;; Find file in fundamental mode to avoid, e.g. flyspell turned
! ;; on for .txt. Don't use RAWFILE arg in case of DOS line endings.
! (set-buffer (let ((auto-mode-alist))
! (find-file-noselect unicodedata-file)))
! (goto-char (point-min))
! (let ((hex (format "%04X" char))
! found first last)
! (if (re-search-forward (concat "^" hex) nil t)
! (setq found t)
! ;; It's not listed explicitly. Look for ranges, e.g. CJK
! ;; ideographs, and check whether it's in one of them.
! (while (and (re-search-forward "^\\([^;]+\\);[^;]+First>;" nil t)
! (>= char (setq first
! (string-to-number (match-string 1) 16)))
! (progn
! (forward-line 1)
! (looking-at "^\\([^;]+\\);[^;]+Last>;")
! (> char
! (setq last
! (string-to-number (match-string 1) 16))))))
! (if (and (>= char first)
! (<= char last))
! (setq found t)))
! (if found
! (let ((fields (mapcar (lambda (elt)
! (if (> (length elt) 0)
! elt))
! (cdr (split-string
! (buffer-substring
! (line-beginning-position)
! (line-end-position))
! ";")))))
! ;; The length depends on whether the last field was empty.
! (unless (or (= 13 (length fields))
! (= 14 (length fields)))
! (error "Invalid contents in %s" unicodedata-file))
! ;; The field names and values lists are slightly
! ;; modified from Mule-UCS unidata.el.
! (list
! (list "Name" (let ((name (nth 0 fields)))
! ;; Check for <..., First>, <..., Last>
! (if (string-match "\\`\\(<[^,]+\\)," name)
! (concat (match-string 1 name) ">")
! name)))
! (list "Category"
! (cdr (assoc
! (nth 1 fields)
! '(("Lu" . "uppercase letter")
! ("Ll" . "lowercase letter")
! ("Lt" . "titlecase letter")
! ("Mn" . "non-spacing mark")
! ("Mc" . "spacing-combining mark")
! ("Me" . "enclosing mark")
! ("Nd" . "decimal digit")
! ("Nl" . "letter number")
! ("No" . "other number")
! ("Zs" . "space separator")
! ("Zl" . "line separator")
! ("Zp" . "paragraph separator")
! ("Cc" . "other control")
! ("Cf" . "other format")
! ("Cs" . "surrogate")
! ("Co" . "private use")
! ("Cn" . "not assigned")
! ("Lm" . "modifier letter")
! ("Lo" . "other letter")
! ("Pc" . "connector punctuation")
! ("Pd" . "dash punctuation")
! ("Ps" . "open punctuation")
! ("Pe" . "close punctuation")
! ("Pi" . "initial-quotation punctuation")
! ("Pf" . "final-quotation punctuation")
! ("Po" . "other punctuation")
! ("Sm" . "math symbol")
! ("Sc" . "currency symbol")
! ("Sk" . "modifier symbol")
! ("So" . "other symbol")))))
! (list "Combining class"
! (cdr (assoc
! (string-to-number (nth 2 fields))
! '((0 . "Spacing")
! (1 . "Overlays and interior")
! (7 . "Nuktas")
! (8 . "Hiragana/Katakana voicing marks")
! (9 . "Viramas")
! (10 . "Start of fixed position classes")
! (199 . "End of fixed position classes")
! (200 . "Below left attached")
! (202 . "Below attached")
! (204 . "Below right attached")
! (208 . "Left attached (reordrant around \
! single base character)")
! (210 . "Right attached")
! (212 . "Above left attached")
! (214 . "Above attached")
! (216 . "Above right attached")
! (218 . "Below left")
! (220 . "Below")
! (222 . "Below right")
! (224 . "Left (reordrant around single base \
! character)")
! (226 . "Right")
! (228 . "Above left")
! (230 . "Above")
! (232 . "Above right")
! (233 . "Double below")
! (234 . "Double above")
! (240 . "Below (iota subscript)")))))
! (list "Bidi category"
! (cdr (assoc
! (nth 3 fields)
! '(("L" . "Left-to-Right")
! ("LRE" . "Left-to-Right Embedding")
! ("LRO" . "Left-to-Right Override")
! ("R" . "Right-to-Left")
! ("AL" . "Right-to-Left Arabic")
! ("RLE" . "Right-to-Left Embedding")
! ("RLO" . "Right-to-Left Override")
! ("PDF" . "Pop Directional Format")
! ("EN" . "European Number")
! ("ES" . "European Number Separator")
! ("ET" . "European Number Terminator")
! ("AN" . "Arabic Number")
! ("CS" . "Common Number Separator")
! ("NSM" . "Non-Spacing Mark")
! ("BN" . "Boundary Neutral")
! ("B" . "Paragraph Separator")
! ("S" . "Segment Separator")
! ("WS" . "Whitespace")
! ("ON" . "Other Neutrals")))))
! (list
! "Decomposition"
! (if (nth 4 fields)
! (let* ((parts (split-string (nth 4 fields)))
! (info (car parts)))
! (if (string-match "\\`<\\(.+\\)>\\'" info)
! (setq info (match-string 1 info))
! (setq info nil))
! (if info (setq parts (cdr parts)))
! ;; Maybe printing ? for unrepresentable unicodes
! ;; here and below should be changed?
! (setq parts (mapconcat
! (lambda (arg)
! (string (or (decode-char
! 'ucs
! (string-to-number arg 16))
! ??)))
! parts " "))
! (concat info parts))))
! (list "Decimal digit value"
! (nth 5 fields))
! (list "Digit value"
! (nth 6 fields))
! (list "Numeric value"
! (nth 7 fields))
! (list "Mirrored"
! (if (equal "Y" (nth 8 fields))
! "yes"))
! (list "Old name" (nth 9 fields))
! (list "ISO 10646 comment" (nth 10 fields))
! (list "Uppercase" (and (nth 11 fields)
! (string (or (decode-char
! 'ucs
! (string-to-number
! (nth 11 fields) 16))
! ??))))
! (list "Lowercase" (and (nth 12 fields)
! (string (or (decode-char
! 'ucs
! (string-to-number
! (nth 12 fields) 16))
! ??))))
! (list "Titlecase" (and (nth 13 fields)
! (string (or (decode-char
! 'ucs
! (string-to-number
! (nth 13 fields) 16))
! ??)))))))))))
;;;###autoload
(defun describe-char (pos)
"Describe the character after POS (interactively, the character after point).
--- 218,440 ----
(newline)
(widget-insert "There are text properties here:\n")
(describe-property-list properties)))))
+
+ ;;; We cannot use the UnicodeData.txt file as such; it is not free.
+ ;;; We can turn that info into a different format and release the result
+ ;;; as free data. When that is done, we could reinstate the code below.
+ ;;; In the meantime, here is a dummy placeholder.
+ ;;; -- rms
+ (defun describe-char-unicode-data (char) nil)
! ;;; (defcustom describe-char-unicodedata-file nil
! ;;; "Location of Unicode data file.
! ;;; This is the UnicodeData.txt file from the Unicode consortium, used for
! ;;; diagnostics. If it is non-nil `describe-char-after' will print data
! ;;; looked up from it. This facility is mostly of use to people doing
! ;;; multilingual development.
! ;;; This is a fairly large file, not typically present on GNU systems. At
! ;;; the time of writing it is at
! ;;; <URL:ftp://www.unicode.org/Public/UNIDATA/UnicodeData.txt>."
! ;;; :group 'mule
! ;;; :version "21.5"
! ;;; :type '(choice (const :tag "None" nil)
! ;;; file))
! ;;; ;; We could convert the unidata file into a Lispy form once-for-all
! ;;; ;; and distribute it for loading on demand. It might be made more
! ;;; ;; space-efficient by splitting strings word-wise and replacing them
! ;;; ;; with lists of symbols interned in a private obarray, e.g.
! ;;; ;; "LATIN SMALL LETTER A" => '(LATIN SMALL LETTER A).
+ ;;; ;; Fixme: Check whether this needs updating for Unicode 4.
+ ;;; (defun describe-char-unicode-data (char)
+ ;;; "Return a list of Unicode data for unicode CHAR.
+ ;;; Each element is a list of a property description and the property value.
+ ;;; The list is null if CHAR isn't found in `describe-char-unicodedata-file'."
+ ;;; (when describe-char-unicodedata-file
+ ;;; (unless (file-exists-p describe-char-unicodedata-file)
+ ;;; (error "`unicodedata-file' %s not found" describe-char-unicodedata-file))
+ ;;; (save-excursion
+ ;;; ;; Find file in fundamental mode to avoid, e.g. flyspell turned
+ ;;; ;; on for .txt. Don't use RAWFILE arg in case of DOS line endings.
+ ;;; (set-buffer (let ((auto-mode-alist))
+ ;;; (find-file-noselect describe-char-unicodedata-file)))
+ ;;; (goto-char (point-min))
+ ;;; (let ((hex (format "%04X" char))
+ ;;; found first last)
+ ;;; (if (re-search-forward (concat "^" hex) nil t)
+ ;;; (setq found t)
+ ;;; ;; It's not listed explicitly. Look for ranges, e.g. CJK
+ ;;; ;; ideographs, and check whether it's in one of them.
+ ;;; (while (and (re-search-forward "^\\([^;]+\\);[^;]+First>;" nil t)
+ ;;; (>= char (setq first
+ ;;; (string-to-number (match-string 1) 16)))
+ ;;; (progn
+ ;;; (forward-line 1)
+ ;;; (looking-at "^\\([^;]+\\);[^;]+Last>;")
+ ;;; (> char
+ ;;; (setq last
+ ;;; (string-to-number (match-string 1) 16))))))
+ ;;; (if (and (>= char first)
+ ;;; (<= char last))
+ ;;; (setq found t)))
+ ;;; (if found
+ ;;; (let ((fields (mapcar (lambda (elt)
+ ;;; (if (> (length elt) 0)
+ ;;; elt))
+ ;;; (cdr (split-string
+ ;;; (buffer-substring
+ ;;; (line-beginning-position)
+ ;;; (line-end-position))
+ ;;; ";")))))
+ ;;; ;; The length depends on whether the last field was empty.
+ ;;; (unless (or (= 13 (length fields))
+ ;;; (= 14 (length fields)))
+ ;;; (error "Invalid contents in %s" describe-char-unicodedata-file))
+ ;;; ;; The field names and values lists are slightly
+ ;;; ;; modified from Mule-UCS unidata.el.
+ ;;; (list
+ ;;; (list "Name" (let ((name (nth 0 fields)))
+ ;;; ;; Check for <..., First>, <..., Last>
+ ;;; (if (string-match "\\`\\(<[^,]+\\)," name)
+ ;;; (concat (match-string 1 name) ">")
+ ;;; name)))
+ ;;; (list "Category"
+ ;;; (cdr (assoc
+ ;;; (nth 1 fields)
+ ;;; '(("Lu" . "uppercase letter")
+ ;;; ("Ll" . "lowercase letter")
+ ;;; ("Lt" . "titlecase letter")
+ ;;; ("Mn" . "non-spacing mark")
+ ;;; ("Mc" . "spacing-combining mark")
+ ;;; ("Me" . "enclosing mark")
+ ;;; ("Nd" . "decimal digit")
+ ;;; ("Nl" . "letter number")
+ ;;; ("No" . "other number")
+ ;;; ("Zs" . "space separator")
+ ;;; ("Zl" . "line separator")
+ ;;; ("Zp" . "paragraph separator")
+ ;;; ("Cc" . "other control")
+ ;;; ("Cf" . "other format")
+ ;;; ("Cs" . "surrogate")
+ ;;; ("Co" . "private use")
+ ;;; ("Cn" . "not assigned")
+ ;;; ("Lm" . "modifier letter")
+ ;;; ("Lo" . "other letter")
+ ;;; ("Pc" . "connector punctuation")
+ ;;; ("Pd" . "dash punctuation")
+ ;;; ("Ps" . "open punctuation")
+ ;;; ("Pe" . "close punctuation")
+ ;;; ("Pi" . "initial-quotation punctuation")
+ ;;; ("Pf" . "final-quotation punctuation")
+ ;;; ("Po" . "other punctuation")
+ ;;; ("Sm" . "math symbol")
+ ;;; ("Sc" . "currency symbol")
+ ;;; ("Sk" . "modifier symbol")
+ ;;; ("So" . "other symbol")))))
+ ;;; (list "Combining class"
+ ;;; (cdr (assoc
+ ;;; (string-to-number (nth 2 fields))
+ ;;; '((0 . "Spacing")
+ ;;; (1 . "Overlays and interior")
+ ;;; (7 . "Nuktas")
+ ;;; (8 . "Hiragana/Katakana voicing marks")
+ ;;; (9 . "Viramas")
+ ;;; (10 . "Start of fixed position classes")
+ ;;; (199 . "End of fixed position classes")
+ ;;; (200 . "Below left attached")
+ ;;; (202 . "Below attached")
+ ;;; (204 . "Below right attached")
+ ;;; (208 . "Left attached (reordrant around \
+ ;;; single base character)")
+ ;;; (210 . "Right attached")
+ ;;; (212 . "Above left attached")
+ ;;; (214 . "Above attached")
+ ;;; (216 . "Above right attached")
+ ;;; (218 . "Below left")
+ ;;; (220 . "Below")
+ ;;; (222 . "Below right")
+ ;;; (224 . "Left (reordrant around single base \
+ ;;; character)")
+ ;;; (226 . "Right")
+ ;;; (228 . "Above left")
+ ;;; (230 . "Above")
+ ;;; (232 . "Above right")
+ ;;; (233 . "Double below")
+ ;;; (234 . "Double above")
+ ;;; (240 . "Below (iota subscript)")))))
+ ;;; (list "Bidi category"
+ ;;; (cdr (assoc
+ ;;; (nth 3 fields)
+ ;;; '(("L" . "Left-to-Right")
+ ;;; ("LRE" . "Left-to-Right Embedding")
+ ;;; ("LRO" . "Left-to-Right Override")
+ ;;; ("R" . "Right-to-Left")
+ ;;; ("AL" . "Right-to-Left Arabic")
+ ;;; ("RLE" . "Right-to-Left Embedding")
+ ;;; ("RLO" . "Right-to-Left Override")
+ ;;; ("PDF" . "Pop Directional Format")
+ ;;; ("EN" . "European Number")
+ ;;; ("ES" . "European Number Separator")
+ ;;; ("ET" . "European Number Terminator")
+ ;;; ("AN" . "Arabic Number")
+ ;;; ("CS" . "Common Number Separator")
+ ;;; ("NSM" . "Non-Spacing Mark")
+ ;;; ("BN" . "Boundary Neutral")
+ ;;; ("B" . "Paragraph Separator")
+ ;;; ("S" . "Segment Separator")
+ ;;; ("WS" . "Whitespace")
+ ;;; ("ON" . "Other Neutrals")))))
+ ;;; (list
+ ;;; "Decomposition"
+ ;;; (if (nth 4 fields)
+ ;;; (let* ((parts (split-string (nth 4 fields)))
+ ;;; (info (car parts)))
+ ;;; (if (string-match "\\`<\\(.+\\)>\\'" info)
+ ;;; (setq info (match-string 1 info))
+ ;;; (setq info nil))
+ ;;; (if info (setq parts (cdr parts)))
+ ;;; ;; Maybe printing ? for unrepresentable unicodes
+ ;;; ;; here and below should be changed?
+ ;;; (setq parts (mapconcat
+ ;;; (lambda (arg)
+ ;;; (string (or (decode-char
+ ;;; 'ucs
+ ;;; (string-to-number arg 16))
+ ;;; ??)))
+ ;;; parts " "))
+ ;;; (concat info parts))))
+ ;;; (list "Decimal digit value"
+ ;;; (nth 5 fields))
+ ;;; (list "Digit value"
+ ;;; (nth 6 fields))
+ ;;; (list "Numeric value"
+ ;;; (nth 7 fields))
+ ;;; (list "Mirrored"
+ ;;; (if (equal "Y" (nth 8 fields))
+ ;;; "yes"))
+ ;;; (list "Old name" (nth 9 fields))
+ ;;; (list "ISO 10646 comment" (nth 10 fields))
+ ;;; (list "Uppercase" (and (nth 11 fields)
+ ;;; (string (or (decode-char
+ ;;; 'ucs
+ ;;; (string-to-number
+ ;;; (nth 11 fields) 16))
+ ;;; ??))))
+ ;;; (list "Lowercase" (and (nth 12 fields)
+ ;;; (string (or (decode-char
+ ;;; 'ucs
+ ;;; (string-to-number
+ ;;; (nth 12 fields) 16))
+ ;;; ??))))
+ ;;; (list "Titlecase" (and (nth 13 fields)
+ ;;; (string (or (decode-char
+ ;;; 'ucs
+ ;;; (string-to-number
+ ;;; (nth 13 fields) 16))
+ ;;; ??)))))))))))
+
;;;###autoload
(defun describe-char (pos)
"Describe the character after POS (interactively, the character after point).
***************
*** 517,523 ****
(encoded-string-description encoded coding)
"not encodable"))))
,@(let ((unicodedata (and unicode
! (unicode-data unicode))))
(if unicodedata
(cons (list "Unicode data" " ") unicodedata))))))
(setq max-width (apply #'max (mapcar #'(lambda (x) (length (car x)))
--- 524,530 ----
(encoded-string-description encoded coding)
"not encodable"))))
,@(let ((unicodedata (and unicode
! (describe-char-unicode-data unicode))))
(if unicodedata
(cons (list "Unicode data" " ") unicodedata))))))
(setq max-width (apply #'max (mapcar #'(lambda (x) (length (car x)))