emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Emacs-diffs] master 86d0834: New function ‘char-from-name’


From: Paul Eggert
Subject: [Emacs-diffs] master 86d0834: New function ‘char-from-name’
Date: Mon, 25 Apr 2016 17:42:51 +0000

branch: master
commit 86d083438dba60dc00e9e96414bf7e832720c05a
Author: Paul Eggert <address@hidden>
Commit: Paul Eggert <address@hidden>

    New function ‘char-from-name’
    
    This also fixes the mishandling of "\N{CJK COMPATIBILITY
    IDEOGRAPH-F900}", "\N{VARIATION SELECTOR-1}", etc.
    Problem reported by Eli Zaretskii in:
    http://lists.gnu.org/archive/html/emacs-devel/2016-04/msg00614.html
    * doc/lispref/nonascii.texi (Character Codes), etc/NEWS: Document this.
    * lisp/international/mule-cmds.el (char-from-name): New function.
    (read-char-by-name): Use it.  Document that "BED" is treated as
    a name, not as a hexadecimal number.  Reject out-of-range integers,
    floating-point numbers, and strings with trailing junk.
    * src/lread.c (character_name_to_code): Call char-from-name
    instead of inspecting ucs-names directly, so that we handle
    computed names like "VARIATION SELECTOR-1".  Do not use an auto
    string, since char-from-name might GC.
    * test/src/lread-tests.el: Add tests for new behavior, and
    fix some old tests that were wrong.
---
 doc/lispref/nonascii.texi       |   12 ++++++++++
 etc/NEWS                        |    4 ++++
 lisp/international/mule-cmds.el |   43 +++++++++++++++++++++++++++--------
 src/lread.c                     |   31 ++++++++-----------------
 test/src/lread-tests.el         |   48 +++++++++++++++++++++++++++++++++++----
 5 files changed, 103 insertions(+), 35 deletions(-)

diff --git a/doc/lispref/nonascii.texi b/doc/lispref/nonascii.texi
index 0e4aa86..fd2ce32 100644
--- a/doc/lispref/nonascii.texi
+++ b/doc/lispref/nonascii.texi
@@ -420,6 +420,18 @@ codepoint can have.
 @end example
 @end defun
 
address@hidden char-from-name string &optional ignore-case
+This function returns the character whose Unicode name is @var{string}.
+If @var{ignore-case} is address@hidden, case is ignored in @var{string}.
+This function returns @code{nil} if @var{string} does not name a character.
+
address@hidden
+;; U+03A3
+(= (char-from-name "GREEK CAPITAL LETTER SIGMA") #x03A3)
+     @result{} t
address@hidden example
address@hidden defun
+
 @defun get-byte &optional pos string
 This function returns the byte at character position @var{pos} in the
 current buffer.  If the current buffer is unibyte, this is literally
diff --git a/etc/NEWS b/etc/NEWS
index 6bdb648..e401d2d 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -391,6 +391,10 @@ compares their numerical values.  According to this 
predicate,
 "foo2.png" is smaller than "foo12.png".
 
 +++
+** The new function 'char-from-name' converts a Unicode name string
+to the corresponding character code.
+
++++
 ** New functions 'sxhash-eq' and 'sxhash-eql' return hash codes of a
 Lisp object suitable for use with 'eq' and 'eql' correspondingly.  If
 two objects are 'eq' ('eql'), then the result of 'sxhash-eq'
diff --git a/lisp/international/mule-cmds.el b/lisp/international/mule-cmds.el
index 8eb320a..2ce21a8 100644
--- a/lisp/international/mule-cmds.el
+++ b/lisp/international/mule-cmds.el
@@ -2978,6 +2978,27 @@ on encoding."
   (let ((char (assoc name ucs-names)))
     (when char (format " (%c)" (cdr char)))))
 
+(defun char-from-name (string &optional ignore-case)
+  "Return a character as a number from its Unicode name STRING.
+If optional IGNORE-CASE is non-nil, ignore case in STRING.
+Return nil if STRING does not name a character."
+  (or (cdr (assoc-string string (ucs-names) ignore-case))
+      (let ((minus (string-match-p "-[0-9A-F]+\\'" string)))
+        (when minus
+          ;; Parse names like "VARIATION SELECTOR-17" and "CJK
+          ;; COMPATIBILITY IDEOGRAPH-F900" that are not in ucs-names.
+          (ignore-errors
+            (let* ((case-fold-search ignore-case)
+                   (vs (string-match-p "\\`VARIATION SELECTOR-" string))
+                   (minus-num (string-to-number (substring string minus)
+                                                (if vs 10 16)))
+                   (vs-offset (if vs (if (< minus-num -16) #xE00EF #xFDFF) 0))
+                   (code (- vs-offset minus-num))
+                   (name (get-char-code-property code 'name)))
+              (when (eq t (compare-strings string nil nil name nil nil
+                                           ignore-case))
+                code)))))))
+
 (defun read-char-by-name (prompt)
   "Read a character by its Unicode name or hex number string.
 Display PROMPT and read a string that represents a character by its
@@ -2991,9 +3012,11 @@ preceded by an asterisk `*' and use completion, it will 
show all
 the characters whose names include that substring, not necessarily
 at the beginning of the name.
 
-This function also accepts a hexadecimal number of Unicode code
-point or a number in hash notation, e.g. #o21430 for octal,
-#x2318 for hex, or #10r8984 for decimal."
+Accept a name like \"CIRCULATION FUNCTION\", a hexadecimal
+number like \"2A10\", or a number in hash notation (e.g.,
+\"#x2a10\" for hex, \"10r10768\" for decimal, or \"#o25020\" for
+octal).  Treat otherwise-ambiguous strings like \"BED\" (U+1F6CF)
+as names, not numbers."
   (let* ((enable-recursive-minibuffers t)
         (completion-ignore-case t)
         (input
@@ -3006,13 +3029,13 @@ point or a number in hash notation, e.g. #o21430 for 
octal,
                   (category . unicode-name))
               (complete-with-action action (ucs-names) string pred)))))
         (char
-         (cond
-          ((string-match-p "\\`[0-9a-fA-F]+\\'" input)
-           (string-to-number input 16))
-          ((string-match-p "\\`#" input)
-           (read input))
-          (t
-           (cdr (assoc-string input (ucs-names) t))))))
+          (cond
+           ((char-from-name input t))
+           ((string-match-p "\\`[0-9a-fA-F]+\\'" input)
+            (ignore-errors (string-to-number input 16)))
+           ((string-match-p "\\`#\\([bBoOxX]\\|[0-9]+[rR]\\)[0-9a-zA-Z]+\\'"
+                            input)
+            (ignore-errors (read input))))))
     (unless (characterp char)
       (error "Invalid character"))
     char))
diff --git a/src/lread.c b/src/lread.c
index a42c1f6..6e97e07 100644
--- a/src/lread.c
+++ b/src/lread.c
@@ -2155,26 +2155,15 @@ grow_read_buffer (void)
 static int
 character_name_to_code (char const *name, ptrdiff_t name_len)
 {
-  Lisp_Object code;
-
-  /* Code point as U+XXXX....  */
-  if (name[0] == 'U' && name[1] == '+')
-    {
-      /* Pass the leading '+' to string_to_number, so that it
-        rejects monstrosities such as negative values.  */
-      code = string_to_number (name + 1, 16, false);
-    }
-  else
-    {
-      /* Look up the name in the table returned by 'ucs-names'.  */
-      AUTO_STRING_WITH_LEN (namestr, name, name_len);
-      Lisp_Object names = call0 (Qucs_names);
-      code = CDR (Fassoc (namestr, names));
-    }
-
-  if (! (INTEGERP (code)
-        && 0 <= XINT (code) && XINT (code) <= MAX_UNICODE_CHAR
-        && ! char_surrogate_p (XINT (code))))
+  /* For "U+XXXX", pass the leading '+' to string_to_number to reject
+     monstrosities like "U+-0000".  */
+  Lisp_Object code
+    = (name[0] == 'U' && name[1] == '+'
+       ? string_to_number (name + 1, 16, false)
+       : call2 (Qchar_from_name, make_unibyte_string (name, name_len), Qt));
+
+  if (! RANGED_INTEGERP (0, code, MAX_UNICODE_CHAR)
+      || char_surrogate_p (XINT (code)))
     {
       AUTO_STRING (format, "\\N{%s}");
       AUTO_STRING_WITH_LEN (namestr, name, name_len);
@@ -4829,5 +4818,5 @@ that are loaded before your customizations are read!  */);
   DEFSYM (Qrehash_size, "rehash-size");
   DEFSYM (Qrehash_threshold, "rehash-threshold");
 
-  DEFSYM (Qucs_names, "ucs-names");
+  DEFSYM (Qchar_from_name, "char-from-name");
 }
diff --git a/test/src/lread-tests.el b/test/src/lread-tests.el
index 2ebaf49..1a82d13 100644
--- a/test/src/lread-tests.el
+++ b/test/src/lread-tests.el
@@ -28,15 +28,55 @@
 (ert-deftest lread-char-number ()
   (should (equal (read "?\\N{U+A817}") #xA817)))
 
-(ert-deftest lread-char-name ()
+(ert-deftest lread-char-name-1 ()
   (should (equal (read "?\\N{SYLOTI  NAGRI LETTER \n DHO}")
                  #xA817)))
+(ert-deftest lread-char-name-2 ()
+  (should (equal (read "?\\N{BED}") #x1F6CF)))
+(ert-deftest lread-char-name-3 ()
+  (should (equal (read "?\\N{U+BED}") #xBED)))
+(ert-deftest lread-char-name-4 ()
+  (should (equal (read "?\\N{VARIATION SELECTOR-1}") #xFE00)))
+(ert-deftest lread-char-name-5 ()
+  (should (equal (read "?\\N{VARIATION SELECTOR-16}") #xFE0F)))
+(ert-deftest lread-char-name-6 ()
+  (should (equal (read "?\\N{VARIATION SELECTOR-17}") #xE0100)))
+(ert-deftest lread-char-name-7 ()
+  (should (equal (read "?\\N{VARIATION SELECTOR-256}") #xE01EF)))
+(ert-deftest lread-char-name-8 ()
+  (should (equal (read "?\\N{CJK COMPATIBILITY IDEOGRAPH-F900}") #xF900)))
+(ert-deftest lread-char-name-9 ()
+  (should (equal (read "?\\N{CJK COMPATIBILITY IDEOGRAPH-FAD9}") #xFAD9)))
+(ert-deftest lread-char-name-10 ()
+  (should (equal (read "?\\N{CJK COMPATIBILITY IDEOGRAPH-2F800}") #x2F800)))
+(ert-deftest lread-char-name-11 ()
+  (should (equal (read "?\\N{CJK COMPATIBILITY IDEOGRAPH-2FA1D}") #x2FA1D)))
 
 (ert-deftest lread-char-invalid-number ()
   (should-error (read "?\\N{U+110000}") :type 'invalid-read-syntax))
 
-(ert-deftest lread-char-invalid-name ()
+(ert-deftest lread-char-invalid-name-1 ()
   (should-error (read "?\\N{DOES NOT EXIST}")) :type 'invalid-read-syntax)
+(ert-deftest lread-char-invalid-name-2 ()
+  (should-error (read "?\\N{VARIATION SELECTOR-0}")) :type 
'invalid-read-syntax)
+(ert-deftest lread-char-invalid-name-3 ()
+  (should-error (read "?\\N{VARIATION SELECTOR-257}"))
+  :type 'invalid-read-syntax)
+(ert-deftest lread-char-invalid-name-4 ()
+  (should-error (read "?\\N{VARIATION SELECTOR--0}"))
+  :type 'invalid-read-syntax)
+(ert-deftest lread-char-invalid-name-5 ()
+  (should-error (read "?\\N{CJK COMPATIBILITY IDEOGRAPH-F8FF}"))
+  :type 'invalid-read-syntax)
+(ert-deftest lread-char-invalid-name-6 ()
+  (should-error (read "?\\N{CJK COMPATIBILITY IDEOGRAPH-FADA}"))
+  :type 'invalid-read-syntax)
+(ert-deftest lread-char-invalid-name-7 ()
+  (should-error (read "?\\N{CJK COMPATIBILITY IDEOGRAPH-2F7FF}"))
+  :type 'invalid-read-syntax)
+(ert-deftest lread-char-invalid-name-8 ()
+  (should-error (read "?\\N{CJK COMPATIBILITY IDEOGRAPH-2FA1E}"))
+  :type 'invalid-read-syntax)
 
 (ert-deftest lread-char-non-ascii-name ()
   (should-error (read "?\\N{LATIN CAPITAL LETTER Ø}")
@@ -55,13 +95,13 @@
   (should-error (read "?\\N{U+DFFF}") :type 'invalid-read-syntax))
 
 (ert-deftest lread-string-char-number-1 ()
-  (should (equal (read "a\\N{U+A817}b") "a\uA817bx")))
+  (should (equal (read "\"a\\N{U+A817}b\"") "a\uA817b")))
 (ert-deftest lread-string-char-number-2 ()
   (should-error (read "?\\N{0.5}") :type 'invalid-read-syntax))
 (ert-deftest lread-string-char-number-3 ()
   (should-error (read "?\\N{U+-0}") :type 'invalid-read-syntax))
 
 (ert-deftest lread-string-char-name ()
-  (should (equal (read "a\\N{SYLOTI NAGRI  LETTER DHO}b") "a\uA817b")))
+  (should (equal (read "\"a\\N{SYLOTI NAGRI  LETTER DHO}b\"") "a\uA817b")))
 
 ;;; lread-tests.el ends here



reply via email to

[Prev in Thread] Current Thread [Next in Thread]