bug-gnu-emacs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

bug#24603: [RFC 13/18] Add some tricky Unicode characters to regex test


From: Michal Nazarewicz
Subject: bug#24603: [RFC 13/18] Add some tricky Unicode characters to regex test
Date: Tue, 4 Oct 2016 03:10:36 +0200

* test/src/regex-tests.el: Include capital ‘Ǳ’ digraph, sharp ‘s’,
capital ligature ‘Ĳ’, small ligature ‘ﬁ’, title-case digraph ‘ǲ’,
all three forms of Greek sigma and the IPA ɕ symbol in the regex tests.
---
 test/src/regex-tests.el | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/test/src/regex-tests.el b/test/src/regex-tests.el
index c4844c7..fa66ff1 100644
--- a/test/src/regex-tests.el
+++ b/test/src/regex-tests.el
@@ -65,27 +65,30 @@ regex--test-cc
         (skip-chars-forward (concat "[:" name ":]\u2622"))
         (should (or (equal (point) p) (equal (point) (1+ p))))))))
 
-(dolist (test '(("alnum" "abcABC012łąka" "-, \t\n")
-                ("alpha" "abcABCłąka" "-,012 \t\n")
+(dolist (test '(("alnum" "abcABC012łąkaDZßIJfiDzΣσςɕ" "-, \t\n")
+                ("alpha" "abcABCłąkaDZßIJfiDzΣσςɕ" "-,012 \t\n")
                 ("digit" "012" "abcABCłąka-, \t\n")
                 ("xdigit" "0123aBc" "łąk-, \t\n")
-                ("upper" "ABCŁĄKA" "abc012-, \t\n")
-                ("lower" "abcłąka" "ABC012-, \t\n")
+                ("upper" "ABCŁĄKADZIJΣ" "abcß0fiσςɕ12-, \t\n")
+                ;; FIXME: ßfiɕ are all lower case (even though they don’t have
+                ;; (single-character) upper-case form).
+                ("lower" "abcłąkaσς" "ABC012DZIJΣ-, \t\n")
 
-                ("word" "abcABC012\u2620" "-, \t\n")
+                ("word" "abcABC012\u2620DZßIJfiDzΣσςɕ" "-, \t\n")
 
                 ("punct" ".,-" "abcABC012\u2620 \t\n")
                 ("cntrl" "\1\2\t\n" ".,-abcABC012\u2620 ")
-                ("graph" "abcłąka\u2620-," " \t\n\1")
-                ("print" "abcłąka\u2620-, " "\t\n\1")
+                ("graph" "abcłąka\u2620-,DZßIJfiDzΣσςɕ" " \t\n\1")
+                ("print" "abcłąka\u2620-,DZßIJfiDzΣσςɕ " "\t\n\1")
 
                 ("space" " \t\n\u2001" "abcABCł0123")
                 ("blank" " \t" "\n\u2001")
 
-                ("ascii" "abcABC012 \t\n\1" "łą\u2620")
-                ("nonascii" "łą\u2622" "abcABC012 \t\n\1")
-                ("unibyte" "abcABC012 \t\n\1" "łą\u2622")
-                ("multibyte" "łą\u2622" "abcABC012 \t\n\1")))
+                ("ascii" "abcABC012 \t\n\1" "łą\u2620DZßIJfiDzΣσςɕ")
+                ("nonascii" "łą\u2622DZßIJfiDzΣσςɕ" "abcABC012 \t\n\1")
+                ;; Note: sharp s is unibyte since its code point is below 256.
+                ("unibyte" "abcABC012ß \t\n\1" "łą\u2622DZIJfiDzΣσςɕ")
+                ("multibyte" "łą\u2622DZIJfiDzΣσςɕ" "abcABC012ß \t\n\1")))
   (let ((name (intern (concat "regex-tests-" (car test) "-character-class")))
         (doc (concat "Perform sanity test of regexes using " (car test)
                      " character class.
-- 
2.8.0.rc3.226.g39d4020






reply via email to

[Prev in Thread] Current Thread [Next in Thread]