emacs-elpa-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[elpa] externals/pyim 5973ac084d 2/4: Refactor pyim-cregexp-create.


From: ELPA Syncer
Subject: [elpa] externals/pyim 5973ac084d 2/4: Refactor pyim-cregexp-create.
Date: Wed, 22 Jun 2022 02:57:55 -0400 (EDT)

branch: externals/pyim
commit 5973ac084dc673e8286dcf0b2b327b3f660510a4
Author: Feng Shu <tumashu@163.com>
Commit: Feng Shu <tumashu@163.com>

    Refactor pyim-cregexp-create.
---
 pyim-cregexp.el     | 54 +++++++++++++++++++++++++++++++++--------------------
 tests/pyim-tests.el |  6 +++---
 2 files changed, 37 insertions(+), 23 deletions(-)

diff --git a/pyim-cregexp.el b/pyim-cregexp.el
index 90266a8c07..be81fdac37 100644
--- a/pyim-cregexp.el
+++ b/pyim-cregexp.el
@@ -90,30 +90,27 @@ CHAR-LEVEL-NUM 代表汉字常用级别,pyim 中根据汉字的使用频率,
 会抛弃一些不常用的汉字,重新生成,知道生成一个 Emacs 可以处理的
 regexp, 所以搜索单字的时候一般可以搜到生僻字,但搜索句子的时候,
 就无法搜索生僻字了。"
-  ;; NOTE: (rx-to-string "") will return "\\(?:\\)",
-  ;; While I want (pyim-cregexp-create "") return just "".
   (if (and string scheme
            (stringp string)
            (> (length string) 0)
            (pyim-scheme-p scheme)
            (pyim-scheme-cregexp-support-p scheme))
-      (let ((num (pyim-cregexp-char-level-num char-level-num))
-            rx-string)
-        (while (not (pyim-cregexp-valid-p rx-string))
-          (setq rx-string
-                (or (ignore-errors
-                      (rx-to-string
-                       (pyim-cregexp-create-from-rx
-                        (lambda (x)
-                          (if (stringp x)
-                              (xr (pyim-cregexp-create-1 x scheme num chinese-only))
-                            x))
-                        (xr string))))
-                    string))
-          (setq num (1- num)))
-        rx-string)
+      (pyim-cregexp-create-valid-cregexp-from-string
+       string scheme char-level-num chinese-only)
     string))
 
+(defun pyim-cregexp-create-valid-cregexp-from-string
+    (string scheme &optional char-level-num chinese-only)
+  "从 STRING 创建一个有效的搜索中文的 regexp."
+  (let ((num (pyim-cregexp-char-level-num char-level-num))
+        rx-string)
+    (while (not (pyim-cregexp-valid-p rx-string))
+      (setq rx-string
+            (pyim-cregexp-create-beautiful-cregexp-from-string
+             string scheme num chinese-only))
+      (setq num (1- num)))
+    rx-string))
+
 (defun pyim-cregexp-valid-p (cregexp)
   "Return t when cregexp is a valid regexp."
   (and cregexp
@@ -123,18 +120,35 @@ regexp, 所以搜索单字的时候一般可以搜到生僻字,但搜索句子
          ;; FIXME: Emacs can't handle regexps whose length is too big :-(
          (error nil))))
 
-(defun pyim-cregexp-create-from-rx (fn rx-form)
+(defun pyim-cregexp-create-beautiful-cregexp-from-string
+    (string scheme &optional char-level-num chinese-only)
+  "使用 rx 和 xr, 从 STRING 生成一个漂亮的搜索中文的 regexp.
+
+这个 regexp 可能正常使用,也可能长度超出 emacs 的限制。"
+  (or (ignore-errors
+        (rx-to-string
+         (pyim-cregexp-create-cregexp-from-rx
+          (lambda (x)
+            (if (stringp x)
+                (xr (pyim-cregexp-create-cregexp-from-string
+                     x scheme char-level-num chinese-only))
+              x))
+          (xr string))))
+      string))
+
+(defun pyim-cregexp-create-cregexp-from-rx (fn rx-form)
   (pcase rx-form
     ('nil nil)
     (`(,form) (funcall fn form))
     (`(any . ,_) rx-form)
     (`(,_ . ,_)
      (mapcar (lambda (x)
-               (pyim-cregexp-create-from-rx fn x))
+               (pyim-cregexp-create-cregexp-from-rx fn x))
              rx-form))
     (_ (funcall fn rx-form))))
 
-(defun pyim-cregexp-create-1 (str scheme &optional char-level-num chinese-only)
+(defun pyim-cregexp-create-cregexp-from-string
+    (str scheme &optional char-level-num chinese-only)
   (let* ((num (pyim-cregexp-char-level-num char-level-num))
          (sep "#####&&&&#####")
          (lst (remove "" (split-string
diff --git a/tests/pyim-tests.el b/tests/pyim-tests.el
index 163d5ec928..4af2a6fd6d 100644
--- a/tests/pyim-tests.el
+++ b/tests/pyim-tests.el
@@ -951,8 +951,8 @@
 
   (let* ((str (nth 2 (split-string (car (pyim-pymap-py2cchar-get "wang" t)) "|")))
          (quanpin (pyim-scheme-get 'quanpin))
-         (regexp1 (pyim-cregexp-create-1 "wang" quanpin 3 nil))
-         (regexp2 (pyim-cregexp-create-1 "wang" quanpin 2)))
+         (regexp1 (pyim-cregexp-create-cregexp-from-string "wang" quanpin 3 nil))
+         (regexp2 (pyim-cregexp-create-cregexp-from-string "wang" quanpin 2)))
     (should (string-match-p regexp1 str))
     (should-not (string-match-p regexp2 str)))
 
@@ -974,7 +974,7 @@
     (should (equal (pyim-cregexp-build "adww") "\\(?:adww\\|[其匧惹斯欺歁莢蒙][人古]?人?\\)"))
     (should (equal (pyim-cregexp-build "aaaa'aaaa")
                    "\\(?:\\(?:aaaa'\\|aaaa\\|[工恭]恭?敬?敬?\\)\\(?:aaaa\\|[工恭]恭?敬?敬?\\)\\)"))
-    (should (equal (pyim-cregexp-create-1 "aaaa'aaaa" wubi)
+    (should (equal (pyim-cregexp-create-cregexp-from-string "aaaa'aaaa" wubi)
                    "\\(?:aaaa'\\|aaaa\\|[工恭][恭]?[敬]?[敬]?\\)\\(?:aaaa\\|[工恭][恭]?[敬]?[敬]?\\)"))
     (should (equal (pyim-cregexp-build-xingma-regexp-from-words '("工" "恭恭敬敬"))
                    "[工恭][恭]?[敬]?[敬]?"))



reply via email to

[Prev in Thread] Current Thread [Next in Thread]