emacs-elpa-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[elpa] externals/pyim 12435095d3 08/12: Sort dcache, dhashcache, dregcache


From: ELPA Syncer
Subject: [elpa] externals/pyim 12435095d3 08/12: Sort dcache, dhashcache, dregcache
Date: Thu, 9 Jun 2022 10:57:53 -0400 (EDT)

branch: externals/pyim
commit 12435095d326c868aff514a1c8f312d943d25598
Author: Feng Shu <tumashu@163.com>
Commit: Feng Shu <tumashu@163.com>

    Sort dcache, dhashcache, dregcache
---
 pyim-dcache.el     | 156 ++++-----
 pyim-dhashcache.el | 904 +++++++++++++++++++++++++++--------------------------
 pyim-dregcache.el  | 581 +++++++++++++++++-----------------
 3 files changed, 833 insertions(+), 808 deletions(-)

diff --git a/pyim-dcache.el b/pyim-dcache.el
index e812d3972c..8f0848f8f0 100644
--- a/pyim-dcache.el
+++ b/pyim-dcache.el
@@ -65,15 +65,7 @@ pyim 对资源的消耗。
 2. 自动更新功能无法正常工作,用户通过手工从其他机器上拷贝
 dcache 文件的方法让 pyim 正常工作。")
 
-;; ** Dcache 变量处理相关功能
-(cl-defgeneric pyim-dcache-init-variables ()
-  "初始化 dcache 缓存相关变量."
-  nil)
-
-(cl-defmethod pyim-dcache-init-variables :before ()
-  (unless (featurep pyim-dcache-backend)
-    (require pyim-dcache-backend)))
-
+;; ** Dcache 变量初始化相关函数
 (defmacro pyim-dcache-init-variable (variable &optional fallback-value)
   "初始化 VARIABLE.
 
@@ -85,18 +77,22 @@ dcache 文件的方法让 pyim 正常工作。")
                          ,fallback-value
                          (make-hash-table :test #'equal)))))
 
-(defmacro pyim-dcache-reload-variable (variable)
-  "从 `pyim-dcache-directory' 重新读取并设置 VARIABLE 的值."
-  `(when (symbolp ',variable)
-     (setq ,variable (or (pyim-dcache-get-value ',variable)
-                         (make-hash-table :test #'equal)))))
-
 (defun pyim-dcache-get-value (variable)
   "从 `pyim-dcache-directory' 中读取与 VARIABLE 对应的文件中保存的值."
   (let ((file (expand-file-name (url-hexify-string (symbol-name variable))
                                 pyim-dcache-directory)))
     (pyim-dcache-get-value-from-file file)))
 
+(defun pyim-dcache-get-value-from-file (file)
+  "读取保存到 FILE 里面的 value."
+  (when (and (> (length file) 0)
+             (file-exists-p file))
+    (with-temp-buffer
+      (insert-file-contents file)
+      (ignore-errors
+        (read (current-buffer))))))
+
+;; ** Dcache 保存变量相关函数
 (defun pyim-dcache-save-variable (variable value &optional auto-backup-threshold)
   "将 VARIABLE 变量的取值保存到 `pyim-dcache-directory' 中对应文件中.
 
@@ -106,14 +102,6 @@ dcache 文件的方法让 pyim 正常工作。")
                                 pyim-dcache-directory)))
     (pyim-dcache-save-value-to-file value file auto-backup-threshold)))
 
-(defun pyim-dcache-value-length (value)
-  "获取 VALUE 的某个可以作为长度的值."
-  (or (ignore-errors
-        (if (hash-table-p value)
-            (hash-table-count value)
-          (length value)))
-      0))
-
 (defun pyim-dcache-save-value-to-file (value file &optional auto-backup-threshold)
   "将 VALUE 保存到 FILE 文件中.
 
@@ -144,16 +132,14 @@ AUTO-BACKUP-THRESHOLD 倍, 那么原值将自动备份到 FILE 对应的备份
         (insert ";; End:")
         (pyim-dcache-write-file file)))))
 
-(defun pyim-dcache-get-value-from-file (file)
-  "读取保存到 FILE 里面的 value."
-  (when (and (> (length file) 0)
-             (file-exists-p file))
-    (with-temp-buffer
-      (insert-file-contents file)
-      (ignore-errors
-        (read (current-buffer))))))
+(defun pyim-dcache-value-length (value)
+  "获取 VALUE 的某个可以作为长度的值."
+  (or (ignore-errors
+        (if (hash-table-p value)
+            (hash-table-count value)
+          (length value)))
+      0))
 
-;; ** Dcache 文件处理功能
 (defun pyim-dcache-write-file (filename &optional confirm)
   "A helper function to write dcache files."
   (let ((coding-system-for-write 'utf-8-unix)
@@ -170,30 +156,52 @@ AUTO-BACKUP-THRESHOLD 倍, 那么原值将自动备份到 FILE 对应的备份
     (write-region (point-min) (point-max) filename nil :silent)
     (message "Saving file %s..." filename)))
 
-(cl-defgeneric pyim-dcache-save-caches ()
-  "保存 dcache.
+;; ** Dcache 重新加载变量相关函数
+(defmacro pyim-dcache-reload-variable (variable)
+  "从 `pyim-dcache-directory' 重新读取并设置 VARIABLE 的值."
+  `(when (symbolp ',variable)
+     (setq ,variable (or (pyim-dcache-get-value ',variable)
+                         (make-hash-table :test #'equal)))))
 
-将用户选择过的词生成的缓存和词频缓存的取值
-保存到它们对应的文件中.")
+;; ** Dcache 初始化功能接口
+(cl-defgeneric pyim-dcache-init-variables ()
+  "初始化 dcache 缓存相关变量."
+  nil)
 
-;; ** Dcache 导出功能
-(cl-defgeneric pyim-dcache-export-words-and-counts (file &optional confirm ignore-counts)
-  "将个人词条以及词条对应的词频信息导出到文件 FILE.
+(cl-defmethod pyim-dcache-init-variables :before ()
+  (unless (featurep pyim-dcache-backend)
+    (require pyim-dcache-backend)))
 
-如果 FILE 为 nil, 提示用户指定导出文件位置, 如果 CONFIRM 为
-non-nil,文件存在时将会提示用户是否覆盖,默认为覆盖模式")
+;; ** Dcache 检索词条功能接口
+(cl-defgeneric pyim-dcache-get (_code &optional _from)
+  "从 FROM 对应的 dcache 中搜索 CODE, 得到对应的词条.
 
-(cl-defgeneric pyim-dcache-export-personal-words (file &optional confirm)
-  "将用户的个人词条导出为 pyim 词库文件.
+当词库文件加载完成后,pyim 就可以用这个函数从词库缓存中搜索某个
+code 对应的中文词条了."
+  nil)
 
-如果 FILE 为 nil, 提示用户指定导出文件位置, 如果 CONFIRM 为 non-nil,
-文件存在时将会提示用户是否覆盖,默认为覆盖模式。")
+(cl-defmethod pyim-dcache-get :before (_code &optional _from)
+  (unless (featurep pyim-dcache-backend)
+    (require pyim-dcache-backend)))
 
-;; ** Dcache 更新功能
-(cl-defgeneric pyim-dcache-update (&optional force)
-  "读取并加载所有相关词库 dcache.
+;; ** Dcache 代码反查功能接口
+(cl-defgeneric pyim-dcache-search-word-code (word)
+  "从 dcache 中搜索 WROD 对应的 code.")
+
+;; ** Dcache 加词功能接口
+(cl-defgeneric pyim-dcache-insert-word (word code prepend)
+  "将词条 WORD 插入到 dcache 中。
+
+如果 PREPEND 为 non-nil, 词条将放到已有词条的最前面。
+内部函数会根据 CODE 来确定插入对应的 hash key.")
+
+;; ** Dcache 删词功能
+(cl-defgeneric pyim-dcache-delete-word (word)
+  "将中文词条 WORD 从个人词库中删除")
 
-如果 FORCE 为真,强制加载。")
+;; ** Dcache 更新功能接口
+(cl-defgeneric pyim-dcache-update (&optional force)
+  "读取并加载所有相关词库 dcache, 如果 FORCE 为真,强制加载。")
 
 (defun pyim-dcache-create-files-md5 (files)
   "为 FILES 生成 md5 字符串。"
@@ -204,6 +212,7 @@ non-nil,文件存在时将会提示用户是否覆盖,默认为覆盖模式"
                     (list version file (nth 5 (file-attributes file 'string))))
                   files)))))
 
+;; ** Dcache 更新词条统计量功能接口
 (cl-defgeneric pyim-dcache-update-wordcount (word &optional wordcount-handler)
   "保存 WORD 词频.
 
@@ -212,42 +221,35 @@ non-nil,文件存在时将会提示用户是否覆盖,默认为覆盖模式"
 2. 如果 WORDCOUNT-HANDLER 是一个数值:那么这个数值直接作为词频保存。
 3. 如果 WORDCOUNT-HANDLER 为其他值:词频不变.")
 
-;; ** Dcache 加词功能
-(cl-defgeneric pyim-dcache-insert-word (word code prepend)
-  "将词条 WORD 插入到 dcache 中。
-
-如果 PREPEND 为 non-nil, 词条将放到已有词条的最前面。
-内部函数会根据 CODE 来确定插入对应的 hash key.")
-
-;; ** Dcache 升级功能
+;; ** Dcache 升级功能接口
 (cl-defgeneric pyim-dcache-upgrade ()
   "升级词库缓存.")
 
-;; ** Dcache 删词功能
-(cl-defgeneric pyim-dcache-delete-word (word)
-  "将中文词条 WORD 从个人词库中删除")
+;; ** Dcache 排序功能接口
+(cl-defgeneric pyim-dcache-sort-words (words)
+  "对 WORDS 进行排序。"
+  words)
 
-;; ** Dcache 检索功能
-(cl-defgeneric pyim-dcache-get (code &optional from)
-  "从 FROM 对应的 dcache 中搜索 CODE, 得到对应的词条.
+;; ** Dcache 保存功能接口
+(cl-defgeneric pyim-dcache-save-caches ()
+  "保存 dcache.
 
-当词库文件加载完成后,pyim 就可以用这个函数从词库缓存中搜索某个
-code 对应的中文词条了."
-  ;; Fix compile warn
-  (ignore code from)
-  nil)
+将用户选择过的词生成的缓存和词频缓存的取值
+保存到它们对应的文件中.")
 
-(cl-defmethod pyim-dcache-get :before (_code &optional _from)
-  (unless (featurep pyim-dcache-backend)
-    (require pyim-dcache-backend)))
+;; ** Dcache 导出功能接口
+(cl-defgeneric pyim-dcache-export-words-and-counts (file &optional confirm ignore-counts)
+  "将个人词条以及词条对应的词频信息导出到文件 FILE.
 
-(cl-defgeneric pyim-dcache-search-word-code (word)
-  "从 dcache 中搜索 WROD 对应的 code.")
+如果 FILE 为 nil, 提示用户指定导出文件位置, 如果 CONFIRM 为
+non-nil,文件存在时将会提示用户是否覆盖,默认为覆盖模式")
+
+(cl-defgeneric pyim-dcache-export-personal-words (file &optional confirm)
+  "将用户的个人词条导出为 pyim 词库文件.
+
+如果 FILE 为 nil, 提示用户指定导出文件位置, 如果 CONFIRM 为 non-nil,
+文件存在时将会提示用户是否覆盖,默认为覆盖模式。")
 
-;; ** Dcache 排序功能
-(cl-defgeneric pyim-dcache-sort-words (words)
-  "对 WORDS 进行排序。"
-  words)
 
 ;; * Footer
 (provide 'pyim-dcache)
diff --git a/pyim-dhashcache.el b/pyim-dhashcache.el
index fc8b43cf06..f07cdf7811 100644
--- a/pyim-dhashcache.el
+++ b/pyim-dhashcache.el
@@ -79,6 +79,161 @@
 (defvar pyim-dhashcache-update-iword2priority-p nil)
 (defvar pyim-dhashcache-update-code2word-running-p nil)
 
+;; ** 初始化 dhashcache 相关函数
+(cl-defmethod pyim-dcache-init-variables
+  (&context (pyim-dcache-backend (eql pyim-dhashcache)))
+  "初始化 dcache 缓存相关变量."
+  (when (and (not pyim-dhashcache-icode2word)
+             pyim-dcache-directory
+             (file-directory-p pyim-dcache-directory)
+             (directory-files pyim-dcache-directory nil "-backup-"))
+    (message "PYIM: 在 %S 目录中发现备份文件的存在,可能是词库缓存文件损坏导致,请抓紧检查处理!!!"
+             pyim-dcache-directory))
+  (pyim-dhashcache-init-count-and-priority-variables)
+  (pyim-dcache-init-variable pyim-dhashcache-code2word)
+  (pyim-dcache-init-variable pyim-dhashcache-word2code)
+  (pyim-dcache-init-variable pyim-dhashcache-shortcode2word)
+  (pyim-dcache-init-variable pyim-dhashcache-icode2word)
+  (pyim-dcache-init-variable pyim-dhashcache-ishortcode2word))
+
+(defun pyim-dhashcache-init-count-and-priority-variables ()
+  "初始化 count 相关的变量。"
+  (pyim-dcache-init-variable pyim-dhashcache-iword2count)
+  (pyim-dcache-init-variable pyim-dhashcache-iword2count-log)
+  (pyim-dcache-init-variable pyim-dhashcache-iword2count-recent-10-words)
+  (pyim-dcache-init-variable pyim-dhashcache-iword2count-recent-50-words)
+  (pyim-dcache-init-variable pyim-dhashcache-iword2priority))
+
+;; ** 从 dhashcache 搜索词条相关函数
+(cl-defmethod pyim-dcache-get
+  (code &context (pyim-dcache-backend (eql pyim-dhashcache))
+        &optional from)
+  "从 FROM 对应的 dcaches 中搜索 CODE, 得到对应的词条.
+
+当词库文件加载完成后,pyim 就可以用这个函数从词库缓存中搜索某个
+code 对应的中文词条了。
+
+如果 FROM 为 nil, 则默认搜索 `pyim-dhashcache-icode2word' 和
+`pyim-dhashcache-code2word' 两个 dcache."
+  (when code
+    (let* ((caches (mapcar (lambda (x)
+                             (intern (concat "pyim-dhashcache-" (symbol-name x))))
+                           (or (and from
+                                    (if (listp from)
+                                        from
+                                      (list from)))
+                               '(icode2word code2word))))
+           result)
+      (dolist (cache caches)
+        (let* ((cache (ignore-errors (symbol-value cache)))
+               (value (and cache (gethash code cache))))
+          ;; 处理 iword2count.
+          (unless (listp value)
+            (setq value (list value)))
+          (when value
+            (setq result (append result value)))))
+      result)))
+
+;; ** 从 dhashcache 搜索代码相关函数
+(cl-defmethod pyim-dcache-search-word-code
+  (string &context (pyim-dcache-backend (eql pyim-dhashcache)))
+  (gethash string pyim-dhashcache-word2code))
+
+;; ** 给 dhashcache 添加词条相关函数
+(cl-defmethod pyim-dcache-insert-word
+  (word code prepend
+        &context (pyim-dcache-backend (eql pyim-dhashcache)))
+  "将词条 WORD 插入到下面两个词库缓存中。
+
+1. `pyim-dhashcache-icode2word'
+2. `pyim-dhashcache-insert-word-into-ishortcode2word'."
+  (pyim-dhashcache-insert-word-into-icode2word word code prepend)
+  ;; NOTE: 保存词条到 icode2word 词库缓存的同时,也在 ishortcode2word 词库缓存中
+  ;; 临时写入一份,供当前 Emacs session 使用,但退出时 pyim 不会保存
+  ;; ishortcode2word 词库缓存到文件,因为下次启动 Emacs 的时候,ishortcode2word
+  ;; 词库缓存会从 icode2word 再次重建。
+  (pyim-dhashcache-insert-word-into-ishortcode2word word code prepend))
+
+(defmacro pyim-dhashcache-put (cache code &rest body)
+  "将 BODY 的返回值保存到 CACHE 对应的 CODE 中。
+
+注意事项:这个宏是一个指代宏,其中 orig-value 在这个宏中有特殊含
+义,代表原来 code 对应的取值。"
+  (declare (indent 0))
+  (let ((key (make-symbol "key"))
+        (table (make-symbol "table"))
+        (new-value (make-symbol "new-value")))
+    `(let* ((,key ,code)
+            (,table ,cache)
+            (orig-value (gethash ,key ,table))
+            ,new-value)
+       (setq ,new-value (progn ,@body))
+       (puthash ,key ,new-value ,table))))
+
+(defun pyim-dhashcache-insert-word-into-icode2word (word code prepend)
+  "将词条 WORD 插入到 icode2word 词库缓存 CODE 键对应的位置.
+
+默认 WORD 放到已有词条的最后,如果 PREPEND 为 non-nil, WORD 将放
+到已有词条的最前面。"
+  (pyim-dhashcache-put
+    pyim-dhashcache-icode2word code
+    (if prepend
+        `(,word ,@(remove word orig-value))
+      `(,@(remove word orig-value) ,word))))
+
+(defun pyim-dhashcache-insert-word-into-ishortcode2word (word code prepend)
+  "将词条 WORD 插入到 ishortcode2word 词库缓存 CODE 首字母字符串对应的位置.
+
+默认 WORD 放到已有词条的最后,如果 PREPEND 为 non-nil, WORD 将放
+到已有词条的最前面。"
+  (dolist (newcode (pyim-dhashcache-get-ishortcodes code))
+    (pyim-dhashcache-put
+      pyim-dhashcache-ishortcode2word
+      newcode
+      (if prepend
+          `(,word ,@(remove word orig-value))
+        `(,@(remove word orig-value) ,word)))))
+
+(defun pyim-dhashcache-get-ishortcodes (code)
+  "获取CODE 所有的简写 ishortcodes.
+
+比如: ni-hao -> (n-h)
+
+注意事项:这个函数用于全拼输入法。"
+  (when (and (> (length code) 0)
+             (not (pyim-string-match-p "/" code))
+             (not (pyim-string-match-p "[^a-z-]" code)))
+    (list (mapconcat
+           (lambda (x)
+             (substring x 0 1))
+           (split-string code "-") "-"))))
+
+;; ** 从 dhashcache 删除词条相关函数
+(cl-defmethod pyim-dcache-delete-word
+  (word &context (pyim-dcache-backend (eql pyim-dhashcache)))
+  "将中文词条 WORD 从个人词库中删除"
+  (maphash
+   (lambda (key value)
+     (when (member word value)
+       (let ((new-value (remove word value)))
+         (if new-value
+             (puthash key new-value pyim-dhashcache-icode2word)
+           (remhash key pyim-dhashcache-icode2word)))))
+   pyim-dhashcache-icode2word)
+  (maphash
+   (lambda (key value)
+     (when (member word value)
+       (print value)
+       (let ((new-value (remove word value)))
+         (if new-value
+             (puthash key new-value pyim-dhashcache-ishortcode2word)
+           (remhash key pyim-dhashcache-ishortcode2word)))))
+   pyim-dhashcache-ishortcode2word)
+  (remhash word pyim-dhashcache-iword2count)
+  (remhash word pyim-dhashcache-iword2count-log)
+  (remhash word pyim-dhashcache-iword2priority))
+
+;; ** 更新 dhashcache 相关函数
 (cl-defmethod pyim-dcache-update
   (&context (pyim-dcache-backend (eql pyim-dhashcache)) &optional force)
   "读取并加载所有相关词库 dcache.
@@ -92,23 +247,57 @@
            (dicts-md5 (pyim-dcache-create-files-md5 dict-files)))
       (pyim-dhashcache-update-code2word dict-files dicts-md5 force))))
 
-(cl-defmethod pyim-dcache-sort-words
-  (words-list &context (pyim-dcache-backend (eql pyim-dhashcache)))
-  "对 WORDS-LIST 排序"
-  (let ((iword2count pyim-dhashcache-iword2count)
-        (iword2priority pyim-dhashcache-iword2priority))
-    (sort words-list
-          (lambda (a b)
-            (let ((p1 (gethash a iword2priority))
-                  (p2 (gethash b iword2priority)))
-              (cond
-               ((and (listp p1)
-                     (listp p2)
-                     (not (equal p1 p2)))
-                (pyim-numbers> p1 p2))
-               (t (let ((n1 (or (gethash a iword2count) 0))
-                        (n2 (or (gethash b iword2count) 0)))
-                    (> n1 n2)))))))))
+(defun pyim-dhashcache-update-iword2priority (&optional force)
+  "更新词条优先级表,如果 FORCE 为真,强制更新。"
+  (interactive)
+  (when (or force (not pyim-dhashcache-update-iword2priority-p))
+    ;; NOTE: 这个变量按理说应该在回调函数里面设置,但 async 在某些情况下会卡死,
+    ;; 这个变量无法设置为 t, 导致后续产生大量的 emacs 进程,极其影响性能。
+    (setq pyim-dhashcache-update-iword2priority-p t)
+    (async-start
+     `(lambda ()
+        ,@(pyim-dhashcache-async-inject-variables)
+        (require 'pyim-dhashcache)
+        (pyim-dhashcache-init-count-and-priority-variables)
+        (maphash
+         (lambda (key value)
+           (puthash key
+                    (pyim-dhashcache-calculate-priority
+                     (pyim-dhashcache-get-counts-from-log
+                      value))
+                    pyim-dhashcache-iword2priority))
+         pyim-dhashcache-iword2count-log)
+        (pyim-dcache-save-variable
+         'pyim-dhashcache-iword2priority
+         pyim-dhashcache-iword2priority)
+        nil)
+     (lambda (_)
+       (pyim-dcache-reload-variable pyim-dhashcache-iword2priority)))))
+
+(defun pyim-dhashcache-async-inject-variables ()
+  "pyim's async-inject-variables."
+  (list (async-inject-variables "^load-path$")
+        (async-inject-variables "^exec-path$")
+        (async-inject-variables "^pyim-.+?directory$")))
+
+(defun pyim-dhashcache-calculate-priority (counts-info)
+  "根据 COUNTS-INFO 计算优先级(优先级是多个数字组成的一个列表),
+用于对词条进行排序。COUNTS-INFO 是一个 alist, 其结构类似:
+
+      ((day n1 n2 n3 ...))
+
+其中 (n1 n2 n3 ...) 代表从当前日期逐日倒推,每日 count 所组成的列表。"
+  (mapcar (lambda (x)
+            (let* ((label (car x))
+                   (plist (cdr x))
+                   (weights (plist-get plist :weights))
+                   (factor (plist-get plist :factor)))
+              (round (* (apply #'+ (cl-mapcar (lambda (a b)
+                                                (* (or a 0) b))
+                                              (cdr (assoc label counts-info))
+                                              weights))
+                        factor))))
+          pyim-dhashcache-count-types))
 
 (defun pyim-dhashcache-get-counts-from-log (log-info &optional time)
   "从 LOG-INFO 中获取所有的 count 值。
@@ -133,64 +322,37 @@
               `(,label ,@(reverse output))))
           pyim-dhashcache-count-types))
 
-(defun pyim-dhashcache-calculate-priority (counts-info)
-  "根据 COUNTS-INFO 计算优先级(优先级是多个数字组成的一个列表),
-用于对词条进行排序。COUNTS-INFO 是一个 alist, 其结构类似:
-
-      ((day n1 n2 n3 ...))
-
-其中 (n1 n2 n3 ...) 代表从当前日期逐日倒推,每日 count 所组成的列表。"
-  (mapcar (lambda (x)
-            (let* ((label (car x))
-                   (plist (cdr x))
-                   (weights (plist-get plist :weights))
-                   (factor (plist-get plist :factor)))
-              (round (* (apply #'+ (cl-mapcar (lambda (a b)
-                                                (* (or a 0) b))
-                                              (cdr (assoc label counts-info))
-                                              weights))
-                        factor))))
-          pyim-dhashcache-count-types))
-
-(defun pyim-dhashcache-get-shortcodes (code)
-  "获取 CODE 所有的 shortcodes.
-
-比如:wubi/aaaa -> (wubi/aaa wubi/aa)
-
-注意事项:这个函数目前只用于五笔等型码输入法,不用于拼音输入法,
-因为拼音输入法词库太大,这样处理之后,会生成一个特别大的哈希表,
-占用太多内存资源,拼音输入法使用 ishortcode 机制。"
-  (when (and (pyim-string-match-p "/" code)
-             (not (pyim-string-match-p "-" code)))
-    (let* ((x (split-string code "/"))
-           (prefix (concat (nth 0 x) "/"))
-           (code1 (nth 1 x))
-           (n (length code1))
-           results)
-      (dotimes (i n)
-        (when (> i 1)
-          (push (concat prefix (substring code1 0 i)) results)))
-      results)))
-
-(defun pyim-dhashcache-get-ishortcodes (code)
-  "获取CODE 所有的简写 ishortcodes.
-
-比如: ni-hao -> (n-h)
+(defun pyim-dhashcache-update-personal-words (&optional force)
+  (pyim-dhashcache-update-icode2word force))
 
-注意事项:这个函数用于全拼输入法。"
-  (when (and (> (length code) 0)
-             (not (pyim-string-match-p "/" code))
-             (not (pyim-string-match-p "[^a-z-]" code)))
-    (list (mapconcat
-           (lambda (x)
-             (substring x 0 1))
-           (split-string code "-") "-"))))
+(defun pyim-dhashcache-update-icode2word (&optional force)
+  "对 personal 缓存中的词条进行排序,加载排序后的结果.
 
-(defun pyim-dhashcache-async-inject-variables ()
-  "pyim's async-inject-variables."
-  (list (async-inject-variables "^load-path$")
-        (async-inject-variables "^exec-path$")
-        (async-inject-variables "^pyim-.+?directory$")))
+在这个过程中使用了 `pyim-dhashcache-iword2count' 中记录的词频信息。
+如果 FORCE 为真,强制排序。"
+  (interactive)
+  (when (or force (not pyim-dhashcache-update-icode2word-p))
+    ;; NOTE: 这个变量按理说应该在回调函数里面设置,但 async 在某些情况下会卡死,
+    ;; 这个变量无法设置为 t, 导致后续产生大量的 emacs 进程,极其影响性能。
+    (setq pyim-dhashcache-update-icode2word-p t)
+    (async-start
+     `(lambda ()
+        ,@(pyim-dhashcache-async-inject-variables)
+        (require 'pyim-dhashcache)
+        (pyim-dcache-init-variable pyim-dhashcache-icode2word)
+        (pyim-dhashcache-init-count-and-priority-variables)
+        (maphash
+         (lambda (key value)
+           (puthash key (pyim-dcache-sort-words value)
+                    pyim-dhashcache-icode2word))
+         pyim-dhashcache-icode2word)
+        (pyim-dcache-save-variable
+         'pyim-dhashcache-icode2word
+         pyim-dhashcache-icode2word)
+        nil)
+     (lambda (_)
+       (pyim-dcache-reload-variable pyim-dhashcache-icode2word)
+       (pyim-dhashcache-update-ishortcode2word force)))))
 
 (defun pyim-dhashcache-update-ishortcode2word (&optional force)
   "读取 `pyim-dhashcache-icode2word' 中的词库,创建 *简拼* 缓存,然后加载这个缓存.
@@ -233,54 +395,62 @@
      ishortcode2word)
     ishortcode2word))
 
-(defun pyim-dhashcache-update-shortcode2word (&optional force)
-  "使用 `pyim-dhashcache-code2word' 中的词条,创建简写 code 词库缓存并加载.
+(defun pyim-dhashcache-update-code2word (dict-files dicts-md5 &optional force)
+  "读取并加载词库.
 
-如果 FORCE 为真,强制运行。"
+读取词库文件 DICT-FILES,生成对应的词库缓冲文件,然后加载词库缓存。
+
+如果 FORCE 为真,强制加载。"
   (interactive)
-  (when (or force (not pyim-dhashcache-update-shortcode2word-p))
-    ;; NOTE: 这个变量按理说应该在回调函数里面设置,但 async 在某些情况下会卡死,
-    ;; 这个变量无法设置为 t, 导致后续产生大量的 emacs 进程,极其影响性能。
-    (setq pyim-dhashcache-update-shortcode2word-p t)
-    (async-start
-     `(lambda ()
-        ,@(pyim-dhashcache-async-inject-variables)
-        (require 'pyim-dhashcache)
-        (pyim-dcache-init-variable pyim-dhashcache-code2word)
-        (pyim-dhashcache-init-count-and-priority-variables)
-        (pyim-dcache-save-variable
-         'pyim-dhashcache-shortcode2word
-         (pyim-dhashcache-update-shortcode2word-1
-          pyim-dhashcache-code2word)))
-     (lambda (_)
-       (pyim-dcache-reload-variable pyim-dhashcache-shortcode2word)))))
+  (let* ((code2word-file (pyim-dhashcache-get-path 'pyim-dhashcache-code2word))
+         (word2code-file (pyim-dhashcache-get-path 'pyim-dhashcache-word2code))
+         (code2word-md5-file (pyim-dhashcache-get-path 'pyim-dhashcache-code2word-md5)))
+    (when (or force (and (not (equal dicts-md5 (pyim-dcache-get-value-from-file code2word-md5-file)))
+                         (not pyim-dhashcache-update-code2word-running-p)))
+      (setq pyim-dhashcache-update-code2word-running-p t)
+      ;; use hashtable
+      (async-start
+       `(lambda ()
+          ,@(pyim-dhashcache-async-inject-variables)
+          (require 'pyim-dhashcache)
+          (let ((dcache (pyim-dhashcache-generate-dcache-file ',dict-files ,code2word-file)))
+            (pyim-dhashcache-generate-word2code-dcache-file dcache ,word2code-file))
+          (pyim-dcache-save-value-to-file ',dicts-md5 ,code2word-md5-file))
+       (lambda (_)
+         (pyim-dcache-reload-variable pyim-dhashcache-code2word)
+         (pyim-dcache-reload-variable pyim-dhashcache-word2code)
+         (pyim-dhashcache-update-shortcode2word force)
+         (setq pyim-dhashcache-update-code2word-running-p nil))))))
 
-(defun pyim-dhashcache-update-shortcode2word-1 (code2word)
-  "`pyim-dhashcache-update-shortcode2word' 的内部函数"
-  (let ((shortcode2word (make-hash-table :test #'equal)))
-    (maphash
-     (lambda (key value)
-       (dolist (x (pyim-dhashcache-get-shortcodes key))
-         (puthash x
-                  (mapcar
-                   (lambda (word)
-                     ;; 这个地方的代码用于实现五笔 code 自动提示功能,
-                     ;; 比如输入 'aa' 后得到选词框:
-                     ;; ----------------------
-                     ;; | 1. 莁aa 2.匶wv ... |
-                     ;; ----------------------
-                     (if (get-text-property 0 :comment word)
-                         word
-                       (propertize word :comment (substring key (length x)))))
-                   (delete-dups `(,@(gethash x shortcode2word) ,@value)))
-                  shortcode2word)))
-     code2word)
-    (maphash
-     (lambda (key value)
-       (puthash key (pyim-dcache-sort-words value)
-                shortcode2word))
-     shortcode2word)
-    shortcode2word))
+(defun pyim-dhashcache-generate-word2code-dcache-file (dcache file)
+  "从 DCACHE 生成一个 word -> code 的反向查询表.
+DCACHE 是一个 code -> words 的 hashtable.
+并将生成的表保存到 FILE 中."
+  (when (hash-table-p dcache)
+    (let ((hashtable (make-hash-table :size 1000000 :test #'equal)))
+      (maphash
+       (lambda (code words)
+         ;; 这里主要考虑五笔仓颉等形码输入法,也就是 code-prefix 中包含 "/" 的输
+         ;; 入法,全拼输入法反查功能主要使用 pymap 实现,不使用这个表。
+         (when (pyim-string-match-p "/" code)
+           (dolist (word words)
+             (let ((value (gethash word hashtable))
+                   ;; NOTE: 这里使用 `cl-copy-seq', 可以让保存的文件内容类似:
+                   ;;
+                   ;;   "呵" ("he" "a")
+                   ;;
+                   ;; 而不是:
+                   ;;
+                   ;;   "呵" (#9="he" #2#)
+                   ;;
+                   (code (cl-copy-seq code)))
+               (puthash word
+                        (if value
+                            `(,code ,@value)
+                          (list code))
+                        hashtable)))))
+       dcache)
+      (pyim-dcache-save-value-to-file hashtable file))))
 
 (defun pyim-dhashcache-get-path (variable)
   "获取保存 VARIABLE 取值的文件的路径."
@@ -318,263 +488,76 @@ pyim 使用的词库文件是简单的文本文件,编码 *强制* 为 \\='utf
     (pyim-dcache-save-value-to-file hashtable dcache-file)
     hashtable))
 
-(defun pyim-dhashcache-generate-word2code-dcache-file (dcache file)
-  "从 DCACHE 生成一个 word -> code 的反向查询表.
-DCACHE 是一个 code -> words 的 hashtable.
-并将生成的表保存到 FILE 中."
-  (when (hash-table-p dcache)
-    (let ((hashtable (make-hash-table :size 1000000 :test #'equal)))
-      (maphash
-       (lambda (code words)
-         ;; 这里主要考虑五笔仓颉等形码输入法,也就是 code-prefix 中包含 "/" 的输
-         ;; 入法,全拼输入法反查功能主要使用 pymap 实现,不使用这个表。
-         (when (pyim-string-match-p "/" code)
-           (dolist (word words)
-             (let ((value (gethash word hashtable))
-                   ;; NOTE: 这里使用 `cl-copy-seq', 可以让保存的文件内容类似:
-                   ;;
-                   ;;   "呵" ("he" "a")
-                   ;;
-                   ;; 而不是:
-                   ;;
-                   ;;   "呵" (#9="he" #2#)
-                   ;;
-                   (code (cl-copy-seq code)))
-               (puthash word
-                        (if value
-                            `(,code ,@value)
-                          (list code))
-                        hashtable)))))
-       dcache)
-      (pyim-dcache-save-value-to-file hashtable file))))
-
-(defun pyim-dhashcache-update-code2word (dict-files dicts-md5 &optional force)
-  "读取并加载词库.
-
-读取词库文件 DICT-FILES,生成对应的词库缓冲文件,然后加载词库缓存。
-
-如果 FORCE 为真,强制加载。"
-  (interactive)
-  (let* ((code2word-file (pyim-dhashcache-get-path 'pyim-dhashcache-code2word))
-         (word2code-file (pyim-dhashcache-get-path 'pyim-dhashcache-word2code))
-         (code2word-md5-file (pyim-dhashcache-get-path 'pyim-dhashcache-code2word-md5)))
-    (when (or force (and (not (equal dicts-md5 (pyim-dcache-get-value-from-file code2word-md5-file)))
-                         (not pyim-dhashcache-update-code2word-running-p)))
-      (setq pyim-dhashcache-update-code2word-running-p t)
-      ;; use hashtable
-      (async-start
-       `(lambda ()
-          ,@(pyim-dhashcache-async-inject-variables)
-          (require 'pyim-dhashcache)
-          (let ((dcache (pyim-dhashcache-generate-dcache-file ',dict-files ,code2word-file)))
-            (pyim-dhashcache-generate-word2code-dcache-file dcache ,word2code-file))
-          (pyim-dcache-save-value-to-file ',dicts-md5 ,code2word-md5-file))
-       (lambda (_)
-         (pyim-dcache-reload-variable pyim-dhashcache-code2word)
-         (pyim-dcache-reload-variable pyim-dhashcache-word2code)
-         (pyim-dhashcache-update-shortcode2word force)
-         (setq pyim-dhashcache-update-code2word-running-p nil))))))
-
-(defun pyim-dhashcache-export (dcache file &optional confirm)
-  "将一个 pyim DCACHE 导出为文件 FILE.
-
-如果 CONFIRM 为 non-nil,文件存在时将会提示用户是否覆盖,
-默认为覆盖模式"
-  (with-temp-buffer
-    (insert ";;; -*- coding: utf-8-unix -*-\n")
-    (maphash
-     (lambda (key value)
-       (let ((value (cl-remove-if
-                     (lambda (x)
-                       ;; 如果某个词条的 text 属性 :noexport 设置为 t, 在导出的
-                       ;; 时候自动忽略这个词条。
-                       (and (stringp x)
-                            (get-text-property 0 :noexport x)))
-                     (if (listp value)
-                         value
-                       (list value)))))
-         (when value
-           (insert (format "%s %s\n" key (mapconcat #'identity value " "))))))
-     dcache)
-    (pyim-dcache-write-file file confirm)))
-
-(cl-defmethod pyim-dcache-get
-  (code &context (pyim-dcache-backend (eql pyim-dhashcache))
-        &optional from)
-  "从 FROM 对应的 dcaches 中搜索 CODE, 得到对应的词条.
-
-当词库文件加载完成后,pyim 就可以用这个函数从词库缓存中搜索某个
-code 对应的中文词条了。
-
-如果 FROM 为 nil, 则默认搜索 `pyim-dhashcache-icode2word' 和
-`pyim-dhashcache-code2word' 两个 dcache."
-  (when code
-    (let* ((caches (mapcar (lambda (x)
-                             (intern (concat "pyim-dhashcache-" (symbol-name x))))
-                           (or (and from
-                                    (if (listp from)
-                                        from
-                                      (list from)))
-                               '(icode2word code2word))))
-           result)
-      (dolist (cache caches)
-        (let* ((cache (ignore-errors (symbol-value cache)))
-               (value (and cache (gethash code cache))))
-          ;; 处理 iword2count.
-          (unless (listp value)
-            (setq value (list value)))
-          (when value
-            (setq result (append result value)))))
-      result)))
-
-(defun pyim-dhashcache-update-icode2word (&optional force)
-  "对 personal 缓存中的词条进行排序,加载排序后的结果.
+(defun pyim-dhashcache-update-shortcode2word (&optional force)
+  "使用 `pyim-dhashcache-code2word' 中的词条,创建简写 code 词库缓存并加载.
 
-在这个过程中使用了 `pyim-dhashcache-iword2count' 中记录的词频信息。
-如果 FORCE 为真,强制排序。"
+如果 FORCE 为真,强制运行。"
   (interactive)
-  (when (or force (not pyim-dhashcache-update-icode2word-p))
+  (when (or force (not pyim-dhashcache-update-shortcode2word-p))
     ;; NOTE: 这个变量按理说应该在回调函数里面设置,但 async 在某些情况下会卡死,
     ;; 这个变量无法设置为 t, 导致后续产生大量的 emacs 进程,极其影响性能。
-    (setq pyim-dhashcache-update-icode2word-p t)
+    (setq pyim-dhashcache-update-shortcode2word-p t)
     (async-start
      `(lambda ()
         ,@(pyim-dhashcache-async-inject-variables)
         (require 'pyim-dhashcache)
-        (pyim-dcache-init-variable pyim-dhashcache-icode2word)
+        (pyim-dcache-init-variable pyim-dhashcache-code2word)
         (pyim-dhashcache-init-count-and-priority-variables)
-        (maphash
-         (lambda (key value)
-           (puthash key (pyim-dcache-sort-words value)
-                    pyim-dhashcache-icode2word))
-         pyim-dhashcache-icode2word)
         (pyim-dcache-save-variable
-         'pyim-dhashcache-icode2word
-         pyim-dhashcache-icode2word)
-        nil)
+         'pyim-dhashcache-shortcode2word
+         (pyim-dhashcache-update-shortcode2word-1
+          pyim-dhashcache-code2word)))
      (lambda (_)
-       (pyim-dcache-reload-variable pyim-dhashcache-icode2word)
-       (pyim-dhashcache-update-ishortcode2word force)))))
-
-(cl-defmethod pyim-dcache-upgrade (&context (pyim-dcache-backend (eql pyim-dhashcache)))
-  "升级词库缓存.
-
-当前已有的功能:
-1. 基于 :code-prefix-history 信息,升级为新的 code-prefix。"
-  (pyim-dhashcache-upgrade-icode2word))
-
-(defun pyim-dhashcache-upgrade-icode2word ()
-  "升级 icode2word 缓存。"
-  (let ((delete-old-key-p (yes-or-no-p "Delete old key after upgrade? "))
-        (ruler-list (delete-dups
-                     (remove nil
-                             (mapcar
-                              (lambda (scheme)
-                                (let ((code-prefix (plist-get (cdr scheme) :code-prefix))
-                                      (code-prefix-history (plist-get (cdr scheme) :code-prefix-history)))
-                                  (when code-prefix-history
-                                    (cons code-prefix-history code-prefix))))
-                              pyim-schemes)))))
-    (dolist (ruler ruler-list)
-      (let ((old-prefix-list (car ruler))
-            (new-prefix (cdr ruler)))
-        (dolist (old-prefix old-prefix-list)
-          (maphash
-           (lambda (key _value)
-             (when (string-prefix-p old-prefix key)
-               (let* ((key-words (gethash key pyim-dhashcache-icode2word))
-                      (new-key (concat new-prefix (string-remove-prefix old-prefix key)))
-                      (new-key-words (gethash new-key pyim-dhashcache-icode2word))
-                      (merged-value (delete-dups `(,@new-key-words ,@key-words))))
-                 (puthash new-key merged-value pyim-dhashcache-icode2word)
-                 (message "PYIM icode2word upgrade: %S %S -> %S %S" key key-words new-key merged-value)
-                 (when delete-old-key-p
-                   (remhash key pyim-dhashcache-icode2word)
-                   (message "PYIM icode2word upgrade: %S has been deleted." key)))))
-           pyim-dhashcache-icode2word))))))
-
-(defun pyim-dhashcache-update-personal-words (&optional force)
-  (pyim-dhashcache-update-icode2word force))
-
-(cl-defmethod pyim-dcache-init-variables
-  (&context (pyim-dcache-backend (eql pyim-dhashcache)))
-  "初始化 dcache 缓存相关变量."
-  (when (and (not pyim-dhashcache-icode2word)
-             pyim-dcache-directory
-             (file-directory-p pyim-dcache-directory)
-             (directory-files pyim-dcache-directory nil "-backup-"))
-    (message "PYIM: 在 %S 目录中发现备份文件的存在,可能是词库缓存文件损坏导致,请抓紧检查处理!!!"
-             pyim-dcache-directory))
-  (pyim-dhashcache-init-count-and-priority-variables)
-  (pyim-dcache-init-variable pyim-dhashcache-code2word)
-  (pyim-dcache-init-variable pyim-dhashcache-word2code)
-  (pyim-dcache-init-variable pyim-dhashcache-shortcode2word)
-  (pyim-dcache-init-variable pyim-dhashcache-icode2word)
-  (pyim-dcache-init-variable pyim-dhashcache-ishortcode2word))
-
-(defun pyim-dhashcache-init-count-and-priority-variables ()
-  "初始化 count 相关的变量。"
-  (pyim-dcache-init-variable pyim-dhashcache-iword2count)
-  (pyim-dcache-init-variable pyim-dhashcache-iword2count-log)
-  (pyim-dcache-init-variable pyim-dhashcache-iword2count-recent-10-words)
-  (pyim-dcache-init-variable pyim-dhashcache-iword2count-recent-50-words)
-  (pyim-dcache-init-variable pyim-dhashcache-iword2priority))
-
-(cl-defmethod pyim-dcache-save-caches
-  (&context (pyim-dcache-backend (eql pyim-dhashcache)))
-  (pyim-dhashcache-save-personal-dcache-to-file))
+       (pyim-dcache-reload-variable pyim-dhashcache-shortcode2word)))))
 
-(defun pyim-dhashcache-save-personal-dcache-to-file ()
-  ;; 用户选择过的词
-  (pyim-dcache-save-variable
-   'pyim-dhashcache-icode2word
-   pyim-dhashcache-icode2word 0.8)
-  ;; 词条总 count
-  (pyim-dcache-save-variable
-   'pyim-dhashcache-iword2count
-   pyim-dhashcache-iword2count 0.8)
-  ;; 词条 count 日志
-  (pyim-dcache-save-variable
-   'pyim-dhashcache-iword2count-log
-   pyim-dhashcache-iword2count-log 0.8)
-  ;; 词条优先级
-  (pyim-dcache-save-variable
-   'pyim-dhashcache-iword2priority
-   pyim-dhashcache-iword2priority 0.8))
+(defun pyim-dhashcache-update-shortcode2word-1 (code2word)
+  "`pyim-dhashcache-update-shortcode2word' 的内部函数"
+  (let ((shortcode2word (make-hash-table :test #'equal)))
+    (maphash
+     (lambda (key value)
+       (dolist (x (pyim-dhashcache-get-shortcodes key))
+         (puthash x
+                  (mapcar
+                   (lambda (word)
+                     ;; 这个地方的代码用于实现五笔 code 自动提示功能,
+                     ;; 比如输入 'aa' 后得到选词框:
+                     ;; ----------------------
+                     ;; | 1. 莁aa 2.匶wv ... |
+                     ;; ----------------------
+                     (if (get-text-property 0 :comment word)
+                         word
+                       (propertize word :comment (substring key (length x)))))
+                   (delete-dups `(,@(gethash x shortcode2word) ,@value)))
+                  shortcode2word)))
+     code2word)
+    (maphash
+     (lambda (key value)
+       (puthash key (pyim-dcache-sort-words value)
+                shortcode2word))
+     shortcode2word)
+    shortcode2word))
 
-(defmacro pyim-dhashcache-put (cache code &rest body)
-  "将 BODY 的返回值保存到 CACHE 对应的 CODE 中。
+(defun pyim-dhashcache-get-shortcodes (code)
+  "获取 CODE 所有的 shortcodes.
 
-注意事项:这个宏是一个指代宏,其中 orig-value 在这个宏中有特殊含
-义,代表原来 code 对应的取值。"
-  (declare (indent 0))
-  (let ((key (make-symbol "key"))
-        (table (make-symbol "table"))
-        (new-value (make-symbol "new-value")))
-    `(let* ((,key ,code)
-            (,table ,cache)
-            (orig-value (gethash ,key ,table))
-            ,new-value)
-       (setq ,new-value (progn ,@body))
-       (puthash ,key ,new-value ,table))))
+比如:wubi/aaaa -> (wubi/aaa wubi/aa)
 
-(defun pyim-dhashcache-update-iword2count-recent (word n hash-table)
-  (let (words-need-remove)
-    (pyim-dhashcache-put
-      hash-table :all-words
-      (setq orig-value (remove word orig-value))
-      (push word orig-value)
-      (if (<= (length orig-value) n)
-          orig-value
-        (setq words-need-remove (nthcdr n orig-value))
-        (cl-subseq orig-value 0 n)))
-    (dolist (w words-need-remove)
-      (remhash w hash-table))
-    (pyim-dhashcache-put
-      hash-table word
-      (+ (or orig-value 0) 1))
-    hash-table))
+注意事项:这个函数目前只用于五笔等型码输入法,不用于拼音输入法,
+因为拼音输入法词库太大,这样处理之后,会生成一个特别大的哈希表,
+占用太多内存资源,拼音输入法使用 ishortcode 机制。"
+  (when (and (pyim-string-match-p "/" code)
+             (not (pyim-string-match-p "-" code)))
+    (let* ((x (split-string code "/"))
+           (prefix (concat (nth 0 x) "/"))
+           (code1 (nth 1 x))
+           (n (length code1))
+           results)
+      (dotimes (i n)
+        (when (> i 1)
+          (push (concat prefix (substring code1 0 i)) results)))
+      results)))
 
+;; ** 更新 dhashcache 词条计数
 (cl-defmethod pyim-dcache-update-wordcount
   (word &context (pyim-dcache-backend (eql pyim-dhashcache))
         &optional wordcount-handler)
@@ -625,99 +608,105 @@ code 对应的中文词条了。
      (pyim-dhashcache-get-counts-from-log
       (gethash word pyim-dhashcache-iword2count-log)))))
 
-(defun pyim-dhashcache-update-iword2priority (&optional force)
-  "更新词条优先级表,如果 FORCE 为真,强制更新。"
-  (interactive)
-  (when (or force (not pyim-dhashcache-update-iword2priority-p))
-    ;; NOTE: 这个变量按理说应该在回调函数里面设置,但 async 在某些情况下会卡死,
-    ;; 这个变量无法设置为 t, 导致后续产生大量的 emacs 进程,极其影响性能。
-    (setq pyim-dhashcache-update-iword2priority-p t)
-    (async-start
-     `(lambda ()
-        ,@(pyim-dhashcache-async-inject-variables)
-        (require 'pyim-dhashcache)
-        (pyim-dhashcache-init-count-and-priority-variables)
-        (maphash
-         (lambda (key value)
-           (puthash key
-                    (pyim-dhashcache-calculate-priority
-                     (pyim-dhashcache-get-counts-from-log
-                      value))
-                    pyim-dhashcache-iword2priority))
-         pyim-dhashcache-iword2count-log)
-        (pyim-dcache-save-variable
-         'pyim-dhashcache-iword2priority
-         pyim-dhashcache-iword2priority)
-        nil)
-     (lambda (_)
-       (pyim-dcache-reload-variable pyim-dhashcache-iword2priority)))))
-
-(cl-defmethod pyim-dcache-delete-word
-  (word &context (pyim-dcache-backend (eql pyim-dhashcache)))
-  "将中文词条 WORD 从个人词库中删除"
-  (maphash
-   (lambda (key value)
-     (when (member word value)
-       (let ((new-value (remove word value)))
-         (if new-value
-             (puthash key new-value pyim-dhashcache-icode2word)
-           (remhash key pyim-dhashcache-icode2word)))))
-   pyim-dhashcache-icode2word)
-  (maphash
-   (lambda (key value)
-     (when (member word value)
-       (print value)
-       (let ((new-value (remove word value)))
-         (if new-value
-             (puthash key new-value pyim-dhashcache-ishortcode2word)
-           (remhash key pyim-dhashcache-ishortcode2word)))))
-   pyim-dhashcache-ishortcode2word)
-  (remhash word pyim-dhashcache-iword2count)
-  (remhash word pyim-dhashcache-iword2count-log)
-  (remhash word pyim-dhashcache-iword2priority))
-
-(cl-defmethod pyim-dcache-insert-word
-  (word code prepend
-        &context (pyim-dcache-backend (eql pyim-dhashcache)))
-  "将词条 WORD 插入到下面两个词库缓存中。
+(defun pyim-dhashcache-update-iword2count-recent (word n hash-table)
+  (let (words-need-remove)
+    (pyim-dhashcache-put
+      hash-table :all-words
+      (setq orig-value (remove word orig-value))
+      (push word orig-value)
+      (if (<= (length orig-value) n)
+          orig-value
+        (setq words-need-remove (nthcdr n orig-value))
+        (cl-subseq orig-value 0 n)))
+    (dolist (w words-need-remove)
+      (remhash w hash-table))
+    (pyim-dhashcache-put
+      hash-table word
+      (+ (or orig-value 0) 1))
+    hash-table))
 
-1. `pyim-dhashcache-icode2word'
-2. `pyim-dhashcache-insert-word-into-ishortcode2word'."
-  (pyim-dhashcache-insert-word-into-icode2word word code prepend)
-  ;; NOTE: 保存词条到 icode2word 词库缓存的同时,也在 ishortcode2word 词库缓存中
-  ;; 临时写入一份,供当前 Emacs session 使用,但退出时 pyim 不会保存
-  ;; ishortcode2word 词库缓存到文件,因为下次启动 Emacs 的时候,ishortcode2word
-  ;; 词库缓存会从 icode2word 再次重建。
-  (pyim-dhashcache-insert-word-into-ishortcode2word word code prepend))
+;; ** 根据 dhashcache 信息对词条进行排序
+(cl-defmethod pyim-dcache-sort-words
+  (words-list &context (pyim-dcache-backend (eql pyim-dhashcache)))
+  "对 WORDS-LIST 排序"
+  (let ((iword2count pyim-dhashcache-iword2count)
+        (iword2priority pyim-dhashcache-iword2priority))
+    (sort words-list
+          (lambda (a b)
+            (let ((p1 (gethash a iword2priority))
+                  (p2 (gethash b iword2priority)))
+              (cond
+               ((and (listp p1)
+                     (listp p2)
+                     (not (equal p1 p2)))
+                (pyim-numbers> p1 p2))
+               (t (let ((n1 (or (gethash a iword2count) 0))
+                        (n2 (or (gethash b iword2count) 0)))
+                    (> n1 n2)))))))))
 
-(defun pyim-dhashcache-insert-word-into-icode2word (word code prepend)
-  "将词条 WORD 插入到 icode2word 词库缓存 CODE 键对应的位置.
+;; ** 升级 dhashcache 相关函数
+(cl-defmethod pyim-dcache-upgrade
+  (&context (pyim-dcache-backend (eql pyim-dhashcache)))
+  "升级词库缓存.
 
-默认 WORD 放到已有词条的最后,如果 PREPEND 为 non-nil, WORD 将放
-到已有词条的最前面。"
-  (pyim-dhashcache-put
-    pyim-dhashcache-icode2word code
-    (if prepend
-        `(,word ,@(remove word orig-value))
-      `(,@(remove word orig-value) ,word))))
+当前已有的功能:
+1. 基于 :code-prefix-history 信息,升级为新的 code-prefix。"
+  (pyim-dhashcache-upgrade-icode2word))
 
-(defun pyim-dhashcache-insert-word-into-ishortcode2word (word code prepend)
-  "将词条 WORD 插入到 ishortcode2word 词库缓存 CODE 首字母字符串对应的位置.
+(defun pyim-dhashcache-upgrade-icode2word ()
+  "升级 icode2word 缓存。"
+  (let ((delete-old-key-p (yes-or-no-p "Delete old key after upgrade? "))
+        (ruler-list (delete-dups
+                     (remove nil
+                             (mapcar
+                              (lambda (scheme)
+                                (let ((code-prefix (plist-get (cdr scheme) :code-prefix))
+                                      (code-prefix-history (plist-get (cdr scheme) :code-prefix-history)))
+                                  (when code-prefix-history
+                                    (cons code-prefix-history code-prefix))))
+                              pyim-schemes)))))
+    (dolist (ruler ruler-list)
+      (let ((old-prefix-list (car ruler))
+            (new-prefix (cdr ruler)))
+        (dolist (old-prefix old-prefix-list)
+          (maphash
+           (lambda (key _value)
+             (when (string-prefix-p old-prefix key)
+               (let* ((key-words (gethash key pyim-dhashcache-icode2word))
+                      (new-key (concat new-prefix (string-remove-prefix old-prefix key)))
+                      (new-key-words (gethash new-key pyim-dhashcache-icode2word))
+                      (merged-value (delete-dups `(,@new-key-words ,@key-words))))
+                 (puthash new-key merged-value pyim-dhashcache-icode2word)
+                 (message "PYIM icode2word upgrade: %S %S -> %S %S" key key-words new-key merged-value)
+                 (when delete-old-key-p
+                   (remhash key pyim-dhashcache-icode2word)
+                   (message "PYIM icode2word upgrade: %S has been deleted." key)))))
+           pyim-dhashcache-icode2word))))))
 
-默认 WORD 放到已有词条的最后,如果 PREPEND 为 non-nil, WORD 将放
-到已有词条的最前面。"
-  (dolist (newcode (pyim-dhashcache-get-ishortcodes code))
-    (pyim-dhashcache-put
-      pyim-dhashcache-ishortcode2word
-      newcode
-      (if prepend
-          `(,word ,@(remove word orig-value))
-        `(,@(remove word orig-value) ,word)))))
+;; ** 保存 dhashcache 相关函数
+(cl-defmethod pyim-dcache-save-caches
+  (&context (pyim-dcache-backend (eql pyim-dhashcache)))
+  (pyim-dhashcache-save-personal-dcache-to-file))
 
-(cl-defmethod pyim-dcache-search-word-code
-  (string &context (pyim-dcache-backend (eql pyim-dhashcache)))
-  (gethash string pyim-dhashcache-word2code))
+(defun pyim-dhashcache-save-personal-dcache-to-file ()
+  ;; 用户选择过的词
+  (pyim-dcache-save-variable
+   'pyim-dhashcache-icode2word
+   pyim-dhashcache-icode2word 0.8)
+  ;; 词条总 count
+  (pyim-dcache-save-variable
+   'pyim-dhashcache-iword2count
+   pyim-dhashcache-iword2count 0.8)
+  ;; 词条 count 日志
+  (pyim-dcache-save-variable
+   'pyim-dhashcache-iword2count-log
+   pyim-dhashcache-iword2count-log 0.8)
+  ;; 词条优先级
+  (pyim-dcache-save-variable
+   'pyim-dhashcache-iword2priority
+   pyim-dhashcache-iword2priority 0.8))
 
+;; ** 导出相关函数
 (cl-defmethod pyim-dcache-export-personal-words
   (file &context (pyim-dcache-backend (eql pyim-dhashcache))
         &optional confirm)
@@ -725,6 +714,29 @@ code 对应的中文词条了。
   (pyim-dcache-init-variables)
   (pyim-dhashcache-export pyim-dhashcache-icode2word file confirm))
 
+(defun pyim-dhashcache-export (dcache file &optional confirm)
+  "将一个 pyim DCACHE 导出为文件 FILE.
+
+如果 CONFIRM 为 non-nil,文件存在时将会提示用户是否覆盖,
+默认为覆盖模式"
+  (with-temp-buffer
+    (insert ";;; -*- coding: utf-8-unix -*-\n")
+    (maphash
+     (lambda (key value)
+       (let ((value (cl-remove-if
+                     (lambda (x)
+                       ;; 如果某个词条的 text 属性 :noexport 设置为 t, 在导出的
+                       ;; 时候自动忽略这个词条。
+                       (and (stringp x)
+                            (get-text-property 0 :noexport x)))
+                     (if (listp value)
+                         value
+                       (list value)))))
+         (when value
+           (insert (format "%s %s\n" key (mapconcat #'identity value " "))))))
+     dcache)
+    (pyim-dcache-write-file file confirm)))
+
 (cl-defmethod pyim-dcache-export-words-and-counts
   (file &context (pyim-dcache-backend (eql pyim-dhashcache))
         &optional confirm ignore-counts)
@@ -754,6 +766,6 @@ code 对应的中文词条了。
     (pyim-dcache-write-file file confirm)))
 
 ;; * Footer
-
 (provide 'pyim-dhashcache)
+
 ;;; pyim-dhashcache.el ends here
diff --git a/pyim-dregcache.el b/pyim-dregcache.el
index ef06687b8a..8cacf5a9e7 100644
--- a/pyim-dregcache.el
+++ b/pyim-dregcache.el
@@ -44,133 +44,77 @@
 (defvar pyim-dregcache-iword2count nil)
 (defvar pyim-dregcache-dicts-md5 nil)
 
-(cl-defmethod pyim-dcache-update
-  (&context (pyim-dcache-backend (eql pyim-dregcache)) &optional force)
-  "读取并加载所有相关词库 dcache.
-
-如果 FORCE 为真,强制加载。"
-  (pyim-dcache-init-variables)
-  (when pyim-dcache-auto-update
-    (pyim-dregcache-update-personal-words force)
-    (let* ((dict-files (pyim-dict-get-enabled-dict-files))
-           (dicts-md5 (pyim-dcache-create-files-md5 dict-files)))
-      (when pyim-debug
-        (message "pyim-dregcache-update: pyim-dicts=%s pyim-extra-dicts=%s dict-files=%s"
-                 pyim-dicts
-                 pyim-extra-dicts
-                 dict-files))
-      (pyim-dregcache-update-code2word dict-files dicts-md5 force))))
-
-(defun pyim-dregcache-variable-file (variable)
-  "Get VARIABLE dcache file path."
-  (concat (file-name-as-directory pyim-dcache-directory)
-          (symbol-name variable)))
-
-(defun pyim-dregcache-save-variable (variable value)
-  "Save VARIABLE with its VALUE."
-  (let* ((file (pyim-dregcache-variable-file variable))
-         (save-silently t))
-    (make-directory (file-name-directory file) t)
-    (with-temp-buffer
-      (insert value)
-      (pyim-dcache-write-file file))))
-
-(defun pyim-dregcache-load-variable (variable)
-  "载入 VARIABLE 对应的文件内容."
-  (let* ((file (pyim-dregcache-variable-file variable)))
-    (when (and file (file-exists-p file))
-      (with-temp-buffer
-        (insert-file-contents file)
-        (buffer-string)))))
-
-(defun pyim-dregcache-sort-words (words-list)
-  "对 WORDS-LIST 排序,词频大的排在前面."
-  (let ((iword2count pyim-dregcache-iword2count))
-    (sort words-list
-          (lambda (a b)
-            (let ((a (car (split-string a ":")))
-                  (b (car (split-string b ":"))))
-              (> (or (gethash a iword2count) 0)
-                 (or (gethash b iword2count) 0)))))))
-
-(defun pyim-dregcache-sort-icode2word ()
-  "对个人词库排序."
-  ;; https://github.com/redguardtoo/zhfreq
-  (with-temp-buffer
-    (dolist (l (split-string pyim-dregcache-icode2word "\n"))
-      (cond
-       ((string-match "^\\([a-z-]+ \\)\\(.*\\)" l)
-        ;; 3字以上词很少,如果只处理单字,2字词,3字词
-        ;; ((string-match "^\\([a-z]+ \\|[a-z]+-[a-z]+ \\|[a-z]+-[a-z]+-[a-z]+ \\)\\(.*\\)" l)
-        (let* ((pinyin (match-string 1 l))
-               (words (pyim-dregcache-sort-words (split-string (match-string 2 l) " "))))
-          (insert (format "%s\n" (concat pinyin (string-join words " "))))))
-       ;; 其他词
-       ((string= l "")
-        ;; skip empty line
-        )
-       (t
-        (insert (format "%s\n" l)))))
-    (setq pyim-dregcache-icode2word (buffer-string))))
-
-(defun pyim-dregcache-create-cache-content (raw-content)
-  "将 RAW-CONTENT 划分成可以更高效搜索的缓冲区."
-  (let ((chars "bcdefghjklmnopqrstwxyz")
-        (i 0)
-        content-segments
-        (start (string-match "^a" raw-content))
-        chunk
-        end)
-    ;; 将字典缓存划分成多个"子搜索区域"
-    (while (< i (length chars))
-      (when (setq end (string-match (string ?^ (elt chars i))
-                                    raw-content
-                                    start))
-        (setq chunk (substring-no-properties raw-content start end))
-        (push chunk content-segments)
-        (setq start end))
-      (setq i (1+ i)))
-
-    ;; last chunk
-    (setq chunk (substring-no-properties raw-content end (length raw-content)))
-    (push chunk content-segments)
-    (list :content (nreverse content-segments))))
-
-(defun pyim-dregcache-load-dictionary-file (dict-file)
-  "READ from DICT-FILE."
-  (let* ((raw-content (with-temp-buffer
-                        (insert-file-contents dict-file)
-                        (buffer-string))))
-    (setq pyim-dregcache-cache
-          ;; use string type as key, so have to use `lax-plist-put'
-          ;; @see https://www.gnu.org/software/emacs/manual/html_node/elisp/Plist-Access.html#Plist-Access
-          (lax-plist-put pyim-dregcache-cache
-                         (file-truename dict-file)
-                         (pyim-dregcache-create-cache-content raw-content)))))
-
-(defun pyim-dregcache-update-code2word (dict-files dicts-md5 &optional force)
-  "读取并加载词库.
+;; ** 初始化 dregcache 相关函数
+(cl-defmethod pyim-dcache-init-variables
+  (&context (pyim-dcache-backend (eql pyim-dregcache)))
+  "初始化 cache 缓存相关变量."
+  (pyim-dcache-init-variable
+   pyim-dregcache-iword2count
+   ;; dregcache 引擎也需要词频信息,第一次使用 dregcache 引擎的时候,
+   ;; 自动导入 dhashcache 引擎的词频信息,以后两个引擎的词频信息就
+   ;; 完全分开了。
+   (pyim-dcache-get-value 'pyim-dhashcache-iword2count))
+  (unless pyim-dregcache-icode2word
+    (pyim-dregcache-update-personal-words t)))
 
-读取词库文件 DICT-FILES,生成对应的词库缓冲文件,然后加载词库缓存。
+;; ** 从 dregcache 搜索词条相关函数
+(cl-defmethod pyim-dcache-get
+  (code &context (pyim-dcache-backend (eql pyim-dregcache))
+        &optional from)
+  "从 `pyim-dregcache-cache' 搜索 CODE, 得到对应的词条."
+  (when code
+    (cond ((or (memq 'icode2word from)
+               (memq 'ishortcode2word from))
+           (pyim-dregcache-get-icode2word-ishortcode2word code))
+          ;; FIXME: pyim-dregcache 暂时不支持 iword2count-recent-10-words 和
+          ;; iword2count-recent-50-words.
+          ((or (memq 'iword2count-recent-10-words from)
+               (memq 'iword2count-recent-50-words from))
+           nil)
+          (t (let ((dict-files (pyim-dregcache-all-dict-files))
+                   result)
 
-DICT-FILES 是词库文件列表. DICTS-MD5 是词库的MD5校验码.
+               (when pyim-debug (message "pyim-dregcache-get is called. code=%s" code))
+               (when dict-files
+                 (dolist (file dict-files)
+                   (let* ((file-info (lax-plist-get pyim-dregcache-cache file))
+                          (content (pyim-dregcache-get-content code file-info)))
+                     (setq result (append (pyim-dregcache-get-1 content code) result)))))
+               ;; `push' plus `nreverse' is more efficient than `add-to-list'
+               ;; Many examples exist in Emacs' own code
+               (nreverse result))))))
 
-如果 FORCE 为真,强制加载。"
-  (interactive)
-  (when (or force (not (equal dicts-md5 pyim-dregcache-dicts-md5)))
-    ;; no hashtable i file mapping algorithm
-    (dolist (file dict-files)
-      (pyim-dregcache-load-dictionary-file file))
-    (setq pyim-dregcache-dicts-md5 dicts-md5)))
+(defun pyim-dregcache-get-icode2word-ishortcode2word (code)
+  "以 CODE 搜索个人词和个人联想词.  正则表达式搜索词库,不需要为联想词开单独缓存."
+  (when pyim-debug (message "pyim-dregcache-get-icode2word-ishortcode2word called => %s" code))
+  (when pyim-dregcache-icode2word
+    (nreverse (pyim-dregcache-get-1 pyim-dregcache-icode2word code))))
 
-(defmacro pyim-dregcache-shenmu2regexp (char)
-  "将声母 CHAR 转换为通用正则表达式匹配所有以该声母开头的汉字."
-  `(concat ,char "[a-z]*"))
+(defun pyim-dregcache-get-1 (content code)
+  (let ((case-fold-search t)
+        (start 0)
+        (pattern (pyim-dregcache-match-line code))
+        (content-length (length content))
+        word
+        output)
+    (while (and (< start content-length)
+                (setq start (string-match pattern content start)))
+      ;; 提取词
+      (setq word (match-string-no-properties 1 content))
+      (when word
+        (cond
+         ((string-match "^[^ ]+$" word)
+          ;; 单个词
+          (push word output))
+         (t
+          ;; 多个字
+          (setq output (append (nreverse (split-string word " +")) output)))))
+      ;; 继续搜索
+      (setq start (+ start 2 (length code) (length word))))
+    output))
 
-(defmacro pyim-dregcache-is-shenmu (code)
-  "判断CODE 是否是一个声母."
-  `(and (eq (length ,code) 1)
-        (not (string-match ,code "aeo"))))
+(defmacro pyim-dregcache-match-line (code)
+  `(concat "^" (pyim-dregcache-code2regexp ,code) " \\(.+\\)"))
 
 (defun pyim-dregcache-code2regexp (code)
   "将 CODE 转换成正则表达式用来搜索辞典缓存中的匹配项目.
@@ -210,8 +154,14 @@ DICT-FILES 是词库文件列表. DICTS-MD5 是词库的MD5校验码.
           ;; tian-an-men => tian-an-men[a-z-]*
           (concat s "[a-z-]*"))))))))
 
-(defmacro pyim-dregcache-match-line (code)
-  `(concat "^" (pyim-dregcache-code2regexp ,code) " \\(.+\\)"))
+(defmacro pyim-dregcache-is-shenmu (code)
+  "判断CODE 是否是一个声母."
+  `(and (eq (length ,code) 1)
+        (not (string-match ,code "aeo"))))
+
+(defmacro pyim-dregcache-shenmu2regexp (char)
+  "将声母 CHAR 转换为通用正则表达式匹配所有以该声母开头的汉字."
+  `(concat ,char "[a-z]*"))
 
 (defun pyim-dregcache-all-dict-files ()
   "所有词典文件."
@@ -239,60 +189,109 @@ DICT-FILES 是词库文件列表. DICTS-MD5 是词库的MD5校验码.
     ;; fetch segment using the first character of pinyin code
     (nth idx rlt)))
 
-(defun pyim-dregcache-get-1 (content code)
-  (let ((case-fold-search t)
-        (start 0)
-        (pattern (pyim-dregcache-match-line code))
-        (content-length (length content))
-        word
-        output)
-    (while (and (< start content-length)
-                (setq start (string-match pattern content start)))
-      ;; 提取词
-      (setq word (match-string-no-properties 1 content))
-      (when word
-        (cond
-         ((string-match "^[^ ]+$" word)
-          ;; 单个词
-          (push word output))
-         (t
-          ;; 多个字
-          (setq output (append (nreverse (split-string word " +")) output)))))
-      ;; 继续搜索
-      (setq start (+ start 2 (length code) (length word))))
-    output))
+;; ** 从 dregcache 搜索代码相关函数
+(cl-defmethod pyim-dcache-search-word-code
+  (word &context (pyim-dcache-backend (eql pyim-dregcache)))
+  "从 `pyim-dregcache-cache' 和 `pyim-dregcache-icode2word' 搜索 word, 得到对应的code."
+  (when pyim-debug (message "pyim-dregcache-search-word-code word=%s" word))
+  (when pyim-dregcache-cache
+    (catch 'result
+      (let ((dict-files (pyim-dregcache-all-dict-files))
+            code)
+        (when pyim-dregcache-icode2word
+          (setq code (pyim-dregcache-search-word-code-1 word pyim-dregcache-icode2word))
+          (when code (throw 'result (list code))))
+        (dolist (file dict-files)
+          (let* ((file-info (lax-plist-get pyim-dregcache-cache file))
+                 (contents (lax-plist-get file-info :content)))
+            (dolist (content contents)
+              (setq code (pyim-dregcache-search-word-code-1 word content))
+              (when code (throw 'result (list code))))))))))
 
-(cl-defmethod pyim-dcache-get
-  (code &context (pyim-dcache-backend (eql pyim-dregcache))
-        &optional from)
-  "从 `pyim-dregcache-cache' 搜索 CODE, 得到对应的词条."
-  (when code
-    (cond ((or (memq 'icode2word from)
-               (memq 'ishortcode2word from))
-           (pyim-dregcache-get-icode2word-ishortcode2word code))
-          ;; FIXME: pyim-dregcache 暂时不支持 iword2count-recent-10-words 和
-          ;; iword2count-recent-50-words.
-          ((or (memq 'iword2count-recent-10-words from)
-               (memq 'iword2count-recent-50-words from))
-           nil)
-          (t (let ((dict-files (pyim-dregcache-all-dict-files))
-                   result)
+(defun pyim-dregcache-search-word-code-1 (word content)
+  (let* ((case-fold-search t)
+         (regexp (concat "^\\([a-z-]+\\)\\(.*\\) " "\\(" word " \\|" word "$\\)")))
+    (when (string-match regexp content)
+      (match-string-no-properties 1 content))))
+
+;; ** 给 dregcache 添加词条相关函数
+(cl-defmethod pyim-dcache-insert-word
+  (word code prepend
+        &context (pyim-dcache-backend (eql pyim-dregcache)))
+  "将词条 WORD 插入到 `pyim-dregcache-icode2word'."
+  (pyim-dregcache-insert-word-into-icode2word word code prepend))
+
+(defun pyim-dregcache-insert-word-into-icode2word (word code prepend)
+  "保存个人词到缓存,和其他词库格式一样以共享正则搜索算法."
+  (when pyim-debug
+    (message "pyim-dregcache-insert-word-into-icode2word called => %s %s %s"
+             word
+             code
+             prepend))
+  (with-temp-buffer
+    (when pyim-dregcache-icode2word
+      (insert pyim-dregcache-icode2word))
+    (goto-char (point-min))
+    (let* ((case-fold-search t)
+           substring replace-string beg end old-word-list)
+      (if (re-search-forward (concat "^" code " \\(.*\\)") nil t)
+          (progn
+            (setq beg (match-beginning 0))
+            (setq end (match-end 0))
+            (setq substring (match-string-no-properties 1))
+            (delete-region beg end)
+            ;; 这里不进行排序,在pyim-dregcache-update-personal-words排序
+            (setq old-word-list (pyim-dregcache-sort-words (split-string substring " ")))
+            (setq replace-string (concat code " " (string-join (delete-dups `(,@old-word-list ,word)) " "))))
+        (setq replace-string (concat code " " (or replace-string word) "\n")))
+      (goto-char (or beg (point-max)))
+      (insert replace-string))
+    (setq pyim-dregcache-icode2word
+          (buffer-string))))
+
+;; ** 从 dregcache 删除词条相关函数
+(cl-defmethod pyim-dcache-delete-word
+  (word &context (pyim-dcache-backend (eql pyim-dregcache)))
+  "将中文词条 WORD 从个人词库中删除."
+  (with-temp-buffer
+    (insert pyim-dregcache-icode2word)
+    (goto-char (point-min))
+    (let* ((case-fold-search t)
+           substring beg end)
+      (while (re-search-forward (concat "^\\([a-z-]+\\) \\(.*\\)" word "\\(.*\\)$") nil t)
+        (setq beg (match-beginning 0))
+        (setq end (match-end 0))
+        (setq substring (concat (match-string-no-properties 1)
+                                (match-string-no-properties 2)
+                                (match-string-no-properties 3)))
+
+        ;; delete string and the newline char
+        (delete-region beg (+ 1 end))
+        (when (> (length (split-string substring " ")) 1)
+          (goto-char beg)
+          (insert substring)))
+      (setq pyim-dregcache-icode2word
+            (buffer-string))))
+  ;; 删除对应词条的词频
+  (remhash word pyim-dregcache-iword2count))
 
-               (when pyim-debug (message "pyim-dregcache-get is called. code=%s" code))
-               (when dict-files
-                 (dolist (file dict-files)
-                   (let* ((file-info (lax-plist-get pyim-dregcache-cache file))
-                          (content (pyim-dregcache-get-content code file-info)))
-                     (setq result (append (pyim-dregcache-get-1 content code) result)))))
-               ;; `push' plus `nreverse' is more efficient than `add-to-list'
-               ;; Many examples exist in Emacs' own code
-               (nreverse result))))))
+;; ** 更新 dhashcache 相关函数
+(cl-defmethod pyim-dcache-update
+  (&context (pyim-dcache-backend (eql pyim-dregcache)) &optional force)
+  "读取并加载所有相关词库 dcache.
 
-(defun pyim-dregcache-get-icode2word-ishortcode2word (code)
-  "以 CODE 搜索个人词和个人联想词.  正则表达式搜索词库,不需要为联想词开单独缓存."
-  (when pyim-debug (message "pyim-dregcache-get-icode2word-ishortcode2word called => %s" code))
-  (when pyim-dregcache-icode2word
-    (nreverse (pyim-dregcache-get-1 pyim-dregcache-icode2word code))))
+如果 FORCE 为真,强制加载。"
+  (pyim-dcache-init-variables)
+  (when pyim-dcache-auto-update
+    (pyim-dregcache-update-personal-words force)
+    (let* ((dict-files (pyim-dict-get-enabled-dict-files))
+           (dicts-md5 (pyim-dcache-create-files-md5 dict-files)))
+      (when pyim-debug
+        (message "pyim-dregcache-update: pyim-dicts=%s pyim-extra-dicts=%s dict-files=%s"
+                 pyim-dicts
+                 pyim-extra-dicts
+                 dict-files))
+      (pyim-dregcache-update-code2word dict-files dicts-md5 force))))
 
 (defun pyim-dregcache-update-personal-words (&optional force)
   "合并 `pyim-dregcache-icode2word' 磁盘文件. 加载排序后的结果.
@@ -331,39 +330,70 @@ DICT-FILES 是词库文件列表. DICTS-MD5 是词库的MD5校验码.
   (when (and force pyim-dregcache-icode2word)
     (pyim-dregcache-sort-icode2word)))
 
-(cl-defmethod pyim-dcache-init-variables
-  (&context (pyim-dcache-backend (eql pyim-dregcache)))
-  "初始化 cache 缓存相关变量."
-  (pyim-dcache-init-variable
-   pyim-dregcache-iword2count
-   ;; dregcache 引擎也需要词频信息,第一次使用 dregcache 引擎的时候,
-   ;; 自动导入 dhashcache 引擎的词频信息,以后两个引擎的词频信息就
-   ;; 完全分开了。
-   (pyim-dcache-get-value 'pyim-dhashcache-iword2count))
-  (unless pyim-dregcache-icode2word
-    (pyim-dregcache-update-personal-words t)))
+(defun pyim-dregcache-load-variable (variable)
+  "载入 VARIABLE 对应的文件内容."
+  (let* ((file (pyim-dregcache-variable-file variable)))
+    (when (and file (file-exists-p file))
+      (with-temp-buffer
+        (insert-file-contents file)
+        (buffer-string)))))
 
-(cl-defmethod pyim-dcache-save-caches
-  (&context (pyim-dcache-backend (eql pyim-dregcache)))
-  (pyim-dregcache-save-personal-dcache-to-file))
+(defun pyim-dregcache-variable-file (variable)
+  "Get VARIABLE dcache file path."
+  (concat (file-name-as-directory pyim-dcache-directory)
+          (symbol-name variable)))
 
-(defun pyim-dregcache-save-personal-dcache-to-file ()
-  "保存缓存内容到默认目录."
-  (when pyim-debug (message "pyim-dregcache-save-personal-dcache-to-file called"))
-  ;; 用户选择过的词存为标准辞典格式保存
-  (when pyim-dregcache-icode2word
-    (pyim-dregcache-save-variable
-     'pyim-dregcache-icode2word
-     pyim-dregcache-icode2word))
-  ;; 词频
-  (pyim-dcache-save-variable
-   'pyim-dregcache-iword2count
-   pyim-dregcache-iword2count))
+(defun pyim-dregcache-update-code2word (dict-files dicts-md5 &optional force)
+  "读取并加载词库.
 
-(defun pyim-dregcache-export-words-and-counts ()
-  "TODO"
-  )
+读取词库文件 DICT-FILES,生成对应的词库缓冲文件,然后加载词库缓存。
+
+DICT-FILES 是词库文件列表. DICTS-MD5 是词库的MD5校验码.
+
+如果 FORCE 为真,强制加载。"
+  (interactive)
+  (when (or force (not (equal dicts-md5 pyim-dregcache-dicts-md5)))
+    ;; no hashtable i file mapping algorithm
+    (dolist (file dict-files)
+      (pyim-dregcache-load-dictionary-file file))
+    (setq pyim-dregcache-dicts-md5 dicts-md5)))
+
+(defun pyim-dregcache-load-dictionary-file (dict-file)
+  "READ from DICT-FILE."
+  (let* ((raw-content (with-temp-buffer
+                        (insert-file-contents dict-file)
+                        (buffer-string))))
+    (setq pyim-dregcache-cache
+          ;; use string type as key, so have to use `lax-plist-put'
+          ;; @see https://www.gnu.org/software/emacs/manual/html_node/elisp/Plist-Access.html#Plist-Access
+          (lax-plist-put pyim-dregcache-cache
+                         (file-truename dict-file)
+                         (pyim-dregcache-create-cache-content raw-content)))))
+
+(defun pyim-dregcache-create-cache-content (raw-content)
+  "将 RAW-CONTENT 划分成可以更高效搜索的缓冲区."
+  (let ((chars "bcdefghjklmnopqrstwxyz")
+        (i 0)
+        content-segments
+        (start (string-match "^a" raw-content))
+        chunk
+        end)
+    ;; 将字典缓存划分成多个"子搜索区域"
+    (while (< i (length chars))
+      (when (setq end (string-match (string ?^ (elt chars i))
+                                    raw-content
+                                    start))
+        (setq chunk (substring-no-properties raw-content start end))
+        (push chunk content-segments)
+        (setq start end))
+      (setq i (1+ i)))
 
+    ;; last chunk
+    (setq chunk (substring-no-properties raw-content end (length raw-content)))
+    (push chunk content-segments)
+    (list :content (nreverse content-segments))))
+
+;; ** 更新 dregcache 词条计数。
 (cl-defmethod pyim-dcache-update-wordcount
   (word &context (pyim-dcache-backend (eql pyim-dregcache))
         &optional wordcount-handler)
@@ -383,65 +413,7 @@ DICT-FILES 是词库文件列表. DICTS-MD5 是词库的MD5校验码.
     (unless (equal orig-value new-value)
       (puthash word new-value pyim-dregcache-iword2count))))
 
-(cl-defmethod pyim-dcache-delete-word
-  (word &context (pyim-dcache-backend (eql pyim-dregcache)))
-  "将中文词条 WORD 从个人词库中删除."
-  (with-temp-buffer
-    (insert pyim-dregcache-icode2word)
-    (goto-char (point-min))
-    (let* ((case-fold-search t)
-           substring beg end)
-      (while (re-search-forward (concat "^\\([a-z-]+\\) \\(.*\\)" word "\\(.*\\)$") nil t)
-        (setq beg (match-beginning 0))
-        (setq end (match-end 0))
-        (setq substring (concat (match-string-no-properties 1)
-                                (match-string-no-properties 2)
-                                (match-string-no-properties 3)))
-
-        ;; delete string and the newline char
-        (delete-region beg (+ 1 end))
-        (when (> (length (split-string substring " ")) 1)
-          (goto-char beg)
-          (insert substring)))
-      (setq pyim-dregcache-icode2word
-            (buffer-string))))
-  ;; 删除对应词条的词频
-  (remhash word pyim-dregcache-iword2count))
-
-(cl-defmethod pyim-dcache-insert-word
-  (word code prepend
-        &context (pyim-dcache-backend (eql pyim-dregcache)))
-  "将词条 WORD 插入到 `pyim-dregcache-icode2word'."
-  (pyim-dregcache-insert-word-into-icode2word word code prepend))
-
-(defun pyim-dregcache-insert-word-into-icode2word (word code prepend)
-  "保存个人词到缓存,和其他词库格式一样以共享正则搜索算法."
-  (when pyim-debug
-    (message "pyim-dregcache-insert-word-into-icode2word called => %s %s %s"
-             word
-             code
-             prepend))
-  (with-temp-buffer
-    (when pyim-dregcache-icode2word
-      (insert pyim-dregcache-icode2word))
-    (goto-char (point-min))
-    (let* ((case-fold-search t)
-           substring replace-string beg end old-word-list)
-      (if (re-search-forward (concat "^" code " \\(.*\\)") nil t)
-          (progn
-            (setq beg (match-beginning 0))
-            (setq end (match-end 0))
-            (setq substring (match-string-no-properties 1))
-            (delete-region beg end)
-            ;; 这里不进行排序,在pyim-dregcache-update-personal-words排序
-            (setq old-word-list (pyim-dregcache-sort-words (split-string substring " ")))
-            (setq replace-string (concat code " " (string-join (delete-dups `(,@old-word-list ,word)) " "))))
-        (setq replace-string (concat code " " (or replace-string word) "\n")))
-      (goto-char (or beg (point-max)))
-      (insert replace-string))
-    (setq pyim-dregcache-icode2word
-          (buffer-string))))
-
+;; ** 升级 dregcache 相关函数
 (cl-defmethod pyim-dcache-upgrade (&context (pyim-dcache-backend (eql pyim-dregcache)))
   "升级词库缓存.
 
@@ -456,30 +428,45 @@ dregcache 只支持全拼和双拼,不能用于五笔之类的型码输入法
 update-icode2word 目前只要是用于更新型码输入法的 code-prefix, 所
 以不需要具体实现细节。")
 
-(defun pyim-dregcache-search-word-code-1 (word content)
-  (let* ((case-fold-search t)
-         (regexp (concat "^\\([a-z-]+\\)\\(.*\\) " "\\(" word " \\|" word "$\\)")))
-    (when (string-match regexp content)
-      (match-string-no-properties 1 content))))
+;; ** 根据 dregcache 信息对词条进行排序
+(defun pyim-dregcache-sort-words (words-list)
+  "对 WORDS-LIST 排序,词频大的排在前面."
+  (let ((iword2count pyim-dregcache-iword2count))
+    (sort words-list
+          (lambda (a b)
+            (let ((a (car (split-string a ":")))
+                  (b (car (split-string b ":"))))
+              (> (or (gethash a iword2count) 0)
+                 (or (gethash b iword2count) 0)))))))
 
-(cl-defmethod pyim-dcache-search-word-code
-  (word &context (pyim-dcache-backend (eql pyim-dregcache)))
-  "从 `pyim-dregcache-cache' 和 `pyim-dregcache-icode2word' 搜索 word, 得到对应的code."
-  (when pyim-debug (message "pyim-dregcache-search-word-code word=%s" word))
-  (when pyim-dregcache-cache
-    (catch 'result
-      (let ((dict-files (pyim-dregcache-all-dict-files))
-            code)
-        (when pyim-dregcache-icode2word
-          (setq code (pyim-dregcache-search-word-code-1 word pyim-dregcache-icode2word))
-          (when code (throw 'result (list code))))
-        (dolist (file dict-files)
-          (let* ((file-info (lax-plist-get pyim-dregcache-cache file))
-                 (contents (lax-plist-get file-info :content)))
-            (dolist (content contents)
-              (setq code (pyim-dregcache-search-word-code-1 word content))
-              (when code (throw 'result (list code))))))))))
+;; ** 保存 dregcache 相关函数
+(cl-defmethod pyim-dcache-save-caches
+  (&context (pyim-dcache-backend (eql pyim-dregcache)))
+  (pyim-dregcache-save-personal-dcache-to-file))
+
+(defun pyim-dregcache-save-personal-dcache-to-file ()
+  "保存缓存内容到默认目录."
+  (when pyim-debug (message "pyim-dregcache-save-personal-dcache-to-file called"))
+  ;; 用户选择过的词存为标准辞典格式保存
+  (when pyim-dregcache-icode2word
+    (pyim-dregcache-save-variable
+     'pyim-dregcache-icode2word
+     pyim-dregcache-icode2word))
+  ;; 词频
+  (pyim-dcache-save-variable
+   'pyim-dregcache-iword2count
+   pyim-dregcache-iword2count))
+
+(defun pyim-dregcache-save-variable (variable value)
+  "Save VARIABLE with its VALUE."
+  (let* ((file (pyim-dregcache-variable-file variable))
+         (save-silently t))
+    (make-directory (file-name-directory file) t)
+    (with-temp-buffer
+      (insert value)
+      (pyim-dcache-write-file file))))
 
+;; ** 导出 dregcache 相关函数
 (cl-defmethod pyim-dcache-export-personal-words
   (file &context (pyim-dcache-backend (eql pyim-dregcache))
         &optional confirm)
@@ -498,6 +485,30 @@ update-icode2word 目前只要是用于更新型码输入法的 code-prefix, 所
       (sort-lines nil (point-min) (point-max))
       (pyim-dcache-write-file file confirm))))
 
+(defun pyim-dregcache-sort-icode2word ()
+  "对个人词库排序."
+  ;; https://github.com/redguardtoo/zhfreq
+  (with-temp-buffer
+    (dolist (l (split-string pyim-dregcache-icode2word "\n"))
+      (cond
+       ((string-match "^\\([a-z-]+ \\)\\(.*\\)" l)
+        ;; 3字以上词很少,如果只处理单字,2字词,3字词
+        ;; ((string-match "^\\([a-z]+ \\|[a-z]+-[a-z]+ \\|[a-z]+-[a-z]+-[a-z]+ \\)\\(.*\\)" l)
+        (let* ((pinyin (match-string 1 l))
+               (words (pyim-dregcache-sort-words (split-string (match-string 2 l) " "))))
+          (insert (format "%s\n" (concat pinyin (string-join words " "))))))
+       ;; 其他词
+       ((string= l "")
+        ;; skip empty line
+        )
+       (t
+        (insert (format "%s\n" l)))))
+    (setq pyim-dregcache-icode2word (buffer-string))))
+
+(defun pyim-dregcache-export-words-and-counts ()
+  "TODO"
+  )
+
 ;; * Footer
 
 (provide 'pyim-dregcache)



reply via email to

[Prev in Thread] Current Thread [Next in Thread]