emacs-elpa-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[nongnu] elpa/flx 20e3073352 148/182: Merge pull request #78 from lewang


From: ELPA Syncer
Subject: [nongnu] elpa/flx 20e3073352 148/182: Merge pull request #78 from lewang/0.6
Date: Tue, 13 Dec 2022 03:59:38 -0500 (EST)

branch: elpa/flx
commit 20e3073352c86e6b19ce4fc0ace8c6c893bd59d9
Merge: 10db531369 8959c45907
Author: Le Wang <lewang@users.noreply.github.com>
Commit: Le Wang <lewang@users.noreply.github.com>

    Merge pull request #78 from lewang/0.6
    
    Version 0.6
---
 .travis.yml       |   4 +
 Cask              |   5 ++
 Makefile          |   2 +-
 flx.el            | 223 ++++++++++++++++++++++++++++++++++--------------------
 tests/flx-test.el |  35 +++++----
 tests/run-test.el |   7 ++
 6 files changed, 178 insertions(+), 98 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 673f50e13a..eaae514c4a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -12,6 +12,10 @@ before_install:
       sudo apt-get install -qq
           emacs24 emacs24-el emacs24-common-non-dfsg;
     fi
+  - curl -fsSL https://raw.githubusercontent.com/cask/cask/master/go | python
+  - pwd
+  - ~/.cask/bin/cask
+
 env:
   - EMACS=emacs24 TAGS="--tags ~@requires-e24-3"
   - EMACS=emacs-snapshot TAGS=""
diff --git a/Cask b/Cask
new file mode 100644
index 0000000000..ba211511fc
--- /dev/null
+++ b/Cask
@@ -0,0 +1,5 @@
+(source gnu)
+(source melpa)
+
+(development
+ (depends-on "async"))
diff --git a/Makefile b/Makefile
index 9c613a7d22..84a21f9b7a 100644
--- a/Makefile
+++ b/Makefile
@@ -15,7 +15,7 @@ all: $(ELCS)
 clean:
        $(RM) $(ELCS) $(TEST_ELCS)
 
-show-version: show-version
+show-version:
        echo "*** Emacs version ***"
        echo "EMACS = `which ${EMACS}`"
        ${EMACS} --version
diff --git a/flx.el b/flx.el
index ba7b8e8dfe..006f10b4aa 100644
--- a/flx.el
+++ b/flx.el
@@ -6,7 +6,7 @@
 ;; Maintainer: Le Wang
 ;; Description: fuzzy matching with good sorting
 ;; Created: Wed Apr 17 01:01:41 2013 (+0800)
-;; Version: 0.5
+;; Version: 0.6
 ;; Package-Requires: ((cl-lib "0.3"))
 ;; URL: https://github.com/lewang/flx
 
@@ -52,6 +52,16 @@
 
 (require 'cl-lib)
 
+(defgroup flx nil
+  "Fuzzy matching with good sorting"
+  :group 'convenience
+  :prefix "flx-")
+
+(defcustom flx-word-separators '(?\  ?- ?_ ?: ?. ?/ ?\\)
+  "List of characters that act as word separators in flx"
+  :type '(repeat character)
+  :group 'flx)
+
 (defface flx-highlight-face  '((t (:inherit font-lock-variable-name-face :bold 
t :underline t)))
   "Face used by flx for highlighting flx match characters."
   :group 'flx)
@@ -60,7 +70,7 @@
 (defsubst flx-word-p (char)
   "Check if CHAR is a word character."
   (and char
-       (not (memq char '(?\  ?- ?_ ?: ?. ?/ ?\\)))))
+       (not (memq char flx-word-separators))))
 
 (defsubst flx-capital-p (char)
   "Check if CHAR is an uppercase character."
@@ -69,9 +79,9 @@
        (= char (upcase char))))
 
 (defsubst flx-boundary-p (last-char char)
-  "Check is LAST-CHAR is the end of a word and CHAR the start of the next.
+  "Check if LAST-CHAR is the end of a word and CHAR the start of the next.
 
-The function is camel-case aware."
+This function is camel-case aware."
   (or (null last-char)
       (and (not (flx-capital-p last-char))
            (flx-capital-p char))
@@ -79,9 +89,8 @@ The function is camel-case aware."
            (flx-word-p char))))
 
 (defsubst flx-inc-vec (vec &optional inc beg end)
-  "increment each element of vectory by INC(default=1)
-from BEG (inclusive) to end (not inclusive).
-"
+  "Increment each element of vectory by INC(default=1)
+from BEG (inclusive) to END (not inclusive)."
   (or inc
       (setq inc 1))
   (or beg
@@ -94,8 +103,8 @@ from BEG (inclusive) to end (not inclusive).
   vec)
 
 (defun flx-get-hash-for-string (str heatmap-func)
-  "Return hash-table for string where keys are characters value
-  is a sorted list of indexes for character occurrences."
+  "Return hash-table for string where keys are characters.
+Value is a sorted list of indexes for character occurrences."
   (let* ((res (make-hash-table :test 'eq :size 32))
          (str-len (length str))
          down-char)
@@ -114,7 +123,7 @@ from BEG (inclusive) to end (not inclusive).
 
 ;; So we store one fixnum per character.  Is this too memory inefficient?
 (defun flx-get-heatmap-str (str &optional group-separator)
-  "Generate heat map vector of string.
+  "Generate the heatmap vector of string.
 
 See documentation for logic."
   (let* ((str-len (length str))
@@ -211,7 +220,7 @@ See documentation for logic."
 
 
 (defsubst flx-bigger-sublist (sorted-list val)
-  "return sublist bigger than VAL from sorted SORTED-LIST
+  "Return sublist bigger than VAL from sorted SORTED-LIST
 
   if VAL is nil, return entire list."
   (if val
@@ -220,40 +229,12 @@ See documentation for logic."
                  (cl-return sub)))
       sorted-list))
 
-(defun flx-get-matches (hash query &optional greater-than q-index)
-  "Return list of all unique indexes into str where query can match.
-
-That is all character sequences of query that occur in str are returned.
-
-HASH accept as the cached analysis of str.
-sstr
-e.g. (\"aab\" \"ab\") returns
-       '((0 2) (1 2)
-"
-
-  (setq q-index (or q-index 0))
-  (let* ((q-char (aref query q-index))
-         (indexes (flx-bigger-sublist
-                   (gethash q-char hash) greater-than)))
-    (if (< q-index (1- (length query)))
-        (apply                        ; `mapcan'
-         'nconc
-         (mapcar
-          (lambda (index)
-            (let ((next-matches-for-rest (flx-get-matches hash query  index 
(1+ q-index))))
-              (when next-matches-for-rest
-                (mapcar (lambda (match)
-                          (cons index match))
-                        next-matches-for-rest))))
-          indexes))
-      (mapcar 'list indexes))))
-
 (defun flx-make-filename-cache ()
-  "Return cache hashtable appropraite for storeing filenames."
+  "Return cache hashtable appropraite for storing filenames."
   (flx-make-string-cache 'flx-get-heatmap-file))
 
 (defun flx-make-string-cache (&optional heat-func)
-  "Return cache hashtable appropraite for storeing strings."
+  "Return cache hashtable appropraite for storing strings."
   (let ((hash (make-hash-table :test 'equal
                                :size 4096)))
     (puthash 'heatmap-func (or heat-func 'flx-get-heatmap-str) hash)
@@ -273,43 +254,122 @@ e.g. (\"aab\" \"ab\") returns
             (puthash str res cache))
           res))))
 
+(defun flx-find-best-match (str-info
+                            heatmap
+                            greater-than
+                            query
+                            query-length
+                            q-index
+                            match-cache)
+  "Recursively compute the best match for a string, passed as STR-INFO and
+HEATMAP, according to QUERY.
+
+This function uses MATCH-CACHE to memoize its return values.
+For other parameters, see `flx-score'"
+
+  ;; Here, we use a simple N'ary hashing scheme
+  ;; You could use (/ hash-key query-length) to get greater-than
+  ;; Or, (mod hash-key query-length) to get q-index
+  ;; We use this instead of a cons key for the sake of efficiency
+  (let* ((hash-key (+ q-index
+                      (* (or greater-than 0)
+                         query-length)))
+         (hash-value (gethash hash-key match-cache)))
+    (if hash-value
+        ;; Here, we use the value 'no-match to distinguish a cache miss
+        ;; from a nil (i.e. non-matching) return value
+        (if (eq hash-value 'no-match)
+            nil
+          hash-value)
+      (let ((indexes (flx-bigger-sublist
+                       (gethash (aref query q-index) str-info)
+                       greater-than))
+            (match)
+            (temp-score)
+            (best-score most-negative-fixnum))
+
+        ;; Matches are of the form:
+        ;; ((match_indexes) . (score . contiguous-count))
+        (if (>= q-index (1- query-length))
+            ;; At the tail end of the recursion, simply
+            ;; generate all possible matches with their scores
+            ;; and return the list to parent.
+            (setq match (mapcar (lambda (index)
+                                  (cons (list index)
+                                        (cons (aref heatmap index) 0)))
+                                indexes))
+          (dolist (index indexes)
+            (dolist (elem (flx-find-best-match str-info
+                                               heatmap
+                                               index
+                                               query
+                                               query-length
+                                               (1+ q-index)
+                                               match-cache))
+              (setq temp-score
+                    (if (= (1- (caar elem)) index)
+                        (+ (cadr elem)
+                           (aref heatmap index)
+
+                           ;; boost contiguous matches
+                           (* (min (cddr elem)
+                                   3)
+                              15)
+                           60)
+                      (+ (cadr elem)
+                         (aref heatmap index))))
+
+              ;; We only care about the optimal match, so only
+              ;; forward the match with the best score to parent
+              (when (> temp-score best-score)
+                (setq best-score temp-score
+                      match (list (cons (cons index (car elem))
+                                        (cons temp-score
+                                              (if (= (1- (caar elem))
+                                                     index)
+                                                  (1+ (cddr elem))
+                                                0)))))))))
+
+        ;; Calls are cached to avoid exponential time complexity
+        (puthash hash-key
+                 (if match match 'no-match)
+                 match-cache)
+        match))))
 
 (defun flx-score (str query &optional cache)
-  "return best score matching QUERY against STR"
+  "Return best score matching QUERY against STR"
   (unless (or (zerop (length query))
               (zerop (length str)))
-    (let* ((info-hash (flx-process-cache str cache))
-           (heatmap (gethash 'heatmap info-hash))
-           (matches (flx-get-matches info-hash query))
-           (query-length (length query))
-           (full-match-boost (and (< query-length 5)
-                                  (> query-length 1)))
-           (best-score nil))
-      (mapc (lambda (match-positions)
-              (let ((score (if (and
-                                full-match-boost
-                                (= (length match-positions)
-                                   (length str)))
-                               10000
-                             0))
-                    (contiguous-count 0)
-                    last-match)
-                (cl-loop for index in match-positions
-                      do (progn
-                           (if (and last-match
-                                    (= (1+ last-match) index))
-                               (cl-incf contiguous-count)
-                             (setq contiguous-count 0))
-                           (cl-incf score (aref heatmap index))
-                           (when (> contiguous-count 0)
-                             (cl-incf score (+ 45 (* 15 (min contiguous-count 
4)))))
-                           (setq last-match index)))
-                (if (or (null best-score)
-                        (> score (car best-score)))
-                    (setq best-score (cons score match-positions)))))
-            matches)
-      best-score)))
-
+    (let*
+        ((str-info (flx-process-cache str cache))
+         (heatmap (gethash 'heatmap str-info))
+         (query-length (length query))
+         (full-match-boost (and (< 1 query-length)
+                                (< query-length 5)))
+
+         ;; Dynamic Programming table for memoizing flx-find-best-match
+         (match-cache (make-hash-table :test 'eql :size 10))
+
+         (optimal-match (flx-find-best-match str-info
+                                             heatmap
+                                             nil
+                                             query
+                                             query-length
+                                             0
+                                             match-cache)))
+      ;; Postprocess candidate
+      (and optimal-match
+           (cons
+            ;; This is the computed score, adjusted to boost the scores
+            ;; of exact matches.
+            (if (and full-match-boost
+                     (=  (length (caar optimal-match))
+                         (length str)))
+                (+ (cl-cadar optimal-match) 10000)
+              (cl-cadar optimal-match))
+
+            ;; This is the list of match positions
+            (caar optimal-match))))))
 
 (defun flx-propertize (obj score &optional add-score)
   "Return propertized copy of obj according to score.
@@ -321,14 +381,13 @@ SCORE of nil means to clear the properties."
                  (substring-no-properties (car obj))
                (substring-no-properties obj))))
 
-    (unless (null score)
-      (cl-loop for char in (cdr score)
-            do (progn
-                 (when (and last-char
-                            (not (= (1+ last-char) char)))
-                   (put-text-property block-started  (1+ last-char) 'face 
'flx-highlight-face str)
-                   (setq block-started char))
-                 (setq last-char char)))
+    (when score
+      (dolist (char (cdr score))
+        (when (and last-char
+                   (not (= (1+ last-char) char)))
+          (put-text-property block-started  (1+ last-char) 'face 
'flx-highlight-face str)
+          (setq block-started char))
+        (setq last-char char))
       (put-text-property block-started  (1+ last-char) 'face 
'flx-highlight-face str)
       (when add-score
         (setq str (format "%s [%s]" str (car score)))))
diff --git a/tests/flx-test.el b/tests/flx-test.el
index 635e1beeb4..57b2a26be5 100644
--- a/tests/flx-test.el
+++ b/tests/flx-test.el
@@ -34,6 +34,7 @@
 (eval-when-compile (require 'cl))
 
 (require 'ert)
+(require 'async)
 (require 'flx)
 
 (ert-deftest flx-test-sanity ()
@@ -79,21 +80,6 @@
   (let ((vec (vector 1 2 3)))
     (should (equal (vector 2 3 4) (flx-inc-vec vec)))))
 
-(ert-deftest flx-matches-basic ()
-  (let* ((str "aggg")
-         (h (flx-get-hash-for-string str 'flx-get-heatmap-str))
-         (res (flx-get-matches h "g")))
-    (should (equal res '((1) (2) (3))))))
-
-
-(ert-deftest flx-matches-more ()
-  (let* ((str "ab-gh-ab")
-         (h (flx-get-hash-for-string str 'flx-get-heatmap-str))
-         (res (flx-get-matches h "ab")))
-    (should (equal res '((0 1)
-                         (0 7)
-                         (6 7))))))
-
 (ert-deftest flx-get-heatmap-vector-basic ()
   "see worksheet for derivation"
   (let ((res (flx-get-heatmap-file "__abcab")))
@@ -214,6 +200,7 @@ In this case, the match with more contiguous characters is 
better."
 ;;; makes, we've gone the opposite way.  :)
 ;;;
 ;;; We strongly prefer basename matches, where as they do not.
+
 (ert-deftest flx-imported-prioritizes-matches-after-/ ()
   (let ((query "b"))
     (let ((higher (flx-score "foo/bar" query (flx-make-filename-cache)))
@@ -363,6 +350,24 @@ substring can overpower abbreviation."
     (should (not upper-no-folds))))
 
 
+;;; perf
+
+(ert-deftest flx-prune-search-space-optimizations ()
+  "Make sure optimizations that prune bad paths early are working."
+  (let ((future (async-start
+                 `(lambda ()
+                    ,(async-inject-variables "\\`load-path\\'")
+                    (require 'flx)
+                    (flx-score 
"~/foo/bar/blah.elllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllll"
 "lllllllllllllllllllllllllllllllll" (flx-make-filename-cache)))
+                 nil))
+        result)
+    (with-timeout (1 (kill-process future) )
+      (while (not result) ;; while process is running
+        (sit-for .2)
+        (when (async-ready future)
+          (setq result (async-get future)))))
+    (should result)))
+
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;; flx-test.el ends here
diff --git a/tests/run-test.el b/tests/run-test.el
index 6e0bd119a8..c9842f7348 100644
--- a/tests/run-test.el
+++ b/tests/run-test.el
@@ -23,6 +23,13 @@
             flx-root-dir))
 
 
+;; Cask
+(setq package-user-dir
+      (expand-file-name (format ".cask/%s/elpa" emacs-version) flx-root-dir))
+(package-initialize)
+
+
+
 ;; Use ERT from github when this Emacs does not have it
 (unless (locate-library "ert")
   (add-to-list



reply via email to

[Prev in Thread] Current Thread [Next in Thread]