emacs-elpa-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[nongnu] elpa/flx f6cd01d759 146/182: Merge branch 'perf/dynamic-program


From: ELPA Syncer
Subject: [nongnu] elpa/flx f6cd01d759 146/182: Merge branch 'perf/dynamic-programming' into 0.6
Date: Tue, 13 Dec 2022 03:59:38 -0500 (EST)

branch: elpa/flx
commit f6cd01d759d4955d2515c984e4577ad63b9c7521
Merge: b340205929 db1a328e41
Author: PythonNut <PythonNut@users.noreply.github.com>
Commit: PythonNut <PythonNut@users.noreply.github.com>

    Merge branch 'perf/dynamic-programming' into 0.6
---
 .travis.yml       |   4 ++
 Cask              |   5 ++
 Makefile          |   2 +-
 flx.el            | 171 +++++++++++++++++++++++++++++++++++-------------------
 tests/flx-test.el |  35 ++++++-----
 tests/run-test.el |   7 +++
 6 files changed, 148 insertions(+), 76 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 673f50e13a..eaae514c4a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -12,6 +12,10 @@ before_install:
       sudo apt-get install -qq
           emacs24 emacs24-el emacs24-common-non-dfsg;
     fi
+  - curl -fsSL https://raw.githubusercontent.com/cask/cask/master/go | python
+  - pwd
+  - ~/.cask/bin/cask
+
 env:
   - EMACS=emacs24 TAGS="--tags ~@requires-e24-3"
   - EMACS=emacs-snapshot TAGS=""
diff --git a/Cask b/Cask
new file mode 100644
index 0000000000..ba211511fc
--- /dev/null
+++ b/Cask
@@ -0,0 +1,5 @@
+(source gnu)
+(source melpa)
+
+(development
+ (depends-on "async"))
diff --git a/Makefile b/Makefile
index 9c613a7d22..84a21f9b7a 100644
--- a/Makefile
+++ b/Makefile
@@ -15,7 +15,7 @@ all: $(ELCS)
 clean:
        $(RM) $(ELCS) $(TEST_ELCS)
 
-show-version: show-version
+show-version:
        echo "*** Emacs version ***"
        echo "EMACS = `which ${EMACS}`"
        ${EMACS} --version
diff --git a/flx.el b/flx.el
index 97985553ec..f038d8ae24 100644
--- a/flx.el
+++ b/flx.el
@@ -229,34 +229,6 @@ See documentation for logic."
                  (cl-return sub)))
       sorted-list))
 
-(defun flx-get-matches (hash query &optional greater-than q-index)
-  "Return list of all unique indexes into str where query can match.
-
-That is all character sequences of query that occur in str are returned.
-
-HASH accept as the cached analysis of str.
-sstr
-e.g. (\"aab\" \"ab\") returns
-       '((0 2) (1 2)
-"
-
-  (setq q-index (or q-index 0))
-  (let* ((q-char (aref query q-index))
-         (indexes (flx-bigger-sublist
-                   (gethash q-char hash) greater-than)))
-    (if (< q-index (1- (length query)))
-        (apply                        ; `mapcan'
-         'nconc
-         (mapcar
-          (lambda (index)
-            (let ((next-matches-for-rest (flx-get-matches hash query  index 
(1+ q-index))))
-              (when next-matches-for-rest
-                (mapcar (lambda (match)
-                          (cons index match))
-                        next-matches-for-rest))))
-          indexes))
-      (mapcar 'list indexes))))
-
 (defun flx-make-filename-cache ()
   "Return cache hashtable appropraite for storing filenames."
   (flx-make-string-cache 'flx-get-heatmap-file))
@@ -282,43 +254,122 @@ e.g. (\"aab\" \"ab\") returns
             (puthash str res cache))
           res))))
 
+(defun flx-find-best-match (str-info
+                            heatmap
+                            greater-than
+                            query
+                            query-length
+                            q-index
+                            match-cache)
+  "Recursively compute the best match for a string, passed as STR-INFO and
+HEATMAP, according to QUERY.
+
+This function uses MATCH-CACHE to memoize its return values.
+For other parameters, see `flx-score'"
+
+  ;; Here, we use a simple N'ary hashing scheme
+  ;; You could use (/ hash-key query-length) to get greater-than
+  ;; Or, (mod hash-key query-length) to get q-index
+  ;; We use this instead of a cons key for the sake of efficiency
+  (let* ((hash-key (+ q-index
+                      (* (or greater-than 0)
+                         query-length)))
+         (hash-value (gethash hash-key match-cache)))
+    (if hash-value
+        ;; Here, we use the value 'no-match to distinguish a cache miss
+        ;; from a nil (i.e. non-matching) return value
+        (if (eq hash-value 'no-match)
+            nil
+          hash-value)
+      (let ((indexes (flx-bigger-sublist
+                       (gethash (aref query q-index) str-info)
+                       greater-than))
+            (match)
+            (temp-score)
+            (best-score most-negative-fixnum))
+
+        ;; Matches are of the form:
+        ;; ((match_indexes) . (score . contiguous-count))
+        (if (>= q-index (1- query-length))
+            ;; At the tail end of the recursion, simply
+            ;; generate all possible matches with their scores
+            ;; and return the list to parent.
+            (setq match (mapcar (lambda (index)
+                                  (cons (list index)
+                                        (cons (aref heatmap index) 0)))
+                                indexes))
+          (dolist (index indexes)
+            (dolist (elem (flx-find-best-match str-info
+                                               heatmap
+                                               index
+                                               query
+                                               query-length
+                                               (1+ q-index)
+                                               match-cache))
+              (setq temp-score
+                    (if (= (1- (caar elem)) index)
+                        (+ (cadr elem)
+                           (aref heatmap index)
+
+                           ;; boost contiguous matches
+                           (* (min (cddr elem)
+                                   3)
+                              15)
+                           60)
+                      (+ (cadr elem)
+                         (aref heatmap index))))
+
+              ;; We only care about the optimal match, so only
+              ;; forward the match with the best score to parent
+              (when (> temp-score best-score)
+                (setq best-score temp-score
+                      match (list (cons (cons index (car elem))
+                                        (cons temp-score
+                                              (if (= (1- (caar elem))
+                                                     index)
+                                                  (1+ (cddr elem))
+                                                0)))))))))
+
+        ;; Calls are cached to avoid exponential time complexity
+        (puthash hash-key
+                 (if match match 'no-match)
+                 match-cache)
+        match))))
 
 (defun flx-score (str query &optional cache)
   "Return best score matching QUERY against STR"
   (unless (or (zerop (length query))
               (zerop (length str)))
-    (let* ((info-hash (flx-process-cache str cache))
-           (heatmap (gethash 'heatmap info-hash))
-           (matches (flx-get-matches info-hash query))
-           (query-length (length query))
-           (full-match-boost (and (< query-length 5)
-                                  (> query-length 1)))
-           (best-score nil))
-      (mapc (lambda (match-positions)
-              (let ((score (if (and
-                                full-match-boost
-                                (= (length match-positions)
-                                   (length str)))
-                               10000
-                             0))
-                    (contiguous-count 0)
-                    last-match)
-                (cl-loop for index in match-positions
-                      do (progn
-                           (if (and last-match
-                                    (= (1+ last-match) index))
-                               (cl-incf contiguous-count)
-                             (setq contiguous-count 0))
-                           (cl-incf score (aref heatmap index))
-                           (when (> contiguous-count 0)
-                             (cl-incf score (+ 45 (* 15 (min contiguous-count 
4)))))
-                           (setq last-match index)))
-                (if (or (null best-score)
-                        (> score (car best-score)))
-                    (setq best-score (cons score match-positions)))))
-            matches)
-      best-score)))
-
+    (let*
+        ((str-info (flx-process-cache str cache))
+         (heatmap (gethash 'heatmap str-info))
+         (query-length (length query))
+         (full-match-boost (and (< 1 query-length)
+                                (< query-length 5)))
+
+         ;; Dynamic Programming table for memoizing flx-find-best-match
+         (match-cache (make-hash-table :test 'eql :size 10))
+
+         (optimal-match (flx-find-best-match str-info
+                                             heatmap
+                                             nil
+                                             query
+                                             query-length
+                                             0
+                                             match-cache)))
+      ;; Postprocess candidate
+      (and optimal-match
+           (cons
+            ;; This is the computed score, adjusted to boost the scores
+            ;; of exact matches.
+            (if (and full-match-boost
+                     (=  (length (caar optimal-match))
+                         (length str)))
+                (+ (cl-cadar optimal-match) 10000)
+              (cl-cadar optimal-match))
+
+            ;; This is the list of match positions
+            (caar optimal-match))))))
 
 (defun flx-propertize (obj score &optional add-score)
   "Return propertized copy of obj according to score.
diff --git a/tests/flx-test.el b/tests/flx-test.el
index 635e1beeb4..57b2a26be5 100644
--- a/tests/flx-test.el
+++ b/tests/flx-test.el
@@ -34,6 +34,7 @@
 (eval-when-compile (require 'cl))
 
 (require 'ert)
+(require 'async)
 (require 'flx)
 
 (ert-deftest flx-test-sanity ()
@@ -79,21 +80,6 @@
   (let ((vec (vector 1 2 3)))
     (should (equal (vector 2 3 4) (flx-inc-vec vec)))))
 
-(ert-deftest flx-matches-basic ()
-  (let* ((str "aggg")
-         (h (flx-get-hash-for-string str 'flx-get-heatmap-str))
-         (res (flx-get-matches h "g")))
-    (should (equal res '((1) (2) (3))))))
-
-
-(ert-deftest flx-matches-more ()
-  (let* ((str "ab-gh-ab")
-         (h (flx-get-hash-for-string str 'flx-get-heatmap-str))
-         (res (flx-get-matches h "ab")))
-    (should (equal res '((0 1)
-                         (0 7)
-                         (6 7))))))
-
 (ert-deftest flx-get-heatmap-vector-basic ()
   "see worksheet for derivation"
   (let ((res (flx-get-heatmap-file "__abcab")))
@@ -214,6 +200,7 @@ In this case, the match with more contiguous characters is 
better."
 ;;; makes, we've gone the opposite way.  :)
 ;;;
 ;;; We strongly prefer basename matches, where as they do not.
+
 (ert-deftest flx-imported-prioritizes-matches-after-/ ()
   (let ((query "b"))
     (let ((higher (flx-score "foo/bar" query (flx-make-filename-cache)))
@@ -363,6 +350,24 @@ substring can overpower abbreviation."
     (should (not upper-no-folds))))
 
 
+;;; perf
+
+(ert-deftest flx-prune-search-space-optimizations ()
+  "Make sure optimizations that prune bad paths early are working."
+  (let ((future (async-start
+                 `(lambda ()
+                    ,(async-inject-variables "\\`load-path\\'")
+                    (require 'flx)
+                    (flx-score 
"~/foo/bar/blah.elllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllll"
 "lllllllllllllllllllllllllllllllll" (flx-make-filename-cache)))
+                 nil))
+        result)
+    (with-timeout (1 (kill-process future) )
+      (while (not result) ;; while process is running
+        (sit-for .2)
+        (when (async-ready future)
+          (setq result (async-get future)))))
+    (should result)))
+
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;; flx-test.el ends here
diff --git a/tests/run-test.el b/tests/run-test.el
index 6e0bd119a8..c9842f7348 100644
--- a/tests/run-test.el
+++ b/tests/run-test.el
@@ -23,6 +23,13 @@
             flx-root-dir))
 
 
+;; Cask
+(setq package-user-dir
+      (expand-file-name (format ".cask/%s/elpa" emacs-version) flx-root-dir))
+(package-initialize)
+
+
+
 ;; Use ERT from github when this Emacs does not have it
 (unless (locate-library "ert")
   (add-to-list



reply via email to

[Prev in Thread] Current Thread [Next in Thread]