emacs-elpa-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[elpa] externals/elisa 24b0ba8f2e 6/7: Merge pull request #28 from s-kos


From: ELPA Syncer
Subject: [elpa] externals/elisa 24b0ba8f2e 6/7: Merge pull request #28 from s-kostyaev/add-batch-embeddings-calculation
Date: Sat, 23 Nov 2024 12:57:57 -0500 (EST)

branch: externals/elisa
commit 24b0ba8f2ef8d7641d0dc65b4daf3fbe933be29c
Merge: 4a4d1db359 860936af49
Author: Sergey Kostyaev <s-kostyaev@users.noreply.github.com>
Commit: GitHub <noreply@github.com>

    Merge pull request #28 from s-kostyaev/add-batch-embeddings-calculation
    
    Speed up semantic splitting with batch embeddings
---
 README.org |  4 ++++
 elisa.el   | 37 +++++++++++++++++++++++++++++--------
 2 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/README.org b/README.org
index b1176d571d..acf155f1e0 100644
--- a/README.org
+++ b/README.org
@@ -305,6 +305,10 @@ Example configuration.
     * Type: String
     * Description: Template used for rewriting prompts for better retrieval.
 
++ ~elisa-batch-embeddings-enabled~:
+    * Type: Boolean
+    * Description: Enable batch embeddings if supported.
+
 **** Web Search and Integration
 
 + ~elisa-searxng-url~:
diff --git a/elisa.el b/elisa.el
index 5a1c4ab725..70932a9c9a 100644
--- a/elisa.el
+++ b/elisa.el
@@ -5,7 +5,7 @@
 ;; Author: Sergey Kostyaev <sskostyaev@gmail.com>
 ;; URL: http://github.com/s-kostyaev/elisa
 ;; Keywords: help local tools
-;; Package-Requires: ((emacs "29.2") (ellama "0.11.2") (llm "0.9.1") (async 
"1.9.8") (plz "0.9"))
+;; Package-Requires: ((emacs "29.2") (ellama "0.11.2") (llm "0.18.1") (async 
"1.9.8") (plz "0.9"))
 ;; Version: 1.1.1
 ;; SPDX-License-Identifier: GPL-3.0-or-later
 ;; Created: 18th Feb 2024
@@ -275,6 +275,10 @@ If set, all quotes with similarity less than threshold 
will be filtered out."
   "Supported complex document file extensions."
   :type '(repeat string))
 
+(defcustom elisa-batch-embeddings-enabled nil
+  "Enable batch embeddings if supported."
+  :type 'boolean)
+
 (defun elisa-supported-complex-document-p (path)
   "Check if PATH contain supported complex document."
   (cl-find (file-name-extension path)
@@ -455,6 +459,23 @@ FOREIGN KEY(collection_id) REFERENCES collections(rowid)
   "Calculate breakpoint threshold for DISTANCES based on K standard 
deviations."
   (+ (elisa-avg distances) (* k (elisa-std-dev distances))))
 
+(defun elisa-string-empty-p (s)
+  "Check if string S contain only spacing."
+  (length= (string-trim s) 0))
+
+(defun elisa-filter-strings (chunks)
+  "Filter out empty CHUNKS."
+  (cl-remove-if #'elisa-string-empty-p chunks))
+
+(defun elisa-embeddings (chunks)
+  "Calculate embeddings for CHUNKS.
+Return list of vectors."
+  (let ((provider elisa-embeddings-provider))
+    (if (and elisa-batch-embeddings-enabled
+            (member 'embeddings-batch (llm-capabilities provider)))
+       (llm-batch-embeddings provider chunks)
+      (mapcar (lambda (chunk) (llm-embedding provider chunk)) chunks))))
+
 (defun elisa-parse-info-manual (name collection-name)
   "Parse info manual with NAME and save index to COLLECTION-NAME."
   (with-temp-buffer
@@ -673,13 +694,8 @@ ARGS contains keys for fine control.
 than T, it will be packed into single semantic chunk."
   (if-let* ((func (or (plist-get args :function) 
elisa-semantic-split-function))
            (k (or (plist-get args :threshold-amount) 
elisa-breakpoint-threshold-amount))
-           (chunks (funcall func))
-           (embeddings (cl-remove-if
-                        #'not
-                        (mapcar (lambda (s)
-                                  (when (length> (string-trim s) 0)
-                                    (llm-embedding elisa-embeddings-provider 
s)))
-                                chunks)))
+           (chunks (elisa-filter-strings (funcall func)))
+           (embeddings (elisa-embeddings chunks))
            (distances (elisa--distances embeddings))
            (threshold (elisa-calculate-threshold k distances))
            (current (car chunks))
@@ -867,8 +883,11 @@ When FORCE parse even if already parsed."
                     (elisa-parse-directory
                      (expand-file-name dir)))))
 
+(defvar eww-accept-content-types)
+
 (defun elisa-search-duckduckgo (prompt)
   "Search duckduckgo for PROMPT and return list of urls."
+  (require 'eww)
   (let* ((url (format "https://duckduckgo.com/html/?q=%s"; (url-hexify-string 
prompt)))
         (buffer-name (plz 'get url :as 'buffer
                        :headers `(("Accept" . ,eww-accept-content-types)
@@ -962,6 +981,7 @@ You can customize `elisa-searxng-url' to use non local 
instance."
 
 (defun elisa-get-webpage-buffer (url)
   "Get buffer with URL content."
+  (require 'eww)
   (let ((buffer-name (ignore-errors
                       (plz 'get url :as 'buffer
                         :headers `(("Accept" . ,eww-accept-content-types)
@@ -1244,6 +1264,7 @@ Call ON-DONE callback with result as an argument after 
FUNC evaluation done."
                    ,(async-inject-variables "elisa-find-executable")
                    ,(async-inject-variables "elisa-tar-executable")
                    ,(async-inject-variables "elisa-prompt-rewriting-enabled")
+                   ,(async-inject-variables "elisa-batch-embeddings-enabled")
                    ,(async-inject-variables "elisa-rewrite-prompt-template")
                    ,(async-inject-variables "elisa-semantic-split-function")
                    ,(async-inject-variables 
"elisa-webpage-extraction-function")



reply via email to

[Prev in Thread] Current Thread [Next in Thread]