[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[elpa] externals/elisa 3874a7007c 36/98: Improve semantic split api
From: ELPA Syncer
Subject: [elpa] externals/elisa 3874a7007c 36/98: Improve semantic split api
Date: Wed, 17 Jul 2024 18:58:02 -0400 (EDT)
branch: externals/elisa
commit 3874a7007c11dde8de0f136ae2587cf87811cb3f
Author: Sergey Kostyaev <kostyaev.sergey2@wb.ru>
Commit: Sergey Kostyaev <kostyaev.sergey2@wb.ru>
Improve semantic split api
---
elisa.el | 30 ++++++++++++++++++++++--------
1 file changed, 22 insertions(+), 8 deletions(-)
diff --git a/elisa.el b/elisa.el
index a222b61745..4f443d5318 100644
--- a/elisa.el
+++ b/elisa.el
@@ -97,6 +97,11 @@
:group 'tools
:type 'float)
+(defcustom elisa-semantic-split-function 'elisa-split-by-paragraph
+ "Function for semantic text split."
+ :group 'tools
+ :type 'function)
+
(defcustom elisa-prompt-rewriting-enabled t
"Enable prompt rewriting for better retrieving."
:group 'tools
@@ -264,11 +269,11 @@ concise. Act like user. Prompt:
(setq pt (point)))
(nreverse (cl-remove-if #'string-empty-p result)))))
-(defun elisa--split-by-sentence ()
+(defun elisa-split-by-sentence ()
"Split buffer to list of sentences."
(elisa--split-by #'forward-sentence))
-(defun elisa--split-by-paragraph ()
+(defun elisa-split-by-paragraph ()
"Split buffer to list of paragraphs."
(elisa--split-by #'forward-paragraph))
@@ -308,16 +313,24 @@ closer it is to 1, the more similar it is."
(setq tail (cdr tail)))
(nreverse result)))
-(defun elisa-split-semantically ()
- "Split buffer data semantically."
- (let* ((paragraphs (elisa--split-by-paragraph))
+(defun elisa-split-semantically (&rest args)
+ "Split buffer data semantically.
+ARGS contains keys for fine control.
+
+:function FUNC -- FUNC is a function for splitting the buffer into chunks.
+
+:threshold T -- T is a floating point number. If the similarity of two chunks is
+greater than T, they are packed into a single semantic chunk."
+ (let* ((func (or (plist-get args :function) elisa-semantic-split-function))
+ (threshold (or (plist-get args :threshold)
elisa-semantic-split-threshold))
+ (chunks (funcall func))
(embeddings (mapcar (lambda (s)
(llm-embedding elisa-embeddings-provider s))
- paragraphs))
+ chunks))
(similarities (elisa--similarities embeddings))
(result nil)
- (current (car paragraphs))
- (tail (cdr paragraphs)))
+ (current (car chunks))
+ (tail (cdr chunks)))
(mapc
(lambda (el)
(if (> el elisa-semantic-split-threshold)
@@ -408,6 +421,7 @@ closer it is to 1, the more similar it is."
,(async-inject-variables "elisa-find-executable")
,(async-inject-variables "elisa-tar-executable")
,(async-inject-variables "elisa-semantic-split-threshold")
+ ,(async-inject-variables "elisa-semantic-split-function")
,(async-inject-variables "load-path")
(require 'elisa)
(,func))
- [elpa] externals/elisa 7d8da4cd04 79/98: Fix checkdoc warning, (continued)
- [elpa] externals/elisa 7d8da4cd04 79/98: Fix checkdoc warning, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 539b2ab04c 03/98: Add readme skeleton, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa fc5f9cae2a 45/98: Fix semantic split, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 2d8c6ef402 56/98: Use WAL for sqlite for handle concurrency, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 27fc248901 71/98: Remove files metadata on collection remove, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 5bf5d63fc1 07/98: Improve defaults, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 61dd36890a 21/98: Enable load file check in CI, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 1f5393d37e 22/98: Warn user instead of network call during package loading, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 80401a0b52 30/98: Search for gzipped builtin manuals, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa c59d491e18 35/98: Add prompt rewriting with ellama-chain, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 3874a7007c 36/98: Improve semantic split api,
ELPA Syncer <=
- [elpa] externals/elisa f2bf34b201 39/98: Improve sqlite escape function, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 45b854ba2d 46/98: Fix linter warning, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa d58e172912 48/98: Make web search async, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa f312d189f2 51/98: Fix one word lines in webpage quotes parsed asyncronously, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 0fc73b4b9a 69/98: Add instruction to elisa-chat, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 4710f87851 72/98: Refactor parsing info manuals, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 86f4ea0afb 74/98: Fix parsing info manuals with infinite loop, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 9c7004e15c 77/98: Add functionality to parse web pages and add them to collections, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 4063c45908 78/98: Add custom variables documentation, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 0c432dd1c8 81/98: Add commands to documentation, ELPA Syncer, 2024/07/17