[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[elpa] externals/elisa ef06534f46 62/98: Implement incremental parsing
From: ELPA Syncer
Subject: [elpa] externals/elisa ef06534f46 62/98: Implement incremental parsing
Date: Wed, 17 Jul 2024 18:58:04 -0400 (EDT)
branch: externals/elisa
commit ef06534f461106a7e00fc308cd1c78f21ed29aa7
Author: Sergey Kostyaev <kostyaev.sergey2@wb.ru>
Commit: Sergey Kostyaev <kostyaev.sergey2@wb.ru>
Implement incremental parsing
---
elisa.el | 88 ++++++++++++++++++++++++++++++++++++++++------------------------
1 file changed, 55 insertions(+), 33 deletions(-)
diff --git a/elisa.el b/elisa.el
index 3b713923ea..d70085a728 100644
--- a/elisa.el
+++ b/elisa.el
@@ -679,27 +679,16 @@ When FORCE parse even if already parsed."
(not (string-equal hash prev-hash)))
(with-current-buffer buf
(let ((chunks (elisa-split-semantically))
- (row-ids (flatten-tree (sqlite-select
- elisa-db
- (format "select rowid from data where
path = '%s';"
- (elisa-sqlite-escape path)))))
+ (old-row-ids
+ (flatten-tree (sqlite-select
+ elisa-db
+ (format "select rowid from data where path =
'%s';"
+ (elisa-sqlite-escape path)))))
+ (row-ids nil)
(kind-id (caar (sqlite-select
elisa-db
"select rowid from kinds where name = 'file';"))))
;; remove old data
- (when row-ids
- (sqlite-execute
- elisa-db
- (format "delete from data_fts where rowid in %s;"
- (elisa-sqlite-format-int-list row-ids)))
- (sqlite-execute
- elisa-db
- (format "delete from data_embeddings where rowid in %s;"
- (elisa-sqlite-format-int-list row-ids)))
- (sqlite-execute
- elisa-db
- (format "delete from data where rowid in %s;"
- (elisa-sqlite-format-int-list row-ids))))
(when prev-hash
(sqlite-execute
elisa-db
@@ -709,14 +698,15 @@ When FORCE parse even if already parsed."
(mapc
(lambda (text)
(let* ((hash (secure-hash 'sha256 text))
- (embedding (llm-embedding elisa-embeddings-provider text))
(rowid
(if-let ((rowid (caar (sqlite-select
elisa-db
(format "select rowid from data
where kind_id = %s and collection_id = %s and path = '%s' and hash = '%s';"
kind-id collection-id
(elisa-sqlite-escape path)
hash)))))
- nil
+ (progn
+ (push rowid row-ids)
+ nil)
(sqlite-execute
elisa-db
(format
@@ -732,12 +722,20 @@ When FORCE parse even if already parsed."
(sqlite-execute
elisa-db
(format "insert into data_embeddings(rowid, embedding) values
(%s, %s);"
- rowid (elisa-vector-to-sqlite embedding)))
+ rowid (elisa-vector-to-sqlite
+ (llm-embedding elisa-embeddings-provider
text))))
(sqlite-execute
elisa-db
(format "insert into data_fts(rowid, data) values (%s, '%s');"
- rowid (elisa-sqlite-escape text))))))
+ rowid (elisa-sqlite-escape text)))
+ (push rowid row-ids))))
chunks)
+ ;; remove old data
+ (when row-ids
+ (let ((delete-rows (cl-remove-if (lambda (id)
+ (cl-find id row-ids))
+ old-row-ids)))
+ (elisa--delete-data delete-rows)))
;; save hash to files table
(sqlite-execute
elisa-db
@@ -747,25 +745,49 @@ When FORCE parse even if already parsed."
(when (not opened)
(kill-buffer buf))))
+(defun elisa--delete-data (ids)
+ "Delete data with IDS."
+ (sqlite-execute
+ elisa-db
+ (format "delete from data_fts where rowid in %s;"
+ (elisa-sqlite-format-int-list ids)))
+ (sqlite-execute
+ elisa-db
+ (format "delete from data_embeddings where rowid in %s;"
+ (elisa-sqlite-format-int-list ids)))
+ (sqlite-execute
+ elisa-db
+ (format "delete from data where rowid in %s;"
+ (elisa-sqlite-format-int-list ids))))
+
(defun elisa-parse-directory (dir)
"Parse DIR as new collection syncronously."
(interactive "DSelect directory: ")
(setq dir (expand-file-name dir))
- (let ((collection-id (progn
- (sqlite-execute
- elisa-db
- (format
- "insert into collections (name) values ('%s') on
conflict do nothing;"
- (elisa-sqlite-escape dir)))
- (caar (sqlite-select
- elisa-db
- (format
- "select rowid from collections where name =
'%s';"
- (elisa-sqlite-escape dir)))))))
+ (let* ((collection-id (progn
+ (sqlite-execute
+ elisa-db
+ (format
+ "insert into collections (name) values ('%s') on
conflict do nothing;"
+ (elisa-sqlite-escape dir)))
+ (caar (sqlite-select
+ elisa-db
+ (format
+ "select rowid from collections where name =
'%s';"
+ (elisa-sqlite-escape dir))))))
+ (files (elisa--file-list dir))
+ (delete-ids (flatten-tree
+ (sqlite-select
+ elisa-db
+ (format
+ "select rowid from data where collection_id = %d and
path not in %s;"
+ collection-id
+ (elisa-sqlite-format-string-list files))))))
+ (elisa--delete-data delete-ids)
(mapc (lambda (file)
(message "parsing %s" file)
(elisa-parse-file collection-id file))
- (elisa--file-list dir))))
+ files)))
(defun elisa-search-duckduckgo (prompt)
"Search duckduckgo for PROMPT and return list of urls."
- [elpa] externals/elisa 0f94c23a5d 40/98: Add more sqlite tables, (continued)
- [elpa] externals/elisa 0f94c23a5d 40/98: Add more sqlite tables, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa e877f8f5c7 44/98: First implementation for web search, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 5ca66e9f0d 47/98: Fix custom variables, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 273a1d492d 50/98: Add reranker to RAG pipeline, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa ade7ac0af9 52/98: Update info manual parsing, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 8a2c92dc34 54/98: Fix parsing info manuals, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa cecc5cb13f 55/98: Make sync parsing interactive, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 4cad3085fd 57/98: Use more async calls to prevent emacs from blocking, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa ad130b564f 60/98: Add parse file function, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa b419fb2cf2 61/98: Add code for parsing directory as an elisa collection, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa ef06534f46 62/98: Implement incremental parsing, ELPA Syncer <=
- [elpa] externals/elisa 0e32d7bb5c 63/98: Add async directory parsing, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa f744ce305a 67/98: Add reparse current collection command, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 9ad7827337 70/98: Fix semantic split with single chunk, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 439ed1d4f8 76/98: Make executable customization simpler, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa e5691f59c5 80/98: Make syncronous functions non-interactive, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa e92628390b 82/98: Update example configuration, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 39915439a4 84/98: Update installation instructions, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa fbfe3b4ae1 86/98: Merge pull request #12 from s-kostyaev/semantic-split, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 3882b9b322 87/98: Bump version, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 04d580f072 92/98: add vector- and vss-path to injected variables on async, ELPA Syncer, 2024/07/17