[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[elpa] externals/elisa ade7ac0af9 52/98: Update info manual parsing
From: ELPA Syncer
Subject: [elpa] externals/elisa ade7ac0af9 52/98: Update info manual parsing
Date: Wed, 17 Jul 2024 18:58:04 -0400 (EDT)
branch: externals/elisa
commit ade7ac0af9d63e74d04b15c7c43e60d711e49fd5
Author: Sergey Kostyaev <kostyaev.sergey2@wb.ru>
Commit: Sergey Kostyaev <kostyaev.sergey2@wb.ru>
Update info manual parsing
---
elisa.el | 87 ++++++++++++++++++++++++++++++++++++++++++----------------------
1 file changed, 58 insertions(+), 29 deletions(-)
diff --git a/elisa.el b/elisa.el
index e14b62be60..d78fa71184 100644
--- a/elisa.el
+++ b/elisa.el
@@ -300,7 +300,8 @@ FOREIGN KEY(collection_id) REFERENCES collections(rowid)
(thread-last
s
(string-replace "'" "''")
- (string-replace "\\" "\\\\")))
+ (string-replace "\\" "\\\\")
+ (string-replace "\0" "\n")))
(defun elisa-sqlite-format-int-list (ids)
"Convert list of integer IDS list to sqlite list representation."
@@ -329,40 +330,67 @@ FOREIGN KEY(collection_id) REFERENCES collections(rowid)
"Calculate breakpoint threshold for DISTANCES based on K standard deviations."
(+ (elisa-avg distances) (* k (elisa-std-dev distances))))
-(defun elisa-parse-info-manual (name)
- "Parse info manual with NAME and save index to database."
+(defun elisa-parse-info-manual (name collection-name)
+ "Parse info manual with NAME and save index to COLLECTION-NAME."
(with-temp-buffer
(info name (current-buffer))
- (let ((continue t))
+ (let ((collection-id (or (caar (sqlite-select
+ elisa-db
+ (format
+ "select rowid from collections where name = '%s';"
+ collection-name)))
+ (progn
+ (sqlite-execute
+ elisa-db
+ (format
+ "insert into collections (name) values ('%s');"
+ collection-name))
+ (caar (sqlite-select
+ elisa-db
+ (format
+ "select rowid from collections where name = '%s';"
+ collection-name))))))
+ (kind-id (caar (sqlite-select
+ elisa-db "select rowid from kinds where name = 'info';")))
+ (continue t))
(while continue
(let* ((node-name (concat "(" (file-name-sans-extension
(file-name-nondirectory
Info-current-file))
") "
Info-current-node))
- (content (buffer-substring-no-properties (point-min) (point-max)))
- (embedding (llm-embedding elisa-embeddings-provider content))
- (rowid (progn
- (sqlite-execute elisa-db
- (format
- "insert into info values('%s') on conflict do nothing;"
- (elisa-sqlite-escape node-name)))
- (caar
- (sqlite-select
- elisa-db
- (format "select rowid from info where node='%s';"
- (elisa-sqlite-escape node-name)))))))
- (when (not (caar
- (sqlite-select
- elisa-db
- (format "select rowid from elisa_embeddings where rowid=%s;" rowid))))
- (sqlite-execute
- elisa-db
- (format "insert into elisa_embeddings(rowid, embedding) values (%s, %s);"
- rowid
- (elisa-vector-to-sqlite embedding))))
+ (chunks (elisa-split-semantically)))
+ (mapc
+ (lambda (text)
+ (let* ((hash (secure-hash 'sha256 text))
+ (embedding (llm-embedding elisa-embeddings-provider text))
+ (rowid
+ (if-let ((rowid (caar (sqlite-select
+ elisa-db
+ (format "select rowid from data where kind_id = %s and collection_id = %s and path = '%s' and hash = '%s';"
+ kind-id collection-id node-name hash)))))
+ nil
+ (sqlite-execute
+ elisa-db
+ (format
+ "insert into data(kind_id, collection_id, path, hash, data) values (%s, %s, '%s', '%s', '%s');"
+ kind-id collection-id node-name hash (elisa-sqlite-escape text)))
+ (caar (sqlite-select
+ elisa-db
+ (format "select rowid from data where kind_id = %s and collection_id = %s and path = '%s' and hash = '%s';"
+ kind-id collection-id node-name hash))))))
+ (when rowid
+ (sqlite-execute
+ elisa-db
+ (format "insert into data_embeddings(rowid, embedding) values (%s, %s);"
+ rowid (elisa-vector-to-sqlite embedding)))
+ (sqlite-execute
+ elisa-db
+ (format "insert into data_fts(rowid, data) values (%s, '%s');"
+ rowid (elisa-sqlite-escape text))))))
+ chunks)
(condition-case nil
(progn (funcall-interactively #'Info-forward-node)
- (sleep-for 0 100))
+ (sleep-for 0 10))
(error
(setq continue nil))))))))
@@ -781,13 +809,13 @@ WHERE d.rowid in %s;"
(defun elisa-parse-builtin-manuals ()
"Parse builtin manuals."
(mapc (lambda (s)
- (ignore-errors (elisa-parse-info-manual s)))
+ (ignore-errors (elisa-parse-info-manual s "builtin manuals")))
(elisa-get-builtin-manuals)))
(defun elisa-parse-external-manuals ()
"Parse external manuals."
(mapc (lambda (s)
- (ignore-errors (elisa-parse-info-manual s)))
+ (ignore-errors (elisa-parse-info-manual s "external manuals")))
(elisa-get-external-manuals)))
(defun elisa-parse-all-manuals ()
@@ -802,7 +830,8 @@ WHERE d.rowid in %s;"
(setq elisa-db db)))
(defun elisa--async-do (func &optional on-done)
- "Parse asyncronously with FUNC."
+ "Do FUNC asyncronously.
+Call ON-DONE callback with result as an argument after FUNC evaluation done."
(async-start `(lambda ()
,(async-inject-variables "elisa-embeddings-provider")
,(async-inject-variables "elisa-db-directory")
- [elpa] externals/elisa 48d96a9716 27/98: Merge pull request #5 from s-kostyaev/fix-load-file, (continued)
- [elpa] externals/elisa 48d96a9716 27/98: Merge pull request #5 from s-kostyaev/fix-load-file, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 8794e14d75 29/98: Improve built-in manuals directory location method, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa c03baded1e 32/98: Bump version, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa a99ed0b234 33/98: Add semantic splitting, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 90a76fc7c2 37/98: Add webpage semantic chunks extraction function, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa d90df5889d 38/98: Add function that return buffer with url content, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 0f94c23a5d 40/98: Add more sqlite tables, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa e877f8f5c7 44/98: First implementation for web search, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 5ca66e9f0d 47/98: Fix custom variables, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 273a1d492d 50/98: Add reranker to RAG pipeline, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa ade7ac0af9 52/98: Update info manual parsing, ELPA Syncer <=
- [elpa] externals/elisa 8a2c92dc34 54/98: Fix parsing info manuals, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa cecc5cb13f 55/98: Make sync parsing interactive, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 4cad3085fd 57/98: Use more async calls to prevent emacs from blocking, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa ad130b564f 60/98: Add parse file function, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa b419fb2cf2 61/98: Add code for parsing directory as an elisa collection, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa ef06534f46 62/98: Implement incremental parsing, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 0e32d7bb5c 63/98: Add async directory parsing, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa f744ce305a 67/98: Add reparse current collection command, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 9ad7827337 70/98: Fix semantic split with single chunk, ELPA Syncer, 2024/07/17
- [elpa] externals/elisa 439ed1d4f8 76/98: Make executable customization simpler, ELPA Syncer, 2024/07/17