[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[nongnu] elpa/subed ea669b571b: subed-word-data: Color words based on sc
From: |
ELPA Syncer |
Subject: |
[nongnu] elpa/subed ea669b571b: subed-word-data: Color words based on score (WhisperX JSON) |
Date: |
Fri, 15 Nov 2024 22:00:39 -0500 (EST) |
branch: elpa/subed
commit ea669b571bf9a60d8e184c8a7c9ca7302784a3ae
Author: Sacha Chua <sacha@sachachua.com>
Commit: Sacha Chua <sacha@sachachua.com>
subed-word-data: Color words based on score (WhisperX JSON)
* subed/subed-word-data.el (subed-word-data-score-faces):
New option.
* subed/subed-word-data.el
(subed-word-data--extract-words-from-whisperx-json):
Include scores, too.
* subed/subed-word-data.el (subed-word-data-load-from-file):
Limit file completion.
* subed/subed-word-data.el (subed-word-data-load-from-string):
Handle strings.
* subed/subed-word-data.el
(subed-word-data-refresh-text-properties-for-subtitle):
Use thresholds if specified.
(subed-word-data--candidate-face): New defsubst.
(subed-word-data--add-word-properties): New defsubst.
---
subed/subed-word-data.el | 93 +++++++++++++++++++++++++++++++-----------------
1 file changed, 60 insertions(+), 33 deletions(-)
diff --git a/subed/subed-word-data.el b/subed/subed-word-data.el
index f99c312a6e..131996dc28 100644
--- a/subed/subed-word-data.el
+++ b/subed/subed-word-data.el
@@ -41,10 +41,16 @@
(defvar-local subed-word-data--cache nil
"Word-level timing in the form ((start . ms) (end . ms) (text . ms))")
+(defcustom subed-word-data-score-faces '((0.8 . compilation-info)
+ (0.4 . compilation-warning)
+ (0 . compilation-error))
+ "Alist of score thresholds and faces to use."
+ :type '(alist :key-type float :value-type face))
+
(defface subed-word-data-face '((((class color) (background light))
- :foreground "darkgreen")
- (((class color) (background dark))
- :foreground "lightgreen"))
+ :foreground "darkgreen")
+ (((class color) (background dark))
+ :foreground "lightgreen"))
"Face used for words with word data available.")
(defun subed-word-data--extract-words-from-srv2 (data)
@@ -72,19 +78,23 @@ Return a list of ((start . ?), (end . ?) (text . ?))."
rec))
text-elements))))
-(defun subed-word-data--extract-words-from-whisperx-json (file)
+(defun subed-word-data--extract-words-from-whisperx-json (file &optional
from-string)
"Extract the timing from file in WhisperX's JSON format.
-Return a list of ((start . ?), (end . ?) (text . ?))."
+Return a list of ((start . ?), (end . ?) (text . ?) (score . ?)).
+If FROM-STRING is non-nil, treat FILE as the data itself."
(let* ((json-object-type 'alist)
(json-array-type 'list)
- (data (json-read-file file))
+ (data (if from-string
+ (json-read-from-string file)
+ (json-read-file file)))
(base (seq-mapcat
(lambda
(segment)
(seq-map (lambda (info)
(let-alist info
`((start . ,(and .start (* 1000
.start)))
(end . ,(and .end (* 1000 .end)))
-
(text . ,(and .word)))))
+
(text . ,(identity .word))
+ (score . ,(identity .score)))))
(alist-get 'words segment)))
(alist-get
'segments data)))
last-end
@@ -92,9 +102,9 @@ Return a list of ((start . ?), (end . ?) (text . ?))."
;; numbers at the end of a sentence sometimes don't end up with
times
;; so we need to fix them
(while current
- (unless (alist-get 'start (car current))
; start
+ (unless (alist-get 'start (car current)) ; start
(set-cdr (assoc 1 'start (car current)) (1+
last-end)))
- (unless (alist-get 'end (car current))
; start
+ (unless (alist-get 'end (car current)) ; start
(set-cdr (assoc 1 'end (car current)) (1-
(alist-get 'start (cadr current)))))
(setq
last-end (alist-get 'end (car current))
@@ -112,16 +122,26 @@ For now, only SRV2 files are supported."
(defun subed-word-data-load-from-file (file)
"Load word-level timing from FILE.
For now, only SRV2 and JSON files are supported."
- (interactive "fFile: ")
+ (interactive (list (read-file-name "JSON or srv2: "
+ nil
+ nil
+ nil
+ nil
+ (lambda (f)
+ (string-match
+ "\\.json\\'\\|\\.srv2\\'"
+ f)))))
(subed-word-data--load
- (if (string-match "\\.json\\'" file)
- (subed-word-data--extract-words-from-whisperx-json file)
+ (if (and (stringp file) (string-match "\\.json\\'" file))
+ (subed-word-data--extract-words-from-whisperx-json file t)
(subed-word-data--extract-words-from-srv2 (xml-parse-file file)))))
(defun subed-word-data-load-from-string (string)
"Load word-level timing from STRING.
-For now, only SRV2 files are supported."
- (subed-word-data--load (subed-word-data--extract-words-from-srv2 string)))
+For now, only JSON or SRV2 files are supported."
+ (subed-word-data--load (if (string-match "^{" string)
+ (subed-word-data--extract-words-from-whisperx-json string)
+ (subed-word-data--extract-words-from-srv2 string))))
(defvar subed-word-data-extensions '(".en.srv2" ".srv2") "Extensions to search
for word data.")
@@ -249,17 +269,31 @@ Return non-nil if they are the same after normalization."
(setq candidate nil))
(setq try-list (cdr try-list)))
(when (and candidate (subed-word-data-compare (buffer-substring
(point) pos)
- (alist-get 'text
candidate)))
- (add-text-properties (point) pos
- (list 'subed-word-data-start
- (assoc-default 'start candidate)
- 'subed-word-data-end
- (assoc-default 'end candidate)
- 'font-lock-face 'subed-word-data-face))
- (add-face-text-property (point) pos
- 'subed-word-data-face)
+ (alist-get 'text candidate)))
+ (subed-word-data--add-word-properties (point) pos candidate)
(setq word-data try-list))))))
+(defsubst subed-word-data--candidate-face (candidate)
+ "Return the face to use for CANDIDATE."
+ (if (and (alist-get 'score candidate)
+ subed-word-data-confidence-faces)
+ (cdr (seq-find (lambda (threshold) (>= (alist-get 'score candidate) (car
threshold)))
+ subed-word-data-score-faces))
+ subed-word-data-face))
+
+(defsubst subed-word-data--add-word-properties (start end candidate)
+ "Add properties from START to END for CANDIDATE."
+ (let ((face (subed-word-data--candidate-face candidate)))
+ (add-text-properties start end
+ (list 'subed-word-data-start
+ (assoc-default 'start candidate)
+ 'subed-word-data-end
+ (assoc-default 'end candidate)
+ 'subed-word-data-score
+ (assoc-default 'score candidate)
+ 'font-lock-face face))
+ (add-face-text-property start end face)))
+
(defun subed-word-data-refresh-text-properties ()
"Add word data properties and face when available."
(interactive)
@@ -284,19 +318,12 @@ Return non-nil if they are the same after normalization."
(setq try-list (cdr try-list))
(while (and candidate
(not (subed-word-data-compare (buffer-substring
(point) pos)
- (alist-get 'text
candidate))))
+ (alist-get 'text candidate))))
(setq candidate (car try-list))
(setq try-list (cdr try-list)))
(when (and candidate (subed-word-data-compare (buffer-substring
(point) pos)
- (alist-get 'text
candidate)))
- (add-text-properties (point) pos
- (list 'subed-word-data-start
- (assoc-default 'start candidate)
- 'subed-word-data-end
- (assoc-default 'end candidate)
- 'font-lock-face
'subed-word-data-face))
- (add-face-text-property (point) pos
- 'subed-word-data-face)
+ (alist-get 'text candidate)))
+ ( subed-word-data--add-word-properties (point) pos candidate)
(setq word-data try-list)))))
(or (subed-forward-subtitle-id)
(goto-char (point-max)))))))
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [nongnu] elpa/subed ea669b571b: subed-word-data: Color words based on score (WhisperX JSON),
ELPA Syncer <=