[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[elpa] externals/dired-duplicates 80a6243e01 1/7: Implement fallback to internal hashing function
|
From: |
ELPA Syncer |
|
Subject: |
[elpa] externals/dired-duplicates 80a6243e01 1/7: Implement fallback to internal hashing function |
|
Date: |
Thu, 9 Nov 2023 09:57:41 -0500 (EST) |
branch: externals/dired-duplicates
commit 80a6243e01df7d6e344846edf19938ae81e15346
Author: Harald Judt <h.judt@gmx.at>
Commit: Harald Judt <h.judt@gmx.at>
Implement fallback to internal hashing function
When the checksum command cannot run because the executable is not
available, fall back to the internal hashing function `secure-hash'.
When comparing multiple locations, e.g. searching for files on local
and remote hosts, the executable might be usable on one host but not
the other, so remember this per host and try to be smart, because using
the internal function works everywhere but brings its own problems, like
being generally slower (inserting into a temp buffer) and having the
potential to exhaust memory when the file is too big.
Signed-off-by: Harald Judt <h.judt@gmx.at>
---
dired-duplicates.el | 88 ++++++++++++++++++++++++++++++++++++++++++-----------
1 file changed, 70 insertions(+), 18 deletions(-)
diff --git a/dired-duplicates.el b/dired-duplicates.el
index c4099449db..5d206e7bce 100644
--- a/dired-duplicates.el
+++ b/dired-duplicates.el
@@ -66,6 +66,22 @@ size."
:tag "Checksum executable"
:type 'string)
+(defcustom dired-duplicates-external-internal-algo-mapping
+ '(("sha512sum" . sha512)
+ ("sha384sum" . sha384)
+ ("sha256sum" . sha256)
+ ("sha224sum" . sha224)
+ ("sha1sum" . sha1)
+ ("md5sum" . md5))
+ "Mappings of checksum execs to internal secure hash algorithms.
+
+These mappings will be used in fallback cases to determine the
+secure hash function to use when the desired checksum
+executable (see `dired-duplicates-checksum-exec') cannot be
+found."
+ :tag "Checksum exec to internal algo mappings."
+ :type 'list)
+
(defcustom dired-duplicates-size-comparison-function
'<
"The comparison function used for sorting grouped results.
@@ -93,22 +109,46 @@ return boolean t if the file matches a criteria, otherwise nil."
(defvar dired-duplicates-directories nil
"List of directories that will be searched for duplicate files.")
-(defun dired-duplicates-checksum-file (file)
- "Create a checksum for FILE.
-
-The executable used is defined by `dired-duplicates-checksum-exec'."
- (let* ((default-directory (file-name-directory (expand-file-name file)))
- (exec (executable-find dired-duplicates-checksum-exec t))
- (file (expand-file-name (file-local-name file))))
- (unless exec
- (user-error "Checksum program %s not found in `exec-path'" exec))
- (with-temp-buffer
- (unless (zerop (process-file exec nil t nil file))
- (error "Failed to start checksum program %s" exec))
- (goto-char (point-min))
- (if (looking-at "\\`[[:alnum:]]+")
- (match-string 0)
- (error "Unexpected output from checksum program %s" exec)))))
+
+(defun dired-duplicates--checksum-file (file &optional exec)
+ "Create a checksum for FILE, optionally using EXEC.
+
+EXEC needs to be specified with its full path. If nil, use the
+internal function `secure-hash' with the appropriate algorithm,
+which will be deduced from `dired-duplicates-checksum-exec' via
+the `dired-duplicates-external-internal-algo-mapping'. Using
+`secure-hash' instead of spawning a process can be faster for
+very small files and will work even when the TRAMP method used
+does not provide a shell, but is usually slower and could cause
+memory issues for files bigger than the Emacs process or the
+machine can handle because they have to be loaded into a
+temporary buffer for the hash calculation."
+ (if (not exec)
+ (let ((message-log-max nil)
+ (hash-algo (alist-get dired-duplicates-checksum-exec
+ dired-duplicates-external-internal-algo-mapping
+ nil nil #'string=)))
+ (unless hash-algo
+ (user-error "Could not determine the correct hash algorithm for %s via %s"
+ dired-duplicates-checksum-exec
+ "`dired-duplicates-external-internal-algo-mapping'"))
+ (message "Internal checksumming of %s" file)
+ (with-temp-buffer
+ (let ((inhibit-message t))
+ (insert-file-contents-literally file))
+ (secure-hash hash-algo
+ (current-buffer))))
+ (let* ((default-directory (file-name-directory (expand-file-name file)))
+ (file (expand-file-name (file-local-name file)))
+ (message-log-max nil))
+ (with-temp-buffer
+ (message "External checksumming of %s" file)
+ (unless (zerop (process-file exec nil t nil file))
+ (error "Failed to start checksum program %s" exec))
+ (goto-char (point-min))
+ (if (looking-at "\\`[[:alnum:]]+")
+ (match-string 0)
+ (error "Unexpected output from checksum program %s" exec))))))
(defun dired-duplicates--apply-file-filter-functions (files)
"Apply file filter functions to FILES, returning the resulting list."
@@ -132,13 +172,25 @@ duplicate files as values."
and checksum-table = (make-hash-table :test 'equal)
for f in files
for size = (file-attribute-size (file-attributes f))
+ initially do
+ (message "Collecting sizes of %d files..." (length files))
do (setf (gethash size same-size-table)
(append (gethash size same-size-table) (list f)))
finally
- (cl-loop for same-size-files being the hash-value in same-size-table
+ (cl-loop with checksum-exec-availability = (make-hash-table :test 'equal)
+ initially do
+ (cl-loop for d in directories do
+ (let* ((default-directory (file-name-directory (expand-file-name d)))
+ (exec (executable-find dired-duplicates-checksum-exec t)))
+ (if exec
+ (setf (gethash (file-remote-p d) checksum-exec-availability) exec)
+ (message "Checksum program %s not found in exec-path, falling back to internal routines" exec))))
+
+ for same-size-files being the hash-value in same-size-table
if (cdr same-size-files) do
(cl-loop for f in same-size-files
- for checksum = (dired-duplicates-checksum-file f)
+ for checksum = (dired-duplicates--checksum-file f (gethash (file-remote-p f)
+ checksum-exec-availability))
do (setf (gethash checksum checksum-table)
(append (gethash checksum checksum-table) (list f)))))
(cl-loop for same-files being the hash-value in checksum-table
using (hash-key checksum)
- [elpa] externals/dired-duplicates updated (f821380a4f -> 349eebbd8b), ELPA Syncer, 2023/11/09
- [elpa] externals/dired-duplicates aa658b93d5 3/7: Always set the correct keymap as parent keymap, ELPA Syncer, 2023/11/09
- [elpa] externals/dired-duplicates 80a6243e01 1/7: Implement fallback to internal hashing function, ELPA Syncer <=
- [elpa] externals/dired-duplicates 349eebbd8b 7/7: Bump version to 0.2, ELPA Syncer, 2023/11/09
- [elpa] externals/dired-duplicates 2533a39dd9 4/7: Reorder assignment of the revert-buffer-function, ELPA Syncer, 2023/11/09
- [elpa] externals/dired-duplicates 9335c22d88 2/7: Do not truncate directory names in messages, ELPA Syncer, 2023/11/09
- [elpa] externals/dired-duplicates 3cdb20c714 6/7: Update README.org, ELPA Syncer, 2023/11/09
- [elpa] externals/dired-duplicates 41492269b7 5/7: Prevent use of internal hash functions for files exceeding a certain limit, ELPA Syncer, 2023/11/09