guix-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

02/02: Speed up the finding of missing sources


From: Christopher Baines
Subject: 02/02: Speed up the finding of missing sources
Date: Tue, 1 Mar 2022 16:35:32 -0500 (EST)

cbaines pushed a commit to branch master
in repository data-service.

commit c5b504e94a08aab8d19d752542874a588fe9a765
Author: Christopher Baines <mail@cbaines.net>
AuthorDate: Tue Mar 1 20:36:22 2022 +0000

    Speed up the finding of missing sources
    
    Use larger batches and more efficient duplicate deletion.
---
 guix-data-service/model/derivation.scm | 41 ++++++++++++++++++----------------
 1 file changed, 22 insertions(+), 19 deletions(-)

diff --git a/guix-data-service/model/derivation.scm b/guix-data-service/model/derivation.scm
index 47b5adc..c1d1c69 100644
--- a/guix-data-service/model/derivation.scm
+++ b/guix-data-service/model/derivation.scm
@@ -1738,17 +1738,7 @@ WHERE " criteria ";"))
        (chunk! missing-file-names 2000)))))
 
 (define (derivation-file-names->derivation-ids conn derivation-file-names)
-  (define (select-source-files-missing-nars derivation-ids)
-    (define (split ids max-length)
-      (if (> (length ids)
-             max-length)
-          (call-with-values (lambda ()
-                              (split-at ids max-length))
-            (lambda (ids-lst rest)
-              (cons ids-lst
-                    (split rest max-length))))
-          (list ids)))
-
+  (define (select-source-files-missing-nars! derivation-ids)
     (define (derivation-ids->all-related-derivation-ids ids)
       (define query
         (string-append
@@ -1773,7 +1763,12 @@ WITH RECURSIVE all_derivations AS (
 SELECT all_derivations.derivation_id
 FROM all_derivations"))
 
-      (map car (exec-query conn query)))
+      (map (lambda (row)
+             (string->number
+              (car row)))
+           (with-time-logging
+               "querying for batch of all related derivation ids"
+             (exec-query conn query))))
 
     (define (derivation-ids->missing-sources ids)
       (define query
@@ -1788,17 +1783,25 @@ INNER JOIN derivation_source_files
   ON derivation_sources.derivation_source_file_id =
      derivation_source_files.id
      WHERE derivation_sources.derivation_id IN ("
-         (string-join ids ", ")
+         (string-join (map number->string ids) ", ")
          ")
        AND derivation_source_file_nars.derivation_source_file_id IS NULL"))
 
-      (exec-query conn query))
+      (with-time-logging "finding batch of missing sources"
+        (exec-query conn query)))
 
     (let ((all-derivation-ids
-           (append-map
-            derivation-ids->all-related-derivation-ids
-            (split derivation-ids 250))))
-      (derivation-ids->missing-sources all-derivation-ids)))
+           (with-time-logging "querying for all related dervation ids"
+             (delete-duplicates/sort!
+              (append-map!
+               derivation-ids->all-related-derivation-ids
+               (chunk! derivation-ids 5000))
+              <))))
+
+      (with-time-logging "querying for missing sources"
+        (append-map! derivation-ids->missing-sources
+                     (chunk! all-derivation-ids
+                             10000)))))
 
   (if (null? derivation-file-names)
       '()
@@ -1858,6 +1861,6 @@ INNER JOIN derivation_source_files
                             conn
                             (string->number derivation-source-file-id)
                             store-path)))
-                        (select-source-files-missing-nars all-ids)))
+                        (select-source-files-missing-nars! all-ids)))
 
             all-ids)))))



reply via email to

[Prev in Thread] Current Thread [Next in Thread]