[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
01/03: hydra: deploy-node-130: Do more data service setup.
From: |
Christopher Baines |
Subject: |
01/03: hydra: deploy-node-130: Do more data service setup. |
Date: |
Mon, 16 Dec 2024 06:04:35 -0500 (EST) |
cbaines pushed a commit to branch master
in repository maintenance.
commit 040282dfc1c6b0a1f2f19ab38193346993a31a58
Author: Christopher Baines <mail@cbaines.net>
AuthorDate: Mon Dec 16 10:55:13 2024 +0000
hydra: deploy-node-130: Do more data service setup.
Copy over the mcron jobs from beid, add git-repositories and
build-servers config (applying this automatically doesn't seem to work
yet), and make some fixes.
* hydra/deploy-node-130.scm (guix-data-service-guix-cleanup,
guix-data-service-derivation-cleanup, guix-data-service-nars-cleanup,
guix-data-service-branch-cleanup): New procedures.
(mcron-service-configuration): New variable.
(node-130-os): Update configuration.
---
hydra/deploy-node-130.scm | 385 ++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 370 insertions(+), 15 deletions(-)
diff --git a/hydra/deploy-node-130.scm b/hydra/deploy-node-130.scm
index e886f8e9..f73ead42 100644
--- a/hydra/deploy-node-130.scm
+++ b/hydra/deploy-node-130.scm
@@ -1,4 +1,5 @@
-(use-modules (gnu bootloader)
+(use-modules (srfi srfi-1)
+ (gnu bootloader)
(gnu bootloader grub)
(gnu services base)
(gnu services mcron)
@@ -7,6 +8,7 @@
(gnu services databases)
(gnu services monitoring)
(gnu services cuirass)
+ (gnu packages sqlite)
(gnu packages databases)
(gnu packages web)
(gnu machine)
@@ -26,6 +28,287 @@
;; List of authorized 'guix archive' keys.
(list (local-file "keys/guix/berlin.guixsd.org-export.pub")))
+(define (guix-data-service-guix-cleanup guix-data-service)
+ (program-file
+ "guix-cleanup"
+ (with-extensions
+ (cons guix-data-service
+ (map second (package-transitive-propagated-inputs
+ guix-data-service)))
+ #~(begin
+ (setvbuf (current-output-port) 'line)
+ (setvbuf (current-error-port) 'line)
+
+ (simple-format #t "~A: start: guix-cleanup\n"
+ (strftime "%c" (localtime (current-time))))
+
+ (use-modules (prometheus)
+ ((guix scripts processes) #:select (daemon-sessions))
+ (guix build utils)
+ (guix build syscalls))
+
+ (with-file-lock/no-wait "/tmp/guix-cleanup-lock"
+ (lambda ()
+ (simple-format #t "~A: skipping: guix-cleanup\n"
+ (strftime "%c" (localtime (current-time)))))
+ (let* ((daemon-idle? (null? (daemon-sessions)))
+ (free-space-threshold (* 35 (expt 2 30)))
+ (free-space (free-disk-space "/gnu/store"))
+ (daemon-db-size
+ (stat:size (stat "/var/guix/db/db.sqlite")))
+ (low-free-space?
+ (< free-space free-space-threshold))
+ (large-daemon-db?
+ (> daemon-db-size
+ (* 13
+ (expt 2 30)))))
+ (simple-format #t "guix-cleanup: free-space: ~A, daemon-db-size:
~A\n"
+ free-space
+ daemon-db-size)
+ (when low-free-space?
+ (invoke "guix" "gc"))
+ (when large-daemon-db?
+ (let loop ((attempt 0))
+ (with-exception-handler
+ (lambda (exn)
+ (if (< attempt 30)
+ (begin
+ (sleep 20)
+ (loop (+ 1 attempt)))
+ (simple-format #t "guix-cleanup: giving up
vacuuming database\n")))
+ (lambda ()
+ (invoke "guix" "gc" "--vacuum-database"))
+ #:unwind? #t)))
+
+ ;; Just record if something happened
+ (when (or low-free-space?
+ large-daemon-db?)
+ (let* ((registry (make-metrics-registry
+ #:namespace "guixdataservicecleanup"))
+ (completion-time-metric
+ (make-gauge-metric registry
"guix_cleanup_completion_time")))
+ (metric-set completion-time-metric (current-time))
+ (write-textfile registry
+ (string-append
+ "/var/lib/prometheus/node-exporter/"
+
"guix-data-service-guix-cleanup.prom"))))))))))
+
+(define (guix-data-service-derivation-cleanup guix-data-service)
+ (program-file
+ "guix-data-service-derivation-cleanup"
+ (with-extensions
+ (cons guix-data-service
+ (map second (package-transitive-propagated-inputs
+ guix-data-service)))
+ #~(begin
+ (setvbuf (current-output-port) 'line)
+ (setvbuf (current-error-port) 'line)
+
+ (simple-format #t "~A: start: guix-data-service-derivation-cleanup\n"
+ (strftime "%c" (localtime (current-time))))
+ (use-modules (prometheus)
+ (guix-data-service data-deletion))
+ (begin
+ (delete-unreferenced-derivations)
+
+ (let* ((registry (make-metrics-registry
+ #:namespace "guixdataservicecleanup"))
+ (completion-time-metric
+ (make-gauge-metric registry "derivation_completion_time")))
+ (metric-set completion-time-metric (current-time))
+ (write-textfile registry
+ (string-append
+ "/var/lib/prometheus/node-exporter/"
+ "guix-data-service-derivation-cleanup.prom"))))))
+ #:guile
+ (car
+ (assoc-ref (package-native-inputs guix-data-service)
+ "guile"))))
+
+(define (guix-data-service-nars-cleanup guix-data-service)
+ (program-file
+ "guix-data-service-nars-cleanup"
+ (with-extensions
+ (cons guix-data-service
+ (map second (package-transitive-propagated-inputs
+ guix-data-service)))
+ #~(begin
+ (setvbuf (current-output-port) 'line)
+ (setvbuf (current-error-port) 'line)
+
+ (simple-format #t "~A: start: guix-data-service-nars-cleanup\n"
+ (strftime "%c" (localtime (current-time))))
+ (use-modules (prometheus)
+ (guix-data-service data-deletion))
+ (begin
+ (delete-nars-for-unknown-store-paths)
+
+ (let* ((registry (make-metrics-registry
+ #:namespace "guixdataservicecleanup"))
+ (completion-time-metric
+ (make-gauge-metric registry "nars_completion_time")))
+ (metric-set completion-time-metric (current-time))
+ (write-textfile registry
+ (string-append
+ "/var/lib/prometheus/node-exporter/"
+ "guix-data-service-nars-cleanup.prom"))))))
+ #:guile
+ (car
+ (assoc-ref (package-native-inputs guix-data-service)
+ "guile"))))
+
+(define (guix-data-service-branch-cleanup guix-data-service)
+ (program-file
+ "guix-data-service-branch-cleanup"
+ (with-extensions
+ (cons guix-data-service
+ (map second (package-transitive-propagated-inputs
+ guix-data-service)))
+ #~(begin
+ (setvbuf (current-output-port) 'line)
+ (setvbuf (current-error-port) 'line)
+
+ (simple-format #t "~A: start: guix-data-service-branch-cleanup\n"
+ (strftime "%c" (localtime (current-time))))
+ (use-modules (squee)
+ (ice-9 match)
+ (prometheus)
+ (guix-data-service database)
+ (guix-data-service data-deletion)
+ (guix-data-service model
package-derivation-by-guix-revision-range))
+ (begin
+ (define delete-revisions-from-branch
+ (@@ (guix-data-service data-deletion)
delete-revisions-from-branch))
+
+ (delete-data-for-all-deleted-branches)
+ (with-exception-handler
+ (lambda (exn)
+ (simple-format
+ #t "failed deleting revisions except most recent: ~A\n"
+ exn))
+ (lambda ()
+ (delete-revisions-for-all-branches-except-most-recent-n 200))
+ #:unwind? #t)
+
+ (with-postgresql-connection
+ "data-deletion"
+ (lambda (conn)
+ (for-each
+ (match-lambda
+ ((git-repository-id branch-name)
+ ;; Temporary cleanup
+ (when (or (string-prefix? "wip-" branch-name)
+ (string-prefix? "version-" branch-name))
+ (simple-format #t "deleting ~A\n" branch-name)
+ (delete-data-for-branch conn
+ (string->number git-repository-id)
+ branch-name))
+
+ ;; We want to delete all revisions prior to the
+ ;; latest processed revision
+ (let ((commits
+ (map
+ car
+ (exec-query
+ conn
+ "
+SELECT commit
+FROM git_commits
+INNER JOIN git_branches
+ ON git_branches.id = git_commits.git_branch_id
+ AND git_branches.git_repository_id = $1
+ AND git_branches.name = $2
+WHERE git_commits.datetime < (
+ SELECT datetime
+ FROM git_commits
+ INNER JOIN git_branches
+ ON git_branches.id = git_commits.git_branch_id
+ AND git_branches.git_repository_id = $1
+ AND git_branches.name = $2
+ INNER JOIN load_new_guix_revision_jobs
+ ON load_new_guix_revision_jobs.commit = git_commits.commit
+ AND load_new_guix_revision_jobs.git_repository_id = $1
+ ORDER BY CASE WHEN load_new_guix_revision_jobs.succeeded_at IS NULL THEN 0
+ ELSE 1
+ END DESC,
+ datetime DESC
+ LIMIT 1
+)
+ AND commit != ''
+ORDER BY datetime DESC"
+ (list git-repository-id
+ branch-name)))))
+ (unless (null? commits)
+ (simple-format
+ #t
+ "deleting ~A commits from ~A\n"
+ (length commits)
+ branch-name)
+ (delete-revisions-from-branch
+ conn
+ (string->number git-repository-id)
+ branch-name
+ ;; Delete the "branch deleted" commits as well
+ ;; TODO Handle this better
+ (cons "" commits))
+
+ (simple-format
+ #t
+ "repopulating
package_derivations_by_guix_revision_range\n")
+ (insert-guix-revision-package-derivation-entries
+ conn
+ git-repository-id
+ branch-name)))))
+ (exec-query
+ conn
+ "
+SELECT git_repository_id, name
+FROM git_branches
+WHERE
+ (git_repository_id = 1 AND name LIKE 'issue-%')
+ OR
+ (git_repository_id = 2 AND name NOT LIKE 'master')
+ORDER BY id ASC"))))
+
+ (let* ((registry (make-metrics-registry
+ #:namespace "guixdataservicecleanup"))
+ (completion-time-metric
+ (make-gauge-metric registry "branch_completion_time")))
+ (metric-set completion-time-metric (current-time))
+ (write-textfile registry
+ (string-append
+ "/var/lib/prometheus/node-exporter/"
+ "guix-data-service-branch-cleanup.prom"))))))
+ #:guile
+ (car
+ (assoc-ref (package-native-inputs guix-data-service)
+ "guile"))))
+
+(define mcron-service-configuration
+ (mcron-configuration
+ (jobs
+ (list
+ #~(job "0 * * * *"
+ #$(guix-data-service-guix-cleanup guix-data-service))
+ #~(job "2 * * * *"
+ #$(file-append
+ sqlite
+ "/bin/sqlite3 /var/guix/db/db.sqlite \"PRAGMA
wal_checkpoint(TRUNCATE);\""))
+ #~(job "0 1 * * 1"
+ "rm -r /var/lib/guix-data-service/.cache/guix/substitute/*")
+ #~(job "0 1 * * 1"
+ "rm -r /var/log/postgresql/postgres*")
+ #~(job "0 0 * * *"
+ #$(guix-data-service-branch-cleanup guix-data-service))
+ #~(job "0 8 * * *"
+ #$(guix-data-service-branch-cleanup guix-data-service))
+ #~(job "0 16 * * *"
+ #$(guix-data-service-branch-cleanup guix-data-service))
+ #~(job "0 1 * * *"
+ #$(guix-data-service-nars-cleanup guix-data-service))
+ #~(job "0 2 * * 0"
+ #$(guix-data-service-derivation-cleanup guix-data-service))))))
+
(define node-130-os
(let ((base-os (berlin-new-build-machine-os
@@ -33,9 +316,7 @@
#:authorized-guix-keys %authorized-guix-keys
#:emulated-architectures '()
#:childhurd? #f
- #:systems '("x86_64-linux" "i686-linux")
- #:max-jobs 6
- #:max-cores 48)))
+ #:systems '("x86_64-linux" "i686-linux"))))
(operating-system
(inherit base-os)
(services
@@ -47,17 +328,85 @@
(extra-options
'("--postgresql-statement-timeout=120000"
"--postgresql-connections=72"
-
"--narinfo-signing-public-key=/etc/guix-data-serivce/signing-key.pub"
+
"--narinfo-signing-public-key=/etc/guix-data-service/signing-key.pub"
"--narinfo-signing-private-key=/etc/guix-data-service/signing-key.sec"
"--host=0.0.0.0"))
(extra-process-jobs-options
`("--max-processes=1"
- "--latest-branch-revision-max-processes=2"
- "--per-job-parallelism=12"
+ "--latest-branch-revision-max-processes=3"
+ "--per-job-parallelism=6"
"--inferior-set-environment-variable=GUIX_DOWNLOAD_METHODS=upstream"
"--skip-system-tests"
,(simple-format #f "--free-space-requirement=~A"
- (* 60 (expt 2 30)))))))
+ (* 60 (expt 2 30)))))
+ (git-repositories
+ '(((id . 1)
+ (label . "guix")
+ (url . "https://git.savannah.gnu.org/git/guix.git")
+ (cgit_url_base .
"https://git.savannah.gnu.org/cgit/guix.git/")
+ (x_git_repo_header . "guix")
+ (excluded_branches . #("keyring" "/wip-.*/"
"/version-.*/"))
+ (fetch_with_authentication . #t)
+ (query_substitutes . #t)
+ (poll_interval . 600)
+ (job_priority . 1))
+ ((id . 2)
+ (label . "guix-patches")
+ (url . "https://git.qa.guix.gnu.org/git/guix-patches")
+ (cgit_url_base .
"https://git.qa.guix.gnu.org/guix-patches/")
+ (x_git_repo_header . "guix-patches")
+ (excluded_branches . #("master"))
+ (fetch_with_authentication . #f)
+ (query_substitutes #f)
+ (poll_interval . 60)
+ (job_priority . 0))))
+ (build-servers
+ `(((id . 1)
+ (url . "https://ci.guix.gnu.org/")
+ (lookup_all_derivations . #t)
+ (lookup_builds . #t)
+ (systems-and-targets
+ . (("x86_64-linux" . "")
+ ("i686-linux" . "")
+ ("aarch64-linux" . "")
+ ("armhf-linux" . "")
+ ("powerpc64le-linux" . "")
+ ("i586-gnu" . "")
+ ("riscv64-linux" . "")
+ ("x86_64-linux" . "i586-pc-gnu")
+ ("x86_64-linux" . "mips64el-linux-gnu")
+ ("x86_64-linux" . "aarch64-linux-gnu")
+ ("x86_64-linux" . "powerpc64le-linux-gnu")
+ ("x86_64-linux" . "riscv64-linux-gnu")
+ ("x86_64-linux" . "x86_64-w64-mingw32")
+ ("x86_64-linux" . "arm-linux-gnueabihf")
+ ("x86_64-linux" . "i686-w64-mingw32")
+ ("x86_64-linux" . "powerpc-linux-gnu"))))
+ ((id . 2)
+ (url . "https://bordeaux.guix.gnu.org/")
+ (lookup_all_derivations . #t)
+ (lookup_builds . #t)
+ (systems-and-targets
+ . (("x86_64-linux" . "")
+ ("i686-linux" . "")
+ ("aarch64-linux" . "")
+ ("armhf-linux" . "")
+ ("powerpc64le-linux" . "")
+ ("i586-gnu" . "")
+ ("riscv64-linux" . "")
+ ("x86_64-linux" . "i586-pc-gnu")
+ ("x86_64-linux" . "mips64el-linux-gnu")
+ ("x86_64-linux" . "aarch64-linux-gnu")
+ ("x86_64-linux" . "powerpc64le-linux-gnu")
+ ("x86_64-linux" . "riscv64-linux-gnu")
+ ("x86_64-linux" . "x86_64-w64-mingw32")
+ ("x86_64-linux" . "arm-linux-gnueabihf")
+ ("x86_64-linux" . "i686-w64-mingw32")
+ ("x86_64-linux" . "powerpc-linux-gnu")))
+ (token-seeds . ("token1")))))))
+
+ (service mcron-service-type
+ mcron-service-configuration)
(service postgresql-service-type
(postgresql-configuration
@@ -96,18 +445,26 @@ host all all ::1/128 md5"))
(modify-services (operating-system-user-services base-os)
(guix-service-type
config => (guix-configuration
- (inherit config)
+ ;; Don't inherit so avoid build machine specific
+ ;; configuration
(substitute-urls
`("https://bordeaux.guix.gnu.org"
"https://ci.guix.gnu.org"
- ;; So that the data service can
- ;; substitute from itself
- "https://data.qa.guix.gnu.org"))
+ ;; So that the data service can substitute
+ ;; from itself, for some reason node 130
+ ;; can't talk to node 129 so use the local
+ ;; address
+ "http://localhost:8765"))
(authorized-keys
(list
(local-file
"keys/guix/bordeaux.guix.gnu.org-export.pub")
(local-file "keys/guix/berlin.guixsd.org-export.pub")
- (local-file "keys/guix/data.qa.guix.gnu.org.pub")))))
+ (local-file "keys/guix/data.qa.guix.gnu.org.pub")))
+ (build-accounts 64)
+ (max-silent-time (* 60 60 3))
+ (timeout (* 60 60 24))
+ (extra-options
+ (list "--max-jobs" "4" "--cores" "42"))))
(delete cuirass-remote-worker-service-type)
(delete mcron-service-type)))))))
@@ -122,5 +479,3 @@ host all all ::1/128 md5"))
(build-locally? #t)
(host-key "ssh-ed25519
AAAAC3NzaC1lZDI1NTE5AAAAIMuCdrMoF25T9ejPLAAcS92b6lVIz5+U0avyYPQTG5NI")
(system "x86_64-linux")))))
-
-node-130-os