guix-patches
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[bug#78051] [WIP] services: root-file-system: In 'stop' method, find and


From: Danny Milosavljevic
Subject: [bug#78051] [WIP] services: root-file-system: In 'stop' method, find and kill processes that are writing to our filesystems, and then umount the filesystems.
Date: Fri, 25 Apr 2025 01:03:17 +0200

* gnu/services/base.scm (%root-file-system-shepherd-service): In 'stop'
method, find and kill processes that are writing to our filesystems, and then
umount the filesystems.

Change-Id: If244a1594281057ee5b6163e23bcf11fab3968ff
---
 gnu/services/base.scm | 381 ++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 367 insertions(+), 14 deletions(-)

diff --git a/gnu/services/base.scm b/gnu/services/base.scm
index 8c6563c99d..de24d07b4e 100644
--- a/gnu/services/base.scm
+++ b/gnu/services/base.scm
@@ -348,10 +348,337 @@ (define %root-file-system-shepherd-service
    (provision '(root-file-system))
    (start #~(const #t))
    (stop #~(lambda _
-             ;; Return #f if successfully stopped.
+             ;;; Return #f if successfully stopped.
+
+             ;;; Beginning of inlined module (fuser)
+
+             (use-modules (ice-9 textual-ports)
+                          (ice-9 control)
+                          (ice-9 string-fun)
+                          (ice-9 match)
+                          (ice-9 ftw) ; scandir
+                          (srfi srfi-1)        ; filter, for-each, find.
+                          (srfi srfi-26)       ; cut
+                          (ice-9 exceptions)) ; guard
+
+             (define PROC-DIR-NAME "/proc")
+             (define DEFAULT-SILENT-ERRORS
+               (list ENOENT ESRCH))
+
+             (define* (call-with-safe-syscall thunk
+                                              #:key
+                                              (on-error #f)
+                                              (silent-errors 
DEFAULT-SILENT-ERRORS)
+                                              (error-message-format #f)
+                                              (error-context '()))
+               "Call THUNK, handling system errors:
+- If ERROR-MESSAGE-FORMAT and the error is not in SILENT-ERRORS, calls format
+with ERROR-MESSAGE-FORMAT and ERROR-CONTEXT and (strerror errno) as arguments.
+- Return ON-ERROR on error."
+               (catch 'system-error
+                      thunk
+                      (lambda args
+                        (let ((errno (system-error-errno args)))
+                          (unless (member errno silent-errors)
+                            (when error-message-format
+                              (apply format
+                                     (current-error-port)
+                                     error-message-format
+                                     (append
+                                      error-context
+                                      (list (strerror errno))))))
+                          on-error))))
+
+             (define (safe-stat path)
+               "Get stat info for PATH--or #f if not possible."
+               (call-with-safe-syscall (lambda () (stat path))
+                                       #:error-message-format "Error: Cannot 
stat ~s: ~a~%"
+                                       #:error-context (list path)
+                                       #:silent-errors '()
+                                       #:on-error #f))
+
+             (define (safe-umount path)
+               "Umount PATH--if possible.."
+               (call-with-safe-syscall (lambda () (umount path))
+                                       #:error-message-format "Error: Cannot 
umount ~s: ~a~%"
+                                       #:error-context (list path)
+                                       #:silent-errors '()
+                                       #:on-error 'error))
+
+             (define (safe-lstat path)
+               "Get lstat info for PATH--or #f if not possible."
+               (call-with-safe-syscall (lambda () (lstat path))
+                                       #:error-message-format "Error: Cannot 
lstat ~s: ~a~%"
+                                       #:error-context (list path)
+                                       #:on-error #f))
+
+             (define (safe-scandir path)
+               "scandir PATH--or #f if not possible."
+               (let ((result (scandir path)))
+                 (if result
+                     result
+                     (begin
+                       (format (current-error-port) "Error: Cannot scandir ~s: 
?~%" path)
+                       '()))))
+
+;;; Processes
+
+             (define (safe-get-fd-flags pid fd)
+               "Get flags for FD in PID--or #f if not possible."
+               (let ((fdinfo-path (format #f "~a/~a/fdinfo/~a" PROC-DIR-NAME 
pid fd)))
+                 (call-with-safe-syscall (lambda ()
+                                           (call-with-input-file fdinfo-path
+                                             (lambda (port)
+                                               ;; Find 'flags:' line and parse 
octal value
+                                               (let loop ()
+                                                 (let ((line (get-line port)))
+                                                   (cond ((eof-object? line) 
#f)
+                                                         ((string-prefix? 
"flags:\t" line)
+                                                          (match (string-split 
line #\tab)
+                                                            ((_ flags-str . _)
+                                                             (catch 
'invalid-argument
+                                                                    (lambda ()
+                                                                      
(string->number flags-str 8))
+                                                                    (lambda 
args
+                                                                      #f)))
+                                                            (_ #f)))
+                                                         (else (loop))))))))
+                                         #:error-message-format "Error: Cannot 
read ~s: ~a~%"
+                                         #:error-context (list fdinfo-path)
+                                         #:on-error #f)))
+
+             (define (safe-get-processes)
+               "Get a list of all PIDs from proc--or #f if not possible."
+               (let ((proc-dir PROC-DIR-NAME))
+                 (catch 'system-error
+                        (lambda ()
+                          ;; Keep only numbers.
+                          (filter-map string->number (safe-scandir proc-dir)))
+                        ;; FIXME is errno even useful?
+                        (lambda scan-err
+                          (format (current-error-port) "Error scanning ~s: 
~a~%"
+                                  proc-dir (strerror (system-error-errno 
scan-err)))
+                          '()))))
+
+             (define (safe-fd-on-device? pid fd target-device)
+               "Return whether fd FD on pid PID is on device TARGET-DEVICE."
+               (let* ((fd-path (format #f "~a/~a/fd/~a" PROC-DIR-NAME pid fd))
+                      (link-stat (safe-lstat fd-path)))
+                 (and link-stat (eqv? (stat:dev link-stat)
+                                      target-device))))
+
+             (define (safe-get-process-fds pid)
+               "Get a list of all FDs of PID from proc--or #f if not possible."
+               (let ((fd-dir (format #f "~a/~a/fd" PROC-DIR-NAME pid)))
+                 ;; Keep only numbers.
+
+
+                 (filter-map string->number (safe-scandir fd-dir))))
+
+             (define (filter-process-fd-flags pid fds predicate)
+               "Get FLAGS from proc for PID and call PREDICATE with (FD FLAGS) 
each."
+               (filter (lambda (fd)
+                         (predicate fd (safe-get-fd-flags pid fd)))
+                       fds))
+
+             (define (safe-get-process-command pid)
+               "Return command of process PID--or #f if not possible."
+               (let ((cmdline-path (format #f "~a/~a/cmdline" PROC-DIR-NAME 
pid)))
+                 (call-with-safe-syscall (lambda ()
+                                           (call-with-input-file cmdline-path
+                                             (lambda (port)
+                                               (let ((full-cmdline 
(get-string-all port)))
+                                                 (match (string-split 
full-cmdline #\nul)
+                                                   ((command-name . _) 
command-name))))))
+                                         #:error-message-format "Error: Cannot 
read ~s: ~a~%"
+                                         #:error-context (list cmdline-path)
+                                         #:on-error #f)))
+
+             (define (safe-kill-process pid kill-signal)
+               "Kill process PID with KILL-SIGNAL if possible."
+               (call-with-safe-syscall (lambda ()
+                                         (kill pid kill-signal)
+                                         #t)
+                                       #:on-error 'error
+                                       #:silent-errors '()
+                                       #:error-message-format
+                                       "Error: Failed to kill process ~a: ~a~%"
+                                       #:error-context '()))
+
+;;; Mounts
+
+             (define (safe-get-device mount-point)
+               "Get the device ID (st_dev) of MOUNT-POINT--or #f if not 
possible."
+               (and=>
+                (safe-stat mount-point)
+                stat:dev))
+
+             (define (safe-parse-mountinfo path)
+               "Read and parse /proc/self/mountinfo (or specified path).
+Return a list of parsed entries, where each entry is:
+(list mount-id parent-id mount-point-string)
+Return '() on file read error or if file is unparseable."
+               (call-with-safe-syscall ; TODO: call-with-input-file is not 
actually a syscall.
+                (lambda ()
+                  (let ((entries '()))
+                    (call-with-input-file path
+                      (lambda (port)
+                        (let loop ()
+                          (let ((line (get-line port)))
+                            (unless (eof-object? line)
+                              (match (string-split line #\space)
+                                ;;       mnt_id par_id major:minor root 
mount_point ...
+                                ((m-id-str p-id-str _ _ mp . _)
+                                 ;; Attempt to parse IDs, skip line on error
+                                 (catch 'invalid-argument
+                                        (lambda ()
+                                          (let ((mount-id (string->number 
m-id-str))
+                                                (parent-id (string->number 
p-id-str)))
+                                            ;; Add successfully parsed entry 
to list
+                                            (set! entries (cons (list mount-id 
parent-id mp)
+                                                                entries))
+                                            (loop))) ; Continue to next line
+                                        (lambda args
+                                          (format (current-error-port)
+                                                  "Warning: Skipping mountinfo 
line due to parse error: ~s (~a)~%"
+                                                  line args)
+                                          (loop))))
+                                (_ (loop))))))))
+                    ;; Return parsed entries in file order
+                    (reverse entries)))
+                #:error-message-format "Error: Cannot read or parse mountinfo 
file ~s: ~a"
+                #:error-context (list path)
+                #:on-error '()))
+
+             (define (safe-find-nested-mounts root-mount-point target-device)
+               "Find mount points that block the unmounting of 
ROOT-MOUNT-POINT.
+TARGET-DEVICE argument is ignored.
+Mountpoints are returned depth-first (in the order they can be unmounted).
+ROOT-MOUNT-POINT is included."
+               (let* ((mountinfo (safe-parse-mountinfo (format #f 
"~a/self/mountinfo" PROC-DIR-NAME))))
+                 (define (safe-find-mounts-via-mountinfo accumulator lives 
root-mount-point)
+                   (if (member root-mount-point accumulator)
+                       (format (current-error-port) "Cycle detected~%"))
+                   (let ((accumulator (cons root-mount-point accumulator)))
+                     (if (= lives 0)
+                         (begin
+                           (format (current-error-port) "Error: Recursive 
mountpoints too deep.~%")
+                           accumulator)
+                         (let ((root-entry (find (lambda (entry)
+                                                   (match entry
+                                                     ((_ _ mp) (string=? mp 
root-mount-point))
+                                                     (_ #f))) ; Should not 
happen
+                                                 mountinfo)))
+                           (if root-entry
+                               (let ((root-mount-id (car root-entry)))
+                                 (fold (lambda (entry accumulator)
+                                         (match entry
+                                           ((_ parent-id mp)
+                                            (if (= parent-id root-mount-id)
+                                                
(safe-find-mounts-via-mountinfo accumulator
+                                                                               
 (- lives 1)
+                                                                               
 mp)
+                                                accumulator))
+                                           (_ accumulator)))
+                                       accumulator
+                                       mountinfo))
+                               (begin
+                                 (format (current-error-port) "Error: Could 
not find mount ID for ~s in parsed mountinfo~%"
+                                         root-mount-point)
+                                 accumulator))))))
+                 (safe-find-mounts-via-mountinfo '() 100 root-mount-point)))
+
+             ;;; End of inlined module (fuser)
+
+             (define MOUNT-POINT "/")
+
+             (define O_ACCMODE #o0003)
+
+             (define (flags-has-write-access? flags)
+               "Given open FLAGS, return whether it (probably) signifies write 
access."
+               (and flags (not (= (logand flags O_ACCMODE)
+                                  O_RDONLY))))
+
+             (define (ask-to-kill? pid command)
+               "Ask whether to kill process with id PID (and command COMMAND)"
+               (format (current-error-port) "~%Process Found: PID ~a  Command: 
~s~%" pid command)
+               (format (current-error-port) "Kill process ~a? [y/N] " pid)
+               (force-output (current-error-port))
+               (let ((response (read-char (current-input-port))))
+                 (if (not (eof-object? response))
+                     ;; Consume rest of line.
+                     (read-line (current-input-port)))
+                 (or (eqv? response #\y)
+                     (eqv? response #\Y))))
+
+             (define (clean-up . args)
+               (let* ((error-port (current-error-port))
+                      (root-device (safe-get-device MOUNT-POINT))
+                      (mounts (safe-find-nested-mounts MOUNT-POINT 
root-device))
+                      (mount-devices (map safe-get-device mounts)))
+                 (format error-port "Searching for processes writing to files 
on devices ~s (mount points ~s)...~%"
+                         mount-devices mounts)
+                 (let* ((our-pid (getpid))
+                        (pids (filter (lambda (pid)
+                                        (not (= pid our-pid)))
+                                      (safe-get-processes)))
+                        (pids (filter (lambda (pid)
+                                        (match (filter-process-fd-flags pid
+                                                                        
(safe-get-process-fds pid)
+                                                                        
(lambda (fd flags)
+                                                                          (and 
(flags-has-write-access? flags)
+                                                                               
(find (lambda (target-device)
+                                                                               
        (safe-fd-on-device? pid fd target-device))
+                                                                               
      mount-devices))))
+                                          ((x . _) #t)
+                                          (_ #f)))
+                                      pids)))
+                   (format error-port "Found ~a process(es) matching the 
criteria.~%" (length pids))
+                   (for-each (lambda (pid)
+                               (let ((command (safe-get-process-command pid)))
+                                 (if (ask-to-kill? pid command)
+                                     (safe-kill-process pid SIGKILL)
+                                     (format error-port "Skipping PID ~a 
(~s).~%" pid command))))
+                             pids))
+                 (format error-port "~%Process scan complete.~%")
+                 (format error-port "Searching for nested mounts of ~s...~%" 
MOUNT-POINT)
+                 (if (null? mounts)
+                     (format error-port "No nested mount points found.~%")
+                     (begin
+                       (format error-port "Found nested mount points that 
would need unmounting:~%")
+                       (for-each (lambda (mp)
+                                   (format #t "  ~s~%" mp)
+                                   (safe-umount mp))
+                                 mounts)))))
+
+             (define (call-with-mounted-filesystem source mountpoint 
filesystem-type options proc)
+               (mount source mountpoint file-system-type options 
#:update-mtab? #f)
+               (catch #t
+                      (lambda ()
+                        (proc)
+                        (umount mountpoint))
+                      (lambda args
+                        (umount mountpoint))))
+
              (sync)
 
-             (let ((null (%make-void-port "w")))
+             (let* ((null (%make-void-port "w"))
+                    (call-with-io-file (lambda (file-name proc)
+                                         (let ((port (open file-name O_RDWR)))
+                                           (set-current-input-port port)
+                                           (set-current-output-port port)
+                                           (set-current-error-port port)
+                                           (catch #t proc
+                                                  (lambda args
+                                                    (set-current-input-port 
null)
+                                                    (set-current-output-port 
null)
+                                                    (set-current-error-port 
null)
+                                                    (close port))))))
+                    (with-mounted-filesystem (syntax-rules ()
+                                               ((with-mounted-filesystem 
source filesystem-type mountpoint options . exps)
+                                                (call-with-mounted-filesystem 
source filesystem-type mountpoint options
+                                                                              
(lambda () . exps))))))
+
                ;; Redirect the default output ports.
                (set-current-output-port null)
                (set-current-error-port null)
@@ -363,21 +690,47 @@ (define %root-file-system-shepherd-service
                ;; root file system can be re-mounted read-only.
                (let loop ((n 10))
                  (unless (catch 'system-error
+                                (lambda ()
+                                  (mount #f "/" #f
+                                         (logior MS_REMOUNT MS_RDONLY)
+                                         #:update-mtab? #f)
+                                  #t)
+                                (const #f))
+                   (when (zero? n)
+                     ;; TODO: pivot-root to /run/booted-system/initrd 
first--so we don't try to kill ourselves.
+                     ;; But that's on /gnu/store--which we don't have anymore.
+                     ;; Instead, we'll just exempt outselves (see 
"our-pid")--and possibly miss things.
+                     (with-mounted-filesystem "none" "proc" "/proc" 0
+                       (with-mounted-filesystem "none" "devtmpfs" "/dev" 0
+                         (catch 'system-error
                            (lambda ()
-                             (mount #f "/" #f
-                                    (logior MS_REMOUNT MS_RDONLY)
-                                    #:update-mtab? #f)
-                             #t)
+                             (mknod "/dev/tty" 'char-special #o600 (+ (* 5 
256) 0)))
                            (const #f))
-                   (unless (zero? n)
-                     ;; Yield to the other fibers.  That gives logging fibers
-                     ;; an opportunity to close log files so the 'mount' call
-                     ;; doesn't fail with EBUSY.
-                     ((@ (fibers) sleep) 1)
-                     (loop (- n 1)))))
+                         (call-with-io-file "/dev/tty"
+                          (lambda ()
+                            ;; we don't have chvt :(
+                            ;; (it would need to use %ioctl fd VT_ACTIVATE int 
on /dev/tty)
+                            ;(chvt 12)
+                            (clean-up)))))
+                     ;; Should have been unmounted already--but we are paranoid
+                     ;; (and probably were blocking ourselves anyway).
+                     (catch 'system-error
+                                (lambda ()
+                                  (mount #f "/" #f
+                                         (logior MS_REMOUNT MS_RDONLY)
+                                         #:update-mtab? #f)
+                                  #t)
+                                (const #f))
+                   ((@ (fibers) sleep) 10))
+                 (unless (zero? n)
+                   ;; Yield to the other fibers.  That gives logging fibers
+                   ;; an opportunity to close log files so the 'mount' call
+                   ;; doesn't fail with EBUSY.
+                   ((@ (fibers) sleep) 1)
+                   (loop (- n 1)))))
 
-               #f)))
-   (respawn? #f)))
+             #f)))
+  (respawn? #f)))
 
 (define root-file-system-service-type
   (shepherd-service-type 'root-file-system

base-commit: 85b5c2c8f66aed05730f6c7bdeabfaadf619bb8f
prerequisite-patch-id: 1a4781dff5873451484bba21bc0dc4617075cb55
prerequisite-patch-id: bbe7274727aa8e1bf4beee1acafbd0a3fdc9257a
-- 
2.49.0






reply via email to

[Prev in Thread] Current Thread [Next in Thread]