guix-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

136/376: Propagate remote timeouts properly


From: Ludovic Courtès
Subject: 136/376: Propagate remote timeouts properly
Date: Wed, 28 Jan 2015 22:04:33 +0000

civodul pushed a commit to tag 1.8
in repository guix.

commit 42e9ad8fd1c8825fcf182cee787a206eb932e575
Author: Eelco Dolstra <address@hidden>
Date:   Sun Aug 17 19:09:03 2014 +0200

    Propagate remote timeouts properly
---
 scripts/build-remote.pl.in |    7 +-----
 src/libstore/build.cc      |   54 ++++++++++++++++++++++++++-----------------
 2 files changed, 34 insertions(+), 27 deletions(-)

diff --git a/scripts/build-remote.pl.in b/scripts/build-remote.pl.in
index 89d3a43..243b92c 100755
--- a/scripts/build-remote.pl.in
+++ b/scripts/build-remote.pl.in
@@ -258,13 +258,8 @@ writeInt($maxSilentTime, $to);
 writeInt($buildTimeout, $to);
 my $res = readInt($from);
 if ($res != 0) {
-    # Note that if we get exit code 100 from `nix-store -r', it
-    # denotes a permanent build failure (as opposed to an SSH problem
-    # or a temporary Nix problem).  We propagate this to the caller to
-    # allow it to distinguish between transient and permanent
-    # failures.
     my $msg = readString($from);
-    print STDERR "error: $msg (on `$hostName')\n";
+    print STDERR "error: $msg on `$hostName'\n";
     exit $res;
 }
 
diff --git a/src/libstore/build.cc b/src/libstore/build.cc
index 5b77998..ad450a9 100644
--- a/src/libstore/build.cc
+++ b/src/libstore/build.cc
@@ -238,6 +238,9 @@ public:
        failure). */
     bool permanentFailure;
 
+    /* Set if at least one derivation had a timeout. */
+    bool timedOut;
+
     LocalStore & store;
 
     std::shared_ptr<HookInstance> hook;
@@ -1440,33 +1443,39 @@ void DerivationGoal::buildDone()
         outputLocks.unlock();
         buildUser.release();
 
-        /* When using a build hook, the hook will return a remote
-           build failure using exit code 100.  Anything else is a hook
-           problem. */
-        bool hookError = hook &&
-            (!WIFEXITED(status) || WEXITSTATUS(status) != 100);
+        if (hook && WIFEXITED(status) && WEXITSTATUS(status) == 101) {
+            if (settings.printBuildTrace)
+                printMsg(lvlError, format("@ build-failed %1% - timeout") % drvPath);
+            worker.timedOut = true;
+        }
 
-        if (settings.printBuildTrace) {
-            if (hook && hookError)
+        else if (hook && (!WIFEXITED(status) || WEXITSTATUS(status) != 100)) {
+            if (settings.printBuildTrace)
                 printMsg(lvlError, format("@ hook-failed %1% - %2% %3%")
                     % drvPath % status % e.msg());
-            else
+        }
+
+        else {
+            if (settings.printBuildTrace)
                 printMsg(lvlError, format("@ build-failed %1% - %2% %3%")
                     % drvPath % 1 % e.msg());
+            worker.permanentFailure = !fixedOutput && !diskFull;
+
+            /* Register the outputs of this build as "failed" so we
+               won't try to build them again (negative caching).
+               However, don't do this for fixed-output derivations,
+               since they're likely to fail for transient reasons
+               (e.g., fetchurl not being able to access the network).
+               Hook errors (like communication problems with the
+               remote machine) shouldn't be cached either. */
+            if (/* settings.cacheFailure && */ !fixedOutput && !diskFull)
+            {
+                printMsg(lvlError, "REG");
+                foreach (DerivationOutputs::iterator, i, drv.outputs)
+                    worker.store.registerFailedPath(i->second.path);
+            }
         }
 
-        /* Register the outputs of this build as "failed" so we won't
-           try to build them again (negative caching).  However, don't
-           do this for fixed-output derivations, since they're likely
-           to fail for transient reasons (e.g., fetchurl not being
-           able to access the network).  Hook errors (like
-           communication problems with the remote machine) shouldn't
-           be cached either. */
-        if (settings.cacheFailure && !hookError && !fixedOutput)
-            foreach (DerivationOutputs::iterator, i, drv.outputs)
-                worker.store.registerFailedPath(i->second.path);
-
-        worker.permanentFailure = !hookError && !fixedOutput && !diskFull;
         amDone(ecFailed);
         return;
     }
@@ -2909,6 +2918,7 @@ Worker::Worker(LocalStore & store)
     nrLocalBuilds = 0;
     lastWokenUp = 0;
     permanentFailure = false;
+    timedOut = false;
 }
 
 
@@ -3220,6 +3230,7 @@ void Worker::waitForInput()
                 format("%1% timed out after %2% seconds of silence")
                 % goal->getName() % settings.maxSilentTime);
             goal->cancel(true);
+            timedOut = true;
         }
 
         else if (goal->getExitCode() == Goal::ecBusy &&
@@ -3231,6 +3242,7 @@ void Worker::waitForInput()
                 format("%1% timed out after %2% seconds")
                 % goal->getName() % settings.buildTimeout);
             goal->cancel(true);
+            timedOut = true;
         }
     }
 
@@ -3247,7 +3259,7 @@ void Worker::waitForInput()
 
 unsigned int Worker::exitStatus()
 {
-    return permanentFailure ? 100 : 1;
+    return timedOut ? 101 : (permanentFailure ? 100 : 1);
 }
 
 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]