From 42e9ad8fd1c8825fcf182cee787a206eb932e575 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Sun, 17 Aug 2014 19:09:03 +0200 Subject: [PATCH] Propagate remote timeouts properly --- scripts/build-remote.pl.in | 7 +---- src/libstore/build.cc | 62 +++++++++++++++++++++++--------------- 2 files changed, 38 insertions(+), 31 deletions(-) diff --git a/scripts/build-remote.pl.in b/scripts/build-remote.pl.in index 89d3a43cc..243b92cf8 100755 --- a/scripts/build-remote.pl.in +++ b/scripts/build-remote.pl.in @@ -258,13 +258,8 @@ writeInt($maxSilentTime, $to); writeInt($buildTimeout, $to); my $res = readInt($from); if ($res != 0) { - # Note that if we get exit code 100 from `nix-store -r', it - # denotes a permanent build failure (as opposed to an SSH problem - # or a temporary Nix problem). We propagate this to the caller to - # allow it to distinguish between transient and permanent - # failures. my $msg = readString($from); - print STDERR "error: $msg (on `$hostName')\n"; + print STDERR "error: $msg on `$hostName'\n"; exit $res; } diff --git a/src/libstore/build.cc b/src/libstore/build.cc index 5b779984c..ad450a916 100644 --- a/src/libstore/build.cc +++ b/src/libstore/build.cc @@ -238,6 +238,9 @@ public: failure). */ bool permanentFailure; + /* Set if at least one derivation had a timeout. */ + bool timedOut; + LocalStore & store; std::shared_ptr hook; @@ -1440,33 +1443,39 @@ void DerivationGoal::buildDone() outputLocks.unlock(); buildUser.release(); - /* When using a build hook, the hook will return a remote - build failure using exit code 100. Anything else is a hook - problem. */ - bool hookError = hook && - (!WIFEXITED(status) || WEXITSTATUS(status) != 100); - - if (settings.printBuildTrace) { - if (hook && hookError) - printMsg(lvlError, format("@ hook-failed %1% - %2% %3%") - % drvPath % status % e.msg()); - else - printMsg(lvlError, format("@ build-failed %1% - %2% %3%") - % drvPath % 1 % e.msg()); + if (hook && WIFEXITED(status) && WEXITSTATUS(status) == 101) { + if (settings.printBuildTrace) + printMsg(lvlError, format("@ build-failed %1% - timeout") % drvPath); + worker.timedOut = true; } - /* Register the outputs of this build as "failed" so we won't - try to build them again (negative caching). However, don't - do this for fixed-output derivations, since they're likely - to fail for transient reasons (e.g., fetchurl not being - able to access the network). Hook errors (like - communication problems with the remote machine) shouldn't - be cached either. */ - if (settings.cacheFailure && !hookError && !fixedOutput) - foreach (DerivationOutputs::iterator, i, drv.outputs) - worker.store.registerFailedPath(i->second.path); + else if (hook && (!WIFEXITED(status) || WEXITSTATUS(status) != 100)) { + if (settings.printBuildTrace) + printMsg(lvlError, format("@ hook-failed %1% - %2% %3%") + % drvPath % status % e.msg()); + } + + else { + if (settings.printBuildTrace) + printMsg(lvlError, format("@ build-failed %1% - %2% %3%") + % drvPath % 1 % e.msg()); + worker.permanentFailure = !fixedOutput && !diskFull; + + /* Register the outputs of this build as "failed" so we + won't try to build them again (negative caching). + However, don't do this for fixed-output derivations, + since they're likely to fail for transient reasons + (e.g., fetchurl not being able to access the network). + Hook errors (like communication problems with the + remote machine) shouldn't be cached either. */ + if (/* settings.cacheFailure && */ !fixedOutput && !diskFull) + { + printMsg(lvlError, "REG"); + foreach (DerivationOutputs::iterator, i, drv.outputs) + worker.store.registerFailedPath(i->second.path); + } + } - worker.permanentFailure = !hookError && !fixedOutput && !diskFull; amDone(ecFailed); return; } @@ -2909,6 +2918,7 @@ Worker::Worker(LocalStore & store) nrLocalBuilds = 0; lastWokenUp = 0; permanentFailure = false; + timedOut = false; } @@ -3220,6 +3230,7 @@ void Worker::waitForInput() format("%1% timed out after %2% seconds of silence") % goal->getName() % settings.maxSilentTime); goal->cancel(true); + timedOut = true; } else if (goal->getExitCode() == Goal::ecBusy && @@ -3231,6 +3242,7 @@ void Worker::waitForInput() format("%1% timed out after %2% seconds") % goal->getName() % settings.buildTimeout); goal->cancel(true); + timedOut = true; } } @@ -3247,7 +3259,7 @@ void Worker::waitForInput() unsigned int Worker::exitStatus() { - return permanentFailure ? 100 : 1; + return timedOut ? 101 : (permanentFailure ? 100 : 1); }