forked from lix-project/lix
Propagate remote timeouts properly
This commit is contained in:
parent
c6e85ee474
commit
42e9ad8fd1
2 changed files with 38 additions and 31 deletions
|
@ -258,13 +258,8 @@ writeInt($maxSilentTime, $to);
|
|||
writeInt($buildTimeout, $to);
|
||||
my $res = readInt($from);
|
||||
if ($res != 0) {
|
||||
# Note that if we get exit code 100 from `nix-store -r', it
|
||||
# denotes a permanent build failure (as opposed to an SSH problem
|
||||
# or a temporary Nix problem). We propagate this to the caller to
|
||||
# allow it to distinguish between transient and permanent
|
||||
# failures.
|
||||
my $msg = readString($from);
|
||||
print STDERR "error: $msg (on `$hostName')\n";
|
||||
print STDERR "error: $msg on `$hostName'\n";
|
||||
exit $res;
|
||||
}
|
||||
|
||||
|
|
|
@ -238,6 +238,9 @@ public:
|
|||
failure). */
|
||||
bool permanentFailure;
|
||||
|
||||
/* Set if at least one derivation had a timeout. */
|
||||
bool timedOut;
|
||||
|
||||
LocalStore & store;
|
||||
|
||||
std::shared_ptr<HookInstance> hook;
|
||||
|
@ -1440,33 +1443,39 @@ void DerivationGoal::buildDone()
|
|||
outputLocks.unlock();
|
||||
buildUser.release();
|
||||
|
||||
/* When using a build hook, the hook will return a remote
|
||||
build failure using exit code 100. Anything else is a hook
|
||||
problem. */
|
||||
bool hookError = hook &&
|
||||
(!WIFEXITED(status) || WEXITSTATUS(status) != 100);
|
||||
|
||||
if (settings.printBuildTrace) {
|
||||
if (hook && hookError)
|
||||
printMsg(lvlError, format("@ hook-failed %1% - %2% %3%")
|
||||
% drvPath % status % e.msg());
|
||||
else
|
||||
printMsg(lvlError, format("@ build-failed %1% - %2% %3%")
|
||||
% drvPath % 1 % e.msg());
|
||||
if (hook && WIFEXITED(status) && WEXITSTATUS(status) == 101) {
|
||||
if (settings.printBuildTrace)
|
||||
printMsg(lvlError, format("@ build-failed %1% - timeout") % drvPath);
|
||||
worker.timedOut = true;
|
||||
}
|
||||
|
||||
/* Register the outputs of this build as "failed" so we won't
|
||||
try to build them again (negative caching). However, don't
|
||||
do this for fixed-output derivations, since they're likely
|
||||
to fail for transient reasons (e.g., fetchurl not being
|
||||
able to access the network). Hook errors (like
|
||||
communication problems with the remote machine) shouldn't
|
||||
be cached either. */
|
||||
if (settings.cacheFailure && !hookError && !fixedOutput)
|
||||
foreach (DerivationOutputs::iterator, i, drv.outputs)
|
||||
worker.store.registerFailedPath(i->second.path);
|
||||
else if (hook && (!WIFEXITED(status) || WEXITSTATUS(status) != 100)) {
|
||||
if (settings.printBuildTrace)
|
||||
printMsg(lvlError, format("@ hook-failed %1% - %2% %3%")
|
||||
% drvPath % status % e.msg());
|
||||
}
|
||||
|
||||
else {
|
||||
if (settings.printBuildTrace)
|
||||
printMsg(lvlError, format("@ build-failed %1% - %2% %3%")
|
||||
% drvPath % 1 % e.msg());
|
||||
worker.permanentFailure = !fixedOutput && !diskFull;
|
||||
|
||||
/* Register the outputs of this build as "failed" so we
|
||||
won't try to build them again (negative caching).
|
||||
However, don't do this for fixed-output derivations,
|
||||
since they're likely to fail for transient reasons
|
||||
(e.g., fetchurl not being able to access the network).
|
||||
Hook errors (like communication problems with the
|
||||
remote machine) shouldn't be cached either. */
|
||||
if (/* settings.cacheFailure && */ !fixedOutput && !diskFull)
|
||||
{
|
||||
printMsg(lvlError, "REG");
|
||||
foreach (DerivationOutputs::iterator, i, drv.outputs)
|
||||
worker.store.registerFailedPath(i->second.path);
|
||||
}
|
||||
}
|
||||
|
||||
worker.permanentFailure = !hookError && !fixedOutput && !diskFull;
|
||||
amDone(ecFailed);
|
||||
return;
|
||||
}
|
||||
|
@ -2909,6 +2918,7 @@ Worker::Worker(LocalStore & store)
|
|||
nrLocalBuilds = 0;
|
||||
lastWokenUp = 0;
|
||||
permanentFailure = false;
|
||||
timedOut = false;
|
||||
}
|
||||
|
||||
|
||||
|
@ -3220,6 +3230,7 @@ void Worker::waitForInput()
|
|||
format("%1% timed out after %2% seconds of silence")
|
||||
% goal->getName() % settings.maxSilentTime);
|
||||
goal->cancel(true);
|
||||
timedOut = true;
|
||||
}
|
||||
|
||||
else if (goal->getExitCode() == Goal::ecBusy &&
|
||||
|
@ -3231,6 +3242,7 @@ void Worker::waitForInput()
|
|||
format("%1% timed out after %2% seconds")
|
||||
% goal->getName() % settings.buildTimeout);
|
||||
goal->cancel(true);
|
||||
timedOut = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3247,7 +3259,7 @@ void Worker::waitForInput()
|
|||
|
||||
/* Maps the worker's failure flags to the exit status reported to the
   caller.

   NOTE(review): this text is a rendered diff in which the old and new
   lines appear one after the other.  The hunk header (-3247,7 +3259,7)
   indicates a one-line replacement: the first `return' below is the
   pre-change line and the second `return' is its replacement.  The
   replacement yields 101 when `timedOut' is set, otherwise 100 when
   `permanentFailure' is set, otherwise 1. */
unsigned int Worker::exitStatus()
|
||||
{
|
||||
return permanentFailure ? 100 : 1;
|
||||
return timedOut ? 101 : (permanentFailure ? 100 : 1);
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Reference in a new issue