* When using a build hook, distinguish between transient failures
  (e.g. an SSH connection problem) and permanent failures (i.e. the
  builder failed).  This matters to Hydra (it wants to know whether it
  makes sense to retry a build).
This commit is contained in:
Eelco Dolstra 2009-01-13 11:39:09 +00:00
parent 4ce692df88
commit 019176137f
2 changed files with 30 additions and 6 deletions

View file

@ -192,8 +192,16 @@ my $buildFlags = "--max-silent-time $maxSilentTime";
# connection dies. Without it, the remote process might continue to
# run indefinitely (that is, until it next tries to write to
# stdout/stderr).
system("ssh -tt $sshOpts $hostName 'nix-store -rvvK $buildFlags $drvPath'") == 0
or die "remote build on $hostName failed: $?";
if (system("ssh -tt $sshOpts $hostName 'nix-store -rvvK $buildFlags $drvPath'") != 0) {
# If we couldn't run ssh or there was an ssh problem (indicated by
# exit code 255), then we return exit code 1; otherwise we assume
# that the builder failed, which we indicate to Nix using exit
# code 100. It's important to distinguish between the two because
# the former is a transient failure and the latter is permanent.
my $res = $? == -1 || ($? >> 8) == 255 ? 1 : 100;
print STDERR "remote build on $hostName failed: $?";
exit $res;
}
print "REMOTE BUILD DONE: $drvPath on $hostName\n";

View file

@ -648,6 +648,9 @@ private:
/* Pipe for the builder's standard output/error. */
Pipe logPipe;
/* Whether we're building using a build hook. */
bool usingBuildHook;
/* Pipes for talking to the build hook (if any). */
Pipe toHook;
Pipe fromHook;
@ -970,6 +973,7 @@ void DerivationGoal::tryToBuild()
try {
/* Is the build hook willing to accept this job? */
usingBuildHook = true;
switch (tryBuildHook()) {
case rpAccept:
/* Yes, it has started doing so. Wait until we get
@ -1003,6 +1007,7 @@ void DerivationGoal::tryToBuild()
/* Acquire locks and such. If we then see that the build has
been done by somebody else, we're done. */
usingBuildHook = false;
PrepareBuildReply preply = prepareBuild();
if (preply == prDone) {
amDone(ecSuccess);
@ -1019,8 +1024,12 @@ void DerivationGoal::tryToBuild()
} catch (BuildError & e) {
printMsg(lvlError, e.msg());
if (printBuildTrace) {
printMsg(lvlError, format("@ build-failed %1% %2% %3% %4%")
% drvPath % drv.outputs["out"].path % 0 % e.msg());
if (usingBuildHook)
printMsg(lvlError, format("@ hook-failed %1% %2% %3% %4%")
% drvPath % drv.outputs["out"].path % 0 % e.msg());
else
printMsg(lvlError, format("@ build-failed %1% %2% %3% %4%")
% drvPath % drv.outputs["out"].path % 0 % e.msg());
}
amDone(ecFailed);
return;
@ -1122,8 +1131,15 @@ void DerivationGoal::buildDone()
} catch (BuildError & e) {
printMsg(lvlError, e.msg());
if (printBuildTrace) {
printMsg(lvlError, format("@ build-failed %1% %2% %3% %4%")
% drvPath % drv.outputs["out"].path % status % e.msg());
/* When using a build hook, the hook will return a
remote build failure using exit code 100. Anything
else is a hook problem. */
if (usingBuildHook && (!WIFEXITED(status) || WEXITSTATUS(status) != 100))
printMsg(lvlError, format("@ hook-failed %1% %2% %3% %4%")
% drvPath % drv.outputs["out"].path % status % e.msg());
else
printMsg(lvlError, format("@ build-failed %1% %2% %3% %4%")
% drvPath % drv.outputs["out"].path % status % e.msg());
}
amDone(ecFailed);
return;