From 0b00d51bafd2422bfae72c2693de344fbca5c7e2 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Wed, 26 Oct 2016 14:42:28 +0200 Subject: [PATCH] Prevent orphaned build steps If two active steps of the same build failed, then the first would be marked as "failed", but the second would end up as "orphaned", causing it to be marked as "aborted" later on. Now it's correctly marked as "failed". --- src/hydra-queue-runner/builder.cc | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/src/hydra-queue-runner/builder.cc b/src/hydra-queue-runner/builder.cc index 3a0b9f46..99e5e6fd 100644 --- a/src/hydra-queue-runner/builder.cc +++ b/src/hydra-queue-runner/builder.cc @@ -108,6 +108,7 @@ State::StepResult State::doBuildStep(nix::ref destStore, Step::ptr step, Finally clearStep([&]() { if (stepNr && !stepFinished) { + printError("marking step %d of build %d as orphaned", stepNr, build->id); auto orphanedSteps_(orphanedSteps.lock()); orphanedSteps_->emplace(build->id, stepNr); } @@ -176,6 +177,7 @@ State::StepResult State::doBuildStep(nix::ref destStore, Step::ptr step, if (result.canRetry) { printMsg(lvlError, format("possibly transient failure building ‘%1%’ on ‘%2%’: %3%") % step->drvPath % machine->sshName % result.errorMsg); + assert(stepNr); bool retry; { auto step_(step->state.lock()); @@ -197,6 +199,8 @@ State::StepResult State::doBuildStep(nix::ref destStore, Step::ptr step, if (result.stepStatus == bsSuccess) { + assert(stepNr); + /* Register success in the database for all Build objects that have this step as the top-level step. Since the queue monitor thread may be creating new referring Builds @@ -293,13 +297,17 @@ State::StepResult State::doBuildStep(nix::ref destStore, Step::ptr step, } else { + /* For standard failures, we don't care about the error + message. */ + if (result.stepStatus != bsAborted) + result.errorMsg = ""; + /* Register failure in the database for all Build objects that directly or indirectly depend on this step. */ std::vector dependentIDs; while (true) { - /* Get the builds and steps that depend on this step. */ std::set indirect; { @@ -315,21 +323,17 @@ State::StepResult State::doBuildStep(nix::ref destStore, Step::ptr step, printMsg(lvlDebug, format("finishing build step ‘%1%’") % s->drvPath); steps_->erase(s->drvPath); } - break; } } + if (indirect.empty() && stepFinished) break; + /* Update the database. */ { auto mc = startDbUpdate(); pqxx::work txn(*conn); - /* For standard failures, we don't care about the error - message. */ - if (result.stepStatus != bsAborted) - result.errorMsg = ""; - /* Create failed build steps for every build that depends on this, except when this step is cached and is the top-level of that build (since then it's @@ -343,9 +347,11 @@ State::StepResult State::doBuildStep(nix::ref destStore, Step::ptr step, result.stepStatus, result.errorMsg, build == build2 ? 0 : build->id); } - if (result.stepStatus != bsCachedFailure) + if (result.stepStatus != bsCachedFailure && !stepFinished) { + assert(stepNr); finishBuildStep(txn, result.startTime, result.stopTime, result.overhead, build->id, stepNr, machine->sshName, result.stepStatus, result.errorMsg); + } /* Mark all builds that depend on this derivation as failed. */ for (auto & build2 : indirect) {