diff --git a/src/hydra-queue-runner/builder.cc b/src/hydra-queue-runner/builder.cc index f89aee72..a230d25a 100644 --- a/src/hydra-queue-runner/builder.cc +++ b/src/hydra-queue-runner/builder.cc @@ -2,6 +2,7 @@ #include "state.hh" #include "build-result.hh" +#include "finally.hh" using namespace nix; @@ -100,6 +101,14 @@ State::StepResult State::doBuildStep(nix::ref destStore, Step::ptr step, RemoteResult result; BuildOutput res; int stepNr = 0; + bool stepFinished = false; + /* Guard for abnormal exits (presumably invoked when clearStep goes out of scope; see finally.hh): if a step number was assigned but stepFinished was never set to true, record the (build id, step number) pair in orphanedSteps so that the step can be cleared in the database later. */ + Finally clearStep([&]() { + if (stepNr && !stepFinished) { + auto orphanedSteps_(orphanedSteps.lock()); + orphanedSteps_->emplace(build->id, stepNr); + } + }); time_t stepStartTime = result.startTime = time(0); @@ -170,10 +179,13 @@ State::StepResult State::doBuildStep(nix::ref destStore, Step::ptr step, } if (retry) { auto mc = startDbUpdate(); + { pqxx::work txn(*conn); finishBuildStep(txn, result.startTime, result.stopTime, result.overhead, build->id, stepNr, machine->sshName, result.stepStatus, result.errorMsg); txn.commit(); + } + stepFinished = true; if (quit) exit(1); return sRetry; } @@ -234,6 +246,8 @@ State::StepResult State::doBuildStep(nix::ref destStore, Step::ptr step, txn.commit(); } + stepFinished = true; + if (direct.empty()) break; /* Remove the direct dependencies from ‘builds’. This will @@ -357,6 +371,8 @@ State::StepResult State::doBuildStep(nix::ref destStore, Step::ptr step, txn.commit(); } + stepFinished = true; + /* Remove the indirect dependencies from ‘builds’. This will cause them to be destroyed. */ for (auto & b : indirect) { diff --git a/src/hydra-queue-runner/hydra-queue-runner.cc b/src/hydra-queue-runner/hydra-queue-runner.cc index a87c10eb..a79b0942 100644 --- a/src/hydra-queue-runner/hydra-queue-runner.cc +++ b/src/hydra-queue-runner/hydra-queue-runner.cc @@ -825,6 +825,39 @@ void State::run(BuildID buildOne) /* Idem for notification sending. 
*/ std::thread(&State::notificationSender, this).detach(); + /* Periodically clean up orphaned busy steps in the database: every 180 seconds, mark the recorded steps that are still busy as aborted. If the database update fails, the steps are put back so that a later round retries them. */ + std::thread([&]() { + while (true) { + sleep(180); + + std::set<std::pair<BuildID, int>> steps; + { + auto orphanedSteps_(orphanedSteps.lock()); + if (orphanedSteps_->empty()) continue; + steps = *orphanedSteps_; + orphanedSteps_->clear(); + } + + try { + auto conn(dbPool.get()); + pqxx::work txn(*conn); + for (auto & step : steps) { + printMsg(lvlError, format("cleaning orphaned step %d of build %d") % step.second % step.first); + txn.parameterized + ("update BuildSteps set busy = 0, status = $1 where build = $2 and stepnr = $3 and busy = 1") + ((int) bsAborted) + (step.first) + (step.second).exec(); + } + txn.commit(); + } catch (std::exception & e) { + printMsg(lvlError, format("cleanup thread: %1%") % e.what()); + auto orphanedSteps_(orphanedSteps.lock()); + orphanedSteps_->insert(steps.begin(), steps.end()); + } + } + }).detach(); + /* Monitor the database for status dump requests (e.g. from ‘hydra-queue-runner --status’). */ while (true) { diff --git a/src/hydra-queue-runner/state.hh b/src/hydra-queue-runner/state.hh index 061d5657..3f46f5bd 100644 --- a/src/hydra-queue-runner/state.hh +++ b/src/hydra-queue-runner/state.hh @@ -362,6 +362,11 @@ private: time_t lastStatusLogged = 0; const int statusLogInterval = 300; + /* Steps that were busy while we encountered a PostgreSQL + error. These need to be cleared at a later time to prevent them + from showing up as busy until the queue runner is restarted. Each entry is a (build ID, step number) pair. */ + nix::Sync<std::set<std::pair<BuildID, int>>> orphanedSteps; + public: State();