Periodically clear orphaned build steps

These are build steps that remain "busy" in the database even though
they have finished, because they couldn't be updated (e.g. due to a
PostgreSQL connection problem). To prevent them from showing up as
busy in the "Machine status" page, we now periodically purge them.
This commit is contained in:
Eelco Dolstra 2016-04-13 16:18:35 +02:00
parent f3f661bac1
commit 077ed3f571
3 changed files with 54 additions and 0 deletions

View file

@ -2,6 +2,7 @@
#include "state.hh"
#include "build-result.hh"
#include "finally.hh"
using namespace nix;
@ -100,6 +101,14 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore, Step::ptr step,
RemoteResult result;
BuildOutput res;
int stepNr = 0;
bool stepFinished = false;
/* Scope guard: if a step number has been assigned (stepNr is
   non-zero) but stepFinished was never set, i.e. none of the
   finishing transactions below was committed, remember the step
   in orphanedSteps so that it can be cleared later.
   NOTE(review): relies on Finally invoking its callback when it
   is destroyed — confirm against finally.hh. */
Finally clearStep([&]() {
    if (!stepNr || stepFinished) return;
    auto pending(orphanedSteps.lock());
    pending->emplace(build->id, stepNr);
});
time_t stepStartTime = result.startTime = time(0);
@ -170,10 +179,13 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore, Step::ptr step,
}
if (retry) {
auto mc = startDbUpdate();
{
pqxx::work txn(*conn);
finishBuildStep(txn, result.startTime, result.stopTime, result.overhead, build->id,
stepNr, machine->sshName, result.stepStatus, result.errorMsg);
txn.commit();
}
stepFinished = true;
if (quit) exit(1);
return sRetry;
}
@ -234,6 +246,8 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore, Step::ptr step,
txn.commit();
}
stepFinished = true;
if (direct.empty()) break;
/* Remove the direct dependencies from builds. This will
@ -357,6 +371,8 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore, Step::ptr step,
txn.commit();
}
stepFinished = true;
/* Remove the indirect dependencies from builds. This
will cause them to be destroyed. */
for (auto & b : indirect) {

View file

@ -825,6 +825,39 @@ void State::run(BuildID buildOne)
/* Idem for notification sending. */
std::thread(&State::notificationSender, this).detach();
/* Background janitor: every 180 seconds, take the recorded
   orphaned steps and, for those still flagged busy in the
   database, clear the busy flag and set their status to
   bsAborted. */
std::thread([&]() {
    for (;;) {
        sleep(180);

        /* Grab the whole pending set while holding the lock, so the
           database work below runs without it. */
        std::set<std::pair<BuildID, int>> pending;
        {
            auto shared(orphanedSteps.lock());
            if (shared->empty()) continue;
            pending.swap(*shared);
        }

        try {
            auto conn(dbPool.get());
            pqxx::work txn(*conn);
            for (const auto & entry : pending) {
                const auto & buildId = entry.first;
                const auto & stepNr = entry.second;
                printMsg(lvlError, format("cleaning orphaned step %d of build %d") % stepNr % buildId);
                txn.parameterized
                    ("update BuildSteps set busy = 0, status = $1 where build = $2 and stepnr = $3 and busy = 1")
                    (static_cast<int>(bsAborted))
                    (buildId)
                    (stepNr).exec();
            }
            txn.commit();
        } catch (std::exception & e) {
            printMsg(lvlError, format("cleanup thread: %1%") % e.what());
            /* Put the steps back so that the next round retries them. */
            auto shared(orphanedSteps.lock());
            shared->insert(pending.begin(), pending.end());
        }
    }
}).detach();
/* Monitor the database for status dump requests (e.g. from
hydra-queue-runner --status). */
while (true) {

View file

@ -362,6 +362,11 @@ private:
time_t lastStatusLogged = 0;
const int statusLogInterval = 300;
/* Steps that were busy while we encountered a PostgreSQL
error. These need to be cleared at a later time to prevent them
from showing up as busy until the queue runner is restarted. */
nix::Sync<std::set<std::pair<BuildID, int>>> orphanedSteps;
public:
State();