forked from lix-project/hydra
Periodically clear orphaned build steps
These are build steps that remain "busy" in the database even though they have finished, because their database row could not be updated (e.g. due to a PostgreSQL connection problem). To prevent them from showing up as busy on the "Machine status" page, we now periodically clear them by marking them as aborted and no longer busy.
This commit is contained in:
parent
f3f661bac1
commit
077ed3f571
|
@ -2,6 +2,7 @@
|
||||||
|
|
||||||
#include "state.hh"
|
#include "state.hh"
|
||||||
#include "build-result.hh"
|
#include "build-result.hh"
|
||||||
|
#include "finally.hh"
|
||||||
|
|
||||||
using namespace nix;
|
using namespace nix;
|
||||||
|
|
||||||
|
@ -100,6 +101,14 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore, Step::ptr step,
|
||||||
RemoteResult result;
|
RemoteResult result;
|
||||||
BuildOutput res;
|
BuildOutput res;
|
||||||
int stepNr = 0;
|
int stepNr = 0;
|
||||||
|
bool stepFinished = false;
|
||||||
|
|
||||||
|
Finally clearStep([&]() {
|
||||||
|
if (stepNr && !stepFinished) {
|
||||||
|
auto orphanedSteps_(orphanedSteps.lock());
|
||||||
|
orphanedSteps_->emplace(build->id, stepNr);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
time_t stepStartTime = result.startTime = time(0);
|
time_t stepStartTime = result.startTime = time(0);
|
||||||
|
|
||||||
|
@ -170,10 +179,13 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore, Step::ptr step,
|
||||||
}
|
}
|
||||||
if (retry) {
|
if (retry) {
|
||||||
auto mc = startDbUpdate();
|
auto mc = startDbUpdate();
|
||||||
|
{
|
||||||
pqxx::work txn(*conn);
|
pqxx::work txn(*conn);
|
||||||
finishBuildStep(txn, result.startTime, result.stopTime, result.overhead, build->id,
|
finishBuildStep(txn, result.startTime, result.stopTime, result.overhead, build->id,
|
||||||
stepNr, machine->sshName, result.stepStatus, result.errorMsg);
|
stepNr, machine->sshName, result.stepStatus, result.errorMsg);
|
||||||
txn.commit();
|
txn.commit();
|
||||||
|
}
|
||||||
|
stepFinished = true;
|
||||||
if (quit) exit(1);
|
if (quit) exit(1);
|
||||||
return sRetry;
|
return sRetry;
|
||||||
}
|
}
|
||||||
|
@ -234,6 +246,8 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore, Step::ptr step,
|
||||||
txn.commit();
|
txn.commit();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
stepFinished = true;
|
||||||
|
|
||||||
if (direct.empty()) break;
|
if (direct.empty()) break;
|
||||||
|
|
||||||
/* Remove the direct dependencies from ‘builds’. This will
|
/* Remove the direct dependencies from ‘builds’. This will
|
||||||
|
@ -357,6 +371,8 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore, Step::ptr step,
|
||||||
txn.commit();
|
txn.commit();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
stepFinished = true;
|
||||||
|
|
||||||
/* Remove the indirect dependencies from ‘builds’. This
|
/* Remove the indirect dependencies from ‘builds’. This
|
||||||
will cause them to be destroyed. */
|
will cause them to be destroyed. */
|
||||||
for (auto & b : indirect) {
|
for (auto & b : indirect) {
|
||||||
|
|
|
@ -825,6 +825,39 @@ void State::run(BuildID buildOne)
|
||||||
/* Idem for notification sending. */
|
/* Idem for notification sending. */
|
||||||
std::thread(&State::notificationSender, this).detach();
|
std::thread(&State::notificationSender, this).detach();
|
||||||
|
|
||||||
|
/* Periodically clean up orphaned busy steps in the database. */
|
||||||
|
std::thread([&]() {
|
||||||
|
while (true) {
|
||||||
|
sleep(180);
|
||||||
|
|
||||||
|
std::set<std::pair<BuildID, int>> steps;
|
||||||
|
{
|
||||||
|
auto orphanedSteps_(orphanedSteps.lock());
|
||||||
|
if (orphanedSteps_->empty()) continue;
|
||||||
|
steps = *orphanedSteps_;
|
||||||
|
orphanedSteps_->clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
auto conn(dbPool.get());
|
||||||
|
pqxx::work txn(*conn);
|
||||||
|
for (auto & step : steps) {
|
||||||
|
printMsg(lvlError, format("cleaning orphaned step %d of build %d") % step.second % step.first);
|
||||||
|
txn.parameterized
|
||||||
|
("update BuildSteps set busy = 0, status = $1 where build = $2 and stepnr = $3 and busy = 1")
|
||||||
|
((int) bsAborted)
|
||||||
|
(step.first)
|
||||||
|
(step.second).exec();
|
||||||
|
}
|
||||||
|
txn.commit();
|
||||||
|
} catch (std::exception & e) {
|
||||||
|
printMsg(lvlError, format("cleanup thread: %1%") % e.what());
|
||||||
|
auto orphanedSteps_(orphanedSteps.lock());
|
||||||
|
orphanedSteps_->insert(steps.begin(), steps.end());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}).detach();
|
||||||
|
|
||||||
/* Monitor the database for status dump requests (e.g. from
|
/* Monitor the database for status dump requests (e.g. from
|
||||||
‘hydra-queue-runner --status’). */
|
‘hydra-queue-runner --status’). */
|
||||||
while (true) {
|
while (true) {
|
||||||
|
|
|
@ -362,6 +362,11 @@ private:
|
||||||
time_t lastStatusLogged = 0;
|
time_t lastStatusLogged = 0;
|
||||||
const int statusLogInterval = 300;
|
const int statusLogInterval = 300;
|
||||||
|
|
||||||
|
/* Steps that were busy while we encountered a PostgreSQL
|
||||||
|
error. These need to be cleared at a later time to prevent them
|
||||||
|
from showing up as busy until the queue runner is restarted. */
|
||||||
|
nix::Sync<std::set<std::pair<BuildID, int>>> orphanedSteps;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
State();
|
State();
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue