forked from lix-project/hydra
Periodically clear orphaned build steps
These are build steps that remain "busy" in the database even though they have finished, because they couldn't be updated (e.g. due to a PostgreSQL connection problem). To prevent them from showing up as busy in the "Machine status" page, we now periodically purge them.
This commit is contained in:
parent
f3f661bac1
commit
077ed3f571
|
@ -2,6 +2,7 @@
|
|||
|
||||
#include "state.hh"
|
||||
#include "build-result.hh"
|
||||
#include "finally.hh"
|
||||
|
||||
using namespace nix;
|
||||
|
||||
|
@ -100,6 +101,14 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore, Step::ptr step,
|
|||
RemoteResult result;
|
||||
BuildOutput res;
|
||||
int stepNr = 0;
|
||||
bool stepFinished = false;
|
||||
|
||||
/* Scope guard: when this function is left after a step number has
   been assigned (stepNr != 0) but before the step was marked as
   finished, remember the (build, step number) pair in
   'orphanedSteps' so that it can be cleared later. */
Finally clearStep([&]() {
    const bool stepWasCreated = stepNr != 0;
    if (!stepWasCreated || stepFinished) return;
    /* The lock is held only for the duration of the insertion. */
    orphanedSteps.lock()->emplace(build->id, stepNr);
});
|
||||
|
||||
time_t stepStartTime = result.startTime = time(0);
|
||||
|
||||
|
@ -170,10 +179,13 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore, Step::ptr step,
|
|||
}
|
||||
if (retry) {
|
||||
auto mc = startDbUpdate();
|
||||
{
|
||||
pqxx::work txn(*conn);
|
||||
finishBuildStep(txn, result.startTime, result.stopTime, result.overhead, build->id,
|
||||
stepNr, machine->sshName, result.stepStatus, result.errorMsg);
|
||||
txn.commit();
|
||||
}
|
||||
stepFinished = true;
|
||||
if (quit) exit(1);
|
||||
return sRetry;
|
||||
}
|
||||
|
@ -234,6 +246,8 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore, Step::ptr step,
|
|||
txn.commit();
|
||||
}
|
||||
|
||||
stepFinished = true;
|
||||
|
||||
if (direct.empty()) break;
|
||||
|
||||
/* Remove the direct dependencies from ‘builds’. This will
|
||||
|
@ -357,6 +371,8 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore, Step::ptr step,
|
|||
txn.commit();
|
||||
}
|
||||
|
||||
stepFinished = true;
|
||||
|
||||
/* Remove the indirect dependencies from ‘builds’. This
|
||||
will cause them to be destroyed. */
|
||||
for (auto & b : indirect) {
|
||||
|
|
|
@ -825,6 +825,39 @@ void State::run(BuildID buildOne)
|
|||
/* Idem for notification sending. */
|
||||
std::thread(&State::notificationSender, this).detach();
|
||||
|
||||
/* Periodically clean up orphaned busy steps in the database. */
|
||||
std::thread([&]() {
|
||||
while (true) {
|
||||
sleep(180);
|
||||
|
||||
std::set<std::pair<BuildID, int>> steps;
|
||||
{
|
||||
auto orphanedSteps_(orphanedSteps.lock());
|
||||
if (orphanedSteps_->empty()) continue;
|
||||
steps = *orphanedSteps_;
|
||||
orphanedSteps_->clear();
|
||||
}
|
||||
|
||||
try {
|
||||
auto conn(dbPool.get());
|
||||
pqxx::work txn(*conn);
|
||||
for (auto & step : steps) {
|
||||
printMsg(lvlError, format("cleaning orphaned step %d of build %d") % step.second % step.first);
|
||||
txn.parameterized
|
||||
("update BuildSteps set busy = 0, status = $1 where build = $2 and stepnr = $3 and busy = 1")
|
||||
((int) bsAborted)
|
||||
(step.first)
|
||||
(step.second).exec();
|
||||
}
|
||||
txn.commit();
|
||||
} catch (std::exception & e) {
|
||||
printMsg(lvlError, format("cleanup thread: %1%") % e.what());
|
||||
auto orphanedSteps_(orphanedSteps.lock());
|
||||
orphanedSteps_->insert(steps.begin(), steps.end());
|
||||
}
|
||||
}
|
||||
}).detach();
|
||||
|
||||
/* Monitor the database for status dump requests (e.g. from
|
||||
‘hydra-queue-runner --status’). */
|
||||
while (true) {
|
||||
|
|
|
@ -362,6 +362,11 @@ private:
|
|||
time_t lastStatusLogged = 0;
|
||||
const int statusLogInterval = 300;
|
||||
|
||||
/* Steps that were busy while we encountered a PostgreSQL
|
||||
error. These need to be cleared at a later time to prevent them
|
||||
from showing up as busy until the queue runner is restarted. */
|
||||
nix::Sync<std::set<std::pair<BuildID, int>>> orphanedSteps;
|
||||
|
||||
public:
|
||||
State();
|
||||
|
||||
|
|
Loading…
Reference in a new issue