forked from lix-project/hydra
queue-runner: handle broken pg pool connections in builder code
Completes 9b62c52e5c with another location that was initially missed.
This commit is contained in:
parent
3ee51dbe58
commit
44b9a7b95d
|
@ -35,10 +35,18 @@ void State::builder(MachineReservation::ptr reservation)
|
||||||
activeSteps_.lock()->erase(activeStep);
|
activeSteps_.lock()->erase(activeStep);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
auto conn(dbPool.get());
|
||||||
|
|
||||||
try {
|
try {
|
||||||
auto destStore = getDestStore();
|
auto destStore = getDestStore();
|
||||||
// Might release the reservation.
|
// Might release the reservation.
|
||||||
res = doBuildStep(destStore, reservation, activeStep);
|
res = doBuildStep(destStore, reservation, *conn, activeStep);
|
||||||
|
} catch (pqxx::broken_connection & e) {
|
||||||
|
printMsg(lvlError, "db lost while building ‘%s’ on ‘%s’: %s (retriable)",
|
||||||
|
localStore->printStorePath(activeStep->step->drvPath),
|
||||||
|
reservation ? reservation->machine->sshName : std::string("(no machine)"),
|
||||||
|
e.what());
|
||||||
|
conn.markBad();
|
||||||
} catch (std::exception & e) {
|
} catch (std::exception & e) {
|
||||||
printMsg(lvlError, "uncaught exception building ‘%s’ on ‘%s’: %s",
|
printMsg(lvlError, "uncaught exception building ‘%s’ on ‘%s’: %s",
|
||||||
localStore->printStorePath(activeStep->step->drvPath),
|
localStore->printStorePath(activeStep->step->drvPath),
|
||||||
|
@ -76,6 +84,7 @@ void State::builder(MachineReservation::ptr reservation)
|
||||||
|
|
||||||
State::StepResult State::doBuildStep(nix::ref<Store> destStore,
|
State::StepResult State::doBuildStep(nix::ref<Store> destStore,
|
||||||
MachineReservation::ptr & reservation,
|
MachineReservation::ptr & reservation,
|
||||||
|
Connection & conn,
|
||||||
std::shared_ptr<ActiveStep> activeStep)
|
std::shared_ptr<ActiveStep> activeStep)
|
||||||
{
|
{
|
||||||
auto step(reservation->step);
|
auto step(reservation->step);
|
||||||
|
@ -106,8 +115,6 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore,
|
||||||
buildOptions.maxLogSize = maxLogSize;
|
buildOptions.maxLogSize = maxLogSize;
|
||||||
buildOptions.enforceDeterminism = step->isDeterministic;
|
buildOptions.enforceDeterminism = step->isDeterministic;
|
||||||
|
|
||||||
auto conn(dbPool.get());
|
|
||||||
|
|
||||||
{
|
{
|
||||||
std::set<Build::ptr> dependents;
|
std::set<Build::ptr> dependents;
|
||||||
std::set<Step::ptr> steps;
|
std::set<Step::ptr> steps;
|
||||||
|
@ -132,7 +139,7 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore,
|
||||||
for (auto build2 : dependents) {
|
for (auto build2 : dependents) {
|
||||||
if (build2->drvPath == step->drvPath) {
|
if (build2->drvPath == step->drvPath) {
|
||||||
build = build2;
|
build = build2;
|
||||||
pqxx::work txn(*conn);
|
pqxx::work txn(conn);
|
||||||
notifyBuildStarted(txn, build->id);
|
notifyBuildStarted(txn, build->id);
|
||||||
txn.commit();
|
txn.commit();
|
||||||
}
|
}
|
||||||
|
@ -187,7 +194,7 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore,
|
||||||
|
|
||||||
/* If any of the outputs have previously failed, then don't bother
|
/* If any of the outputs have previously failed, then don't bother
|
||||||
building again. */
|
building again. */
|
||||||
if (checkCachedFailure(step, *conn))
|
if (checkCachedFailure(step, conn))
|
||||||
result.stepStatus = bsCachedFailure;
|
result.stepStatus = bsCachedFailure;
|
||||||
else {
|
else {
|
||||||
|
|
||||||
|
@ -195,13 +202,13 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore,
|
||||||
building. */
|
building. */
|
||||||
{
|
{
|
||||||
auto mc = startDbUpdate();
|
auto mc = startDbUpdate();
|
||||||
pqxx::work txn(*conn);
|
pqxx::work txn(conn);
|
||||||
stepNr = createBuildStep(txn, result.startTime, buildId, step, machine->sshName, bsBusy);
|
stepNr = createBuildStep(txn, result.startTime, buildId, step, machine->sshName, bsBusy);
|
||||||
txn.commit();
|
txn.commit();
|
||||||
}
|
}
|
||||||
|
|
||||||
auto updateStep = [&](StepState stepState) {
|
auto updateStep = [&](StepState stepState) {
|
||||||
pqxx::work txn(*conn);
|
pqxx::work txn(conn);
|
||||||
updateBuildStep(txn, buildId, stepNr, stepState);
|
updateBuildStep(txn, buildId, stepNr, stepState);
|
||||||
txn.commit();
|
txn.commit();
|
||||||
};
|
};
|
||||||
|
@ -252,7 +259,7 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore,
|
||||||
|
|
||||||
/* Finish the step in the database. */
|
/* Finish the step in the database. */
|
||||||
if (stepNr) {
|
if (stepNr) {
|
||||||
pqxx::work txn(*conn);
|
pqxx::work txn(conn);
|
||||||
finishBuildStep(txn, result, buildId, stepNr, machine->sshName);
|
finishBuildStep(txn, result, buildId, stepNr, machine->sshName);
|
||||||
txn.commit();
|
txn.commit();
|
||||||
}
|
}
|
||||||
|
@ -328,7 +335,7 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore,
|
||||||
{
|
{
|
||||||
auto mc = startDbUpdate();
|
auto mc = startDbUpdate();
|
||||||
|
|
||||||
pqxx::work txn(*conn);
|
pqxx::work txn(conn);
|
||||||
|
|
||||||
for (auto & b : direct) {
|
for (auto & b : direct) {
|
||||||
printInfo("marking build %1% as succeeded", b->id);
|
printInfo("marking build %1% as succeeded", b->id);
|
||||||
|
@ -356,7 +363,7 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore,
|
||||||
/* Send notification about the builds that have this step as
|
/* Send notification about the builds that have this step as
|
||||||
the top-level. */
|
the top-level. */
|
||||||
{
|
{
|
||||||
pqxx::work txn(*conn);
|
pqxx::work txn(conn);
|
||||||
for (auto id : buildIDs)
|
for (auto id : buildIDs)
|
||||||
notifyBuildFinished(txn, id, {});
|
notifyBuildFinished(txn, id, {});
|
||||||
txn.commit();
|
txn.commit();
|
||||||
|
@ -385,7 +392,7 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore,
|
||||||
}
|
}
|
||||||
|
|
||||||
} else
|
} else
|
||||||
failStep(*conn, step, buildId, result, machine, stepFinished);
|
failStep(conn, step, buildId, result, machine, stepFinished);
|
||||||
|
|
||||||
// FIXME: keep stats about aborted steps?
|
// FIXME: keep stats about aborted steps?
|
||||||
nrStepsDone++;
|
nrStepsDone++;
|
||||||
|
|
|
@ -594,6 +594,7 @@ private:
|
||||||
enum StepResult { sDone, sRetry, sMaybeCancelled };
|
enum StepResult { sDone, sRetry, sMaybeCancelled };
|
||||||
StepResult doBuildStep(nix::ref<nix::Store> destStore,
|
StepResult doBuildStep(nix::ref<nix::Store> destStore,
|
||||||
MachineReservation::ptr & reservation,
|
MachineReservation::ptr & reservation,
|
||||||
|
Connection & conn,
|
||||||
std::shared_ptr<ActiveStep> activeStep);
|
std::shared_ptr<ActiveStep> activeStep);
|
||||||
|
|
||||||
void buildRemote(nix::ref<nix::Store> destStore,
|
void buildRemote(nix::ref<nix::Store> destStore,
|
||||||
|
|
Loading…
Reference in a new issue