Queue monitor: Bail out earlier if a step has failed previously
Currently, the hydra.nixos.org queue contains thousands of Darwin builds that all depend on a stdenv-darwin that previously failed. Until now, createStep() would first construct a dependency graph for each build, and only then would getQueuedBuilds() discover that one of the steps had failed previously and discard all those steps. Since the graph construction involves a lot of uncached calls to isValidPath(), this took several seconds per build. Now createStep() detects the previous failure right away and bails out.
This commit is contained in:
parent
2f0f7406d4
commit
177bf25d64
|
@ -63,6 +63,12 @@ void State::queueMonitorLoop()
|
|||
}
|
||||
|
||||
|
||||
struct PreviousFailure : public std::exception {
|
||||
Step::ptr step;
|
||||
PreviousFailure(Step::ptr step) : step(step) { }
|
||||
};
|
||||
|
||||
|
||||
bool State::getQueuedBuilds(Connection & conn, ref<Store> localStore,
|
||||
ref<Store> destStore, unsigned int & lastBuildId)
|
||||
{
|
||||
|
@ -139,8 +145,55 @@ bool State::getQueuedBuilds(Connection & conn, ref<Store> localStore,
|
|||
}
|
||||
|
||||
std::set<Step::ptr> newSteps;
|
||||
Step::ptr step = createStep(destStore, conn, build, build->drvPath,
|
||||
Step::ptr step;
|
||||
|
||||
/* Create steps for this derivation and its dependencies. */
|
||||
try {
|
||||
step = createStep(destStore, conn, build, build->drvPath,
|
||||
build, 0, finishedDrvs, newSteps, newRunnable);
|
||||
} catch (PreviousFailure & ex) {
|
||||
|
||||
/* Some step previously failed, so mark the build as
|
||||
failed right away. */
|
||||
printMsg(lvlError, format("marking build %d as cached failure due to ‘%s’") % build->id % ex.step->drvPath);
|
||||
if (!build->finishedInDB) {
|
||||
auto mc = startDbUpdate();
|
||||
pqxx::work txn(conn);
|
||||
|
||||
/* Find the previous build step record, first by
|
||||
derivation path, then by output path. */
|
||||
BuildID propagatedFrom = 0;
|
||||
|
||||
auto res = txn.parameterized
|
||||
("select max(build) from BuildSteps where drvPath = $1 and startTime != 0 and stopTime != 0 and status = 1")
|
||||
(ex.step->drvPath).exec();
|
||||
if (!res[0][0].is_null()) propagatedFrom = res[0][0].as<BuildID>();
|
||||
|
||||
if (!propagatedFrom) {
|
||||
for (auto & output : ex.step->drv.outputs) {
|
||||
auto res = txn.parameterized
|
||||
("select max(s.build) from BuildSteps s join BuildStepOutputs o on s.build = o.build where path = $1 and startTime != 0 and stopTime != 0 and status = 1")
|
||||
(output.second.path).exec();
|
||||
if (!res[0][0].is_null()) {
|
||||
propagatedFrom = res[0][0].as<BuildID>();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
createBuildStep(txn, 0, build, ex.step, "", bsCachedFailure, "", propagatedFrom);
|
||||
txn.parameterized
|
||||
("update Builds set finished = 1, buildStatus = $2, startTime = $3, stopTime = $3, isCachedBuild = 1 where id = $1 and finished = 0")
|
||||
(build->id)
|
||||
((int) (ex.step->drvPath == build->drvPath ? bsFailed : bsDepFailed))
|
||||
(time(0)).exec();
|
||||
txn.commit();
|
||||
build->finishedInDB = true;
|
||||
nrBuildsDone++;
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/* Some of the new steps may be the top level of builds that
|
||||
we haven't processed yet. So do them now. This ensures that
|
||||
|
@ -174,53 +227,6 @@ bool State::getQueuedBuilds(Connection & conn, ref<Store> localStore,
|
|||
return;
|
||||
}
|
||||
|
||||
/* If any step has a previously failed output path, then fail
|
||||
the build right away. */
|
||||
bool badStep = false;
|
||||
for (auto & r : newSteps)
|
||||
if (checkCachedFailure(r, conn)) {
|
||||
printMsg(lvlError, format("marking build %1% as cached failure") % build->id);
|
||||
if (!build->finishedInDB) {
|
||||
auto mc = startDbUpdate();
|
||||
pqxx::work txn(conn);
|
||||
|
||||
/* Find the previous build step record, first by
|
||||
derivation path, then by output path. */
|
||||
BuildID propagatedFrom = 0;
|
||||
|
||||
auto res = txn.parameterized
|
||||
("select max(build) from BuildSteps where drvPath = $1 and startTime != 0 and stopTime != 0 and status = 1")
|
||||
(r->drvPath).exec();
|
||||
if (!res[0][0].is_null()) propagatedFrom = res[0][0].as<BuildID>();
|
||||
|
||||
if (!propagatedFrom) {
|
||||
for (auto & output : r->drv.outputs) {
|
||||
auto res = txn.parameterized
|
||||
("select max(s.build) from BuildSteps s join BuildStepOutputs o on s.build = o.build where path = $1 and startTime != 0 and stopTime != 0 and status = 1")
|
||||
(output.second.path).exec();
|
||||
if (!res[0][0].is_null()) {
|
||||
propagatedFrom = res[0][0].as<BuildID>();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
createBuildStep(txn, 0, build, r, "", bsCachedFailure, "", propagatedFrom);
|
||||
txn.parameterized
|
||||
("update Builds set finished = 1, buildStatus = $2, startTime = $3, stopTime = $3, isCachedBuild = 1 where id = $1 and finished = 0")
|
||||
(build->id)
|
||||
((int) (step == r ? bsFailed : bsDepFailed))
|
||||
(time(0)).exec();
|
||||
txn.commit();
|
||||
build->finishedInDB = true;
|
||||
nrBuildsDone++;
|
||||
}
|
||||
badStep = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (badStep) return;
|
||||
|
||||
/* Note: if we exit this scope prior to this, the build and
|
||||
all newly created steps are destroyed. */
|
||||
|
||||
|
@ -401,6 +407,10 @@ Step::ptr State::createStep(ref<Store> destStore,
|
|||
}
|
||||
}
|
||||
|
||||
/* If this derivation failed previously, give up. */
|
||||
if (checkCachedFailure(step, conn))
|
||||
throw PreviousFailure{step};
|
||||
|
||||
/* Are all outputs valid? */
|
||||
bool valid = true;
|
||||
PathSet outputs = step->drv.outputPaths();
|
||||
|
@ -453,7 +463,6 @@ Step::ptr State::createStep(ref<Store> destStore,
|
|||
|
||||
/* No, we need to build. */
|
||||
printMsg(lvlDebug, format("creating build step ‘%1%’") % drvPath);
|
||||
newSteps.insert(step);
|
||||
|
||||
/* Create steps for the dependencies. */
|
||||
for (auto & i : step->drv.inputDrvs) {
|
||||
|
@ -474,6 +483,8 @@ Step::ptr State::createStep(ref<Store> destStore,
|
|||
newRunnable.insert(step);
|
||||
}
|
||||
|
||||
newSteps.insert(step);
|
||||
|
||||
return step;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue