hydra-queue-runner: --build-one: correctly handle a cached build

Previously, when the build was already cached, its build ID would never
flow through the code paths which exit.

This patch tracks the buildOne state as part of State and exits once the
build is done, which avoids waiting forever for new work.

The code around buildOne is a bit rough, making this a bit weird to
implement, but since it is only used for testing, the value of improving
it on its own is a bit questionable.
This commit is contained in:
Graham Christensen 2021-03-16 16:09:36 -04:00
parent 930f05c38e
commit 87d46ad5d6
No known key found for this signature in database
GPG key ID: FE918C3A98C1030F
4 changed files with 22 additions and 14 deletions

View file

@ -148,7 +148,8 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore,
localStore->printStorePath(step->drvPath), repeats + 1, machine->sshName, buildId, (dependents.size() - 1)); localStore->printStorePath(step->drvPath), repeats + 1, machine->sshName, buildId, (dependents.size() - 1));
} }
bool quit = buildId == buildOne && step->drvPath == *buildDrvPath; if (!buildOneDone)
buildOneDone = buildId == buildOne && step->drvPath == *buildDrvPath;
RemoteResult result; RemoteResult result;
BuildOutput res; BuildOutput res;
@ -265,7 +266,7 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore,
if (retry) { if (retry) {
auto mc = startDbUpdate(); auto mc = startDbUpdate();
stepFinished = true; stepFinished = true;
if (quit) exit(1); if (buildOneDone) exit(1);
return sRetry; return sRetry;
} }
} }
@ -376,7 +377,7 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore,
} }
} else } else
failStep(*conn, step, buildId, result, machine, stepFinished, quit); failStep(*conn, step, buildId, result, machine, stepFinished);
// FIXME: keep stats about aborted steps? // FIXME: keep stats about aborted steps?
nrStepsDone++; nrStepsDone++;
@ -386,7 +387,7 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore,
machine->state->totalStepTime += stepStopTime - stepStartTime; machine->state->totalStepTime += stepStopTime - stepStartTime;
machine->state->totalStepBuildTime += result.stopTime - result.startTime; machine->state->totalStepBuildTime += result.stopTime - result.startTime;
if (quit) exit(0); // testing hack; FIXME: this won't run plugins if (buildOneDone) exit(0); // testing hack; FIXME: this won't run plugins
return sDone; return sDone;
} }
@ -398,8 +399,7 @@ void State::failStep(
BuildID buildId, BuildID buildId,
const RemoteResult & result, const RemoteResult & result,
Machine::ptr machine, Machine::ptr machine,
bool & stepFinished, bool & stepFinished)
bool & quit)
{ {
/* Register failure in the database for all Build objects that /* Register failure in the database for all Build objects that
directly or indirectly depend on this step. */ directly or indirectly depend on this step. */
@ -481,7 +481,7 @@ void State::failStep(
b->finishedInDB = true; b->finishedInDB = true;
builds_->erase(b->id); builds_->erase(b->id);
dependentIDs.push_back(b->id); dependentIDs.push_back(b->id);
if (buildOne == b->id) quit = true; if (!buildOneDone && buildOne == b->id) buildOneDone = true;
} }
} }

View file

@ -374,7 +374,6 @@ void State::abortUnsupported()
if (!build) build = *dependents.begin(); if (!build) build = *dependents.begin();
bool stepFinished = false; bool stepFinished = false;
bool quit = false;
failStep( failStep(
*conn, step, build->id, *conn, step, build->id,
@ -385,9 +384,9 @@ void State::abortUnsupported()
.startTime = now2, .startTime = now2,
.stopTime = now2, .stopTime = now2,
}, },
nullptr, stepFinished, quit); nullptr, stepFinished);
if (quit) exit(1); if (buildOneDone) exit(1);
} }
} }

View file

@ -35,14 +35,17 @@ void State::queueMonitorLoop()
unsigned int lastBuildId = 0; unsigned int lastBuildId = 0;
while (true) { bool quit = false;
while (!quit) {
localStore->clearPathInfoCache(); localStore->clearPathInfoCache();
bool done = getQueuedBuilds(*conn, destStore, lastBuildId); bool done = getQueuedBuilds(*conn, destStore, lastBuildId);
if (buildOne && buildOneDone) quit = true;
/* Sleep until we get notification from the database about an /* Sleep until we get notification from the database about an
event. */ event. */
if (done) { if (done && !quit) {
conn->await_notification(); conn->await_notification();
nrQueueWakeups++; nrQueueWakeups++;
} else } else
@ -65,6 +68,8 @@ void State::queueMonitorLoop()
processJobsetSharesChange(*conn); processJobsetSharesChange(*conn);
} }
} }
exit(0);
} }
@ -160,6 +165,7 @@ bool State::getQueuedBuilds(Connection & conn,
/* Some step previously failed, so mark the build as /* Some step previously failed, so mark the build as
failed right away. */ failed right away. */
if (!buildOneDone && build->id == buildOne) buildOneDone = true;
printMsg(lvlError, "marking build %d as cached failure due to %s", printMsg(lvlError, "marking build %d as cached failure due to %s",
build->id, localStore->printStorePath(ex.step->drvPath)); build->id, localStore->printStorePath(ex.step->drvPath));
if (!build->finishedInDB) { if (!build->finishedInDB) {
@ -231,6 +237,7 @@ bool State::getQueuedBuilds(Connection & conn,
auto mc = startDbUpdate(); auto mc = startDbUpdate();
pqxx::work txn(conn); pqxx::work txn(conn);
time_t now = time(0); time_t now = time(0);
if (!buildOneDone && build->id == buildOne) buildOneDone = true;
printMsg(lvlInfo, "marking build %1% as succeeded (cached)", build->id); printMsg(lvlInfo, "marking build %1% as succeeded (cached)", build->id);
markSucceededBuild(txn, build, res, true, now, now); markSucceededBuild(txn, build, res, true, now, now);
notifyBuildFinished(txn, build->id, {}); notifyBuildFinished(txn, build->id, {});
@ -289,6 +296,8 @@ bool State::getQueuedBuilds(Connection & conn,
for (auto & r : newRunnable) for (auto & r : newRunnable)
makeRunnable(r); makeRunnable(r);
if (buildOne && newRunnable.size() == 0) buildOneDone = true;
nrBuildsRead += nrAdded; nrBuildsRead += nrAdded;
/* Stop after a certain time to allow priority bumps to be /* Stop after a certain time to allow priority bumps to be

View file

@ -367,6 +367,7 @@ private:
/* Specific build to do for --build-one (testing only). */ /* Specific build to do for --build-one (testing only). */
BuildID buildOne; BuildID buildOne;
bool buildOneDone = false;
/* Statistics per machine type for the Hydra auto-scaler. */ /* Statistics per machine type for the Hydra auto-scaler. */
struct MachineType struct MachineType
@ -485,8 +486,7 @@ private:
BuildID buildId, BuildID buildId,
const RemoteResult & result, const RemoteResult & result,
Machine::ptr machine, Machine::ptr machine,
bool & stepFinished, bool & stepFinished);
bool & quit);
Jobset::ptr createJobset(pqxx::work & txn, Jobset::ptr createJobset(pqxx::work & txn,
const std::string & projectName, const std::string & jobsetName); const std::string & projectName, const std::string & jobsetName);