hydra-queue-runner: don't dispatch until the machines parser has completed one run

Periodically, I have seen tests fail because of out-of-order queue runner behavior:

    checking the queue for builds > 0...
    loading build 1 (tests:basic:empty_dir)
    aborting unsupported build step '...-empty-dir.drv' (type 'x86_64-linux')
    marking build 1 as failed
    adding new machine ‘localhost’

This patch should prevent the dispatcher from running before any machines are
made available.
This commit is contained in:
Graham Christensen 2022-02-10 10:51:12 -05:00
parent 09652475bd
commit 4acaf9c8b0
3 changed files with 13 additions and 2 deletions

View file

@ -31,8 +31,10 @@ void State::makeRunnable(Step::ptr step)
void State::dispatcher()
{
while (true) {
printMsg(lvlDebug, "Waiting for the machines parsing to have completed at least once");
machinesReadyLock.lock();
while (true) {
try {
printMsg(lvlDebug, "dispatcher woken up");
nrDispatcherWakeups++;

View file

@ -158,6 +158,7 @@ void State::monitorMachinesFile()
(settings.thisSystem == "x86_64-linux" ? "x86_64-linux,i686-linux" : settings.thisSystem.get())
+ " - " + std::to_string(settings.maxBuildJobs) + " 1 "
+ concatStringsSep(",", settings.systemFeatures.get()));
machinesReadyLock.unlock();
return;
}
@ -203,9 +204,15 @@ void State::monitorMachinesFile()
parseMachines(contents);
};
auto firstParse = true;
while (true) {
try {
readMachinesFiles();
if (firstParse) {
machinesReadyLock.unlock();
firstParse = false;
}
// FIXME: use inotify.
sleep(30);
} catch (std::exception & e) {
@ -321,7 +328,7 @@ int State::createSubstitutionStep(pqxx::work & txn, time_t startTime, time_t sto
txn.exec_params0
("insert into BuildStepOutputs (build, stepnr, name, path) values ($1, $2, $3, $4)",
build->id, stepNr, outputName,
build->id, stepNr, outputName,
localStore->printStorePath(storePath));
return stepNr;
@ -770,6 +777,7 @@ void State::run(BuildID buildOne)
dumpStatus(*conn);
}
machinesReadyLock.lock();
std::thread(&State::monitorMachinesFile, this).detach();
std::thread(&State::queueMonitor, this).detach();

View file

@ -342,6 +342,7 @@ private:
nix::Pool<Connection> dbPool;
/* The build machines. */
std::mutex machinesReadyLock;
typedef std::map<std::string, Machine::ptr> Machines;
nix::Sync<Machines> machines; // FIXME: use atomic_shared_ptr