forked from lix-project/hydra
hydra-queue-runner: don't dispatch until the machines parser has completed one run
Periodically, I have seen tests fail because of out-of-order queue runner behavior:

    checking the queue for builds > 0...
    loading build 1 (tests:basic:empty_dir)
    aborting unsupported build step '...-empty-dir.drv' (type 'x86_64-linux')
    marking build 1 as failed
    adding new machine ‘localhost’

This patch should prevent the dispatcher from running before any machines are made available.
This commit is contained in:
parent
09652475bd
commit
4acaf9c8b0
3 changed files with 13 additions and 2 deletions
|
@ -31,8 +31,10 @@ void State::makeRunnable(Step::ptr step)
|
|||
|
||||
void State::dispatcher()
|
||||
{
|
||||
while (true) {
|
||||
printMsg(lvlDebug, "Waiting for the machines parsing to have completed at least once");
|
||||
machinesReadyLock.lock();
|
||||
|
||||
while (true) {
|
||||
try {
|
||||
printMsg(lvlDebug, "dispatcher woken up");
|
||||
nrDispatcherWakeups++;
|
||||
|
|
|
@ -158,6 +158,7 @@ void State::monitorMachinesFile()
|
|||
(settings.thisSystem == "x86_64-linux" ? "x86_64-linux,i686-linux" : settings.thisSystem.get())
|
||||
+ " - " + std::to_string(settings.maxBuildJobs) + " 1 "
|
||||
+ concatStringsSep(",", settings.systemFeatures.get()));
|
||||
machinesReadyLock.unlock();
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -203,9 +204,15 @@ void State::monitorMachinesFile()
|
|||
parseMachines(contents);
|
||||
};
|
||||
|
||||
auto firstParse = true;
|
||||
|
||||
while (true) {
|
||||
try {
|
||||
readMachinesFiles();
|
||||
if (firstParse) {
|
||||
machinesReadyLock.unlock();
|
||||
firstParse = false;
|
||||
}
|
||||
// FIXME: use inotify.
|
||||
sleep(30);
|
||||
} catch (std::exception & e) {
|
||||
|
@ -321,7 +328,7 @@ int State::createSubstitutionStep(pqxx::work & txn, time_t startTime, time_t sto
|
|||
|
||||
txn.exec_params0
|
||||
("insert into BuildStepOutputs (build, stepnr, name, path) values ($1, $2, $3, $4)",
|
||||
build->id, stepNr, outputName,
|
||||
build->id, stepNr, outputName,
|
||||
localStore->printStorePath(storePath));
|
||||
|
||||
return stepNr;
|
||||
|
@ -770,6 +777,7 @@ void State::run(BuildID buildOne)
|
|||
dumpStatus(*conn);
|
||||
}
|
||||
|
||||
machinesReadyLock.lock();
|
||||
std::thread(&State::monitorMachinesFile, this).detach();
|
||||
|
||||
std::thread(&State::queueMonitor, this).detach();
|
||||
|
|
|
@ -342,6 +342,7 @@ private:
|
|||
nix::Pool<Connection> dbPool;
|
||||
|
||||
/* The build machines. */
|
||||
std::mutex machinesReadyLock;
|
||||
typedef std::map<std::string, Machine::ptr> Machines;
|
||||
nix::Sync<Machines> machines; // FIXME: use atomic_shared_ptr
|
||||
|
||||
|
|
Loading…
Reference in a new issue