forked from lix-project/hydra
hydra-queue-runner: Improve dispatcher
We now take the machine speed factor into account, just like build-remote.pl.
This commit is contained in:
parent
3855131185
commit
a40ca6b76e
|
@ -6,6 +6,7 @@
|
||||||
#include <thread>
|
#include <thread>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
#include <pqxx/pqxx>
|
#include <pqxx/pqxx>
|
||||||
|
|
||||||
|
@ -159,13 +160,7 @@ struct Machine
|
||||||
unsigned int maxJobs = 1;
|
unsigned int maxJobs = 1;
|
||||||
float speedFactor = 1.0;
|
float speedFactor = 1.0;
|
||||||
|
|
||||||
Sync<unsigned int> currentJobs;
|
std::atomic<unsigned int> currentJobs{0};
|
||||||
|
|
||||||
Machine()
|
|
||||||
{
|
|
||||||
auto currentJobs_(currentJobs.lock());
|
|
||||||
*currentJobs_ = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool supportsStep(Step::ptr step)
|
bool supportsStep(Step::ptr step)
|
||||||
{
|
{
|
||||||
|
@ -187,13 +182,11 @@ struct MachineReservation
|
||||||
Machine::ptr machine;
|
Machine::ptr machine;
|
||||||
MachineReservation(Machine::ptr machine) : machine(machine)
|
MachineReservation(Machine::ptr machine) : machine(machine)
|
||||||
{
|
{
|
||||||
auto currentJobs_(machine->currentJobs.lock());
|
machine->currentJobs++;
|
||||||
(*currentJobs_)++;
|
|
||||||
}
|
}
|
||||||
~MachineReservation()
|
~MachineReservation()
|
||||||
{
|
{
|
||||||
auto currentJobs_(machine->currentJobs.lock());
|
machine->currentJobs--;
|
||||||
if (*currentJobs_ > 0) (*currentJobs_)--;
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -284,8 +277,6 @@ public:
|
||||||
|
|
||||||
void wakeDispatcher();
|
void wakeDispatcher();
|
||||||
|
|
||||||
MachineReservation::ptr findMachine(Step::ptr step);
|
|
||||||
|
|
||||||
void builder(Step::ptr step, MachineReservation::ptr reservation);
|
void builder(Step::ptr step, MachineReservation::ptr reservation);
|
||||||
|
|
||||||
/* Perform the given build step. Return true if the step is to be
|
/* Perform the given build step. Return true if the step is to be
|
||||||
|
@ -878,49 +869,98 @@ void State::dispatcher()
|
||||||
|
|
||||||
auto sleepUntil = system_time::max();
|
auto sleepUntil = system_time::max();
|
||||||
|
|
||||||
{
|
bool keepGoing;
|
||||||
auto runnable_(runnable.lock());
|
|
||||||
printMsg(lvlDebug, format("%1% runnable builds") % runnable_->size());
|
|
||||||
|
|
||||||
/* FIXME: we're holding the runnable lock too long
|
do {
|
||||||
here. This could be more efficient. */
|
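/* Bail out when there are no slots left. NOTE(review): no explicit bail-out follows in this hunk; full machines are only skipped one by one in the loop below — confirm. */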
|
||||||
|
std::vector<Machine::ptr> machinesSorted;
|
||||||
|
{
|
||||||
|
auto machines_(machines.lock());
|
||||||
|
machinesSorted.insert(machinesSorted.end(),
|
||||||
|
machines_->begin(), machines_->end());
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Sort the machines by a combination of speed factor and
|
||||||
|
available slots. Prioritise the available machines as
|
||||||
|
follows:
|
||||||
|
|
||||||
|
- First by load divided by speed factor, rounded to the
|
||||||
|
nearest integer. This causes fast machines to be
|
||||||
|
preferred over slow machines with similar loads.
|
||||||
|
|
||||||
|
- Then by speed factor.
|
||||||
|
|
||||||
|
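- Finally by load. NOTE(review): the comparator below puts the higher rounded load first (`ta > tb`) and breaks the last tie on maxJobs, not on current load — confirm against this description. */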
|
||||||
|
sort(machinesSorted.begin(), machinesSorted.end(),
|
||||||
|
[](const Machine::ptr & a, const Machine::ptr & b) -> bool
|
||||||
|
{
|
||||||
|
float ta = roundf(a->currentJobs / a->speedFactor);
|
||||||
|
float tb = roundf(b->currentJobs / b->speedFactor);
|
||||||
|
return
|
||||||
|
ta != tb ? ta > tb :
|
||||||
|
a->speedFactor != b->speedFactor ? a->speedFactor > b->speedFactor :
|
||||||
|
a->maxJobs > b->maxJobs;
|
||||||
|
});
|
||||||
|
|
||||||
|
/* Find a machine with a free slot and find a step to run
|
||||||
|
on it. Once we find such a pair, we restart the outer
|
||||||
|
loop because the machine sorting will have changed. */
|
||||||
|
keepGoing = false;
|
||||||
system_time now = std::chrono::system_clock::now();
|
system_time now = std::chrono::system_clock::now();
|
||||||
|
|
||||||
for (auto i = runnable_->begin(); i != runnable_->end(); ) {
|
for (auto & machine : machinesSorted) {
|
||||||
auto step = i->lock();
|
// FIXME: can we lose a wakeup if a builder exits concurrently?
|
||||||
|
if (machine->currentJobs >= machine->maxJobs) continue;
|
||||||
|
|
||||||
/* Delete dead steps. */
|
auto runnable_(runnable.lock());
|
||||||
if (!step) {
|
printMsg(lvlDebug, format("%1% runnable builds") % runnable_->size());
|
||||||
i = runnable_->erase(i);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Skip previously failed steps that aren't ready to
|
/* FIXME: we're holding the runnable lock too long
|
||||||
be retried. */
|
here. This could be more efficient. */
|
||||||
{
|
|
||||||
auto step_(step->state.lock());
|
for (auto i = runnable_->begin(); i != runnable_->end(); ) {
|
||||||
if (step_->tries > 0 && step_->after > now) {
|
auto step = i->lock();
|
||||||
if (step_->after < sleepUntil)
|
|
||||||
sleepUntil = step_->after;
|
/* Delete dead steps. */
|
||||||
|
if (!step) {
|
||||||
|
i = runnable_->erase(i);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Can this machine do this step? */
|
||||||
|
if (!machine->supportsStep(step)) {
|
||||||
++i;
|
++i;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Skip previously failed steps that aren't ready
|
||||||
|
to be retried. */
|
||||||
|
{
|
||||||
|
auto step_(step->state.lock());
|
||||||
|
if (step_->tries > 0 && step_->after > now) {
|
||||||
|
if (step_->after < sleepUntil)
|
||||||
|
sleepUntil = step_->after;
|
||||||
|
++i;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Make a slot reservation and start a thread to
|
||||||
|
do the build. */
|
||||||
|
auto reservation = std::make_shared<MachineReservation>(machine);
|
||||||
|
i = runnable_->erase(i);
|
||||||
|
|
||||||
|
auto builderThread = std::thread(&State::builder, this, step, reservation);
|
||||||
|
builderThread.detach(); // FIXME?
|
||||||
|
|
||||||
|
keepGoing = true;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto reservation = findMachine(step);
|
if (keepGoing) break;
|
||||||
if (!reservation) {
|
|
||||||
printMsg(lvlDebug, format("cannot execute step ‘%1%’ right now") % step->drvPath);
|
|
||||||
++i;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
i = runnable_->erase(i);
|
|
||||||
|
|
||||||
auto builderThread = std::thread(&State::builder, this, step, reservation);
|
|
||||||
builderThread.detach(); // FIXME?
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
} while (keepGoing);
|
||||||
|
|
||||||
/* Sleep until we're woken up (either because a runnable build
|
/* Sleep until we're woken up (either because a runnable build
|
||||||
is added, or because a build finishes). */
|
is added, or because a build finishes). */
|
||||||
|
@ -944,23 +984,6 @@ void State::wakeDispatcher()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
MachineReservation::ptr State::findMachine(Step::ptr step)
|
|
||||||
{
|
|
||||||
auto machines_(machines.lock());
|
|
||||||
|
|
||||||
for (auto & machine : *machines_) {
|
|
||||||
if (!machine->supportsStep(step)) continue;
|
|
||||||
{
|
|
||||||
auto currentJobs_(machine->currentJobs.lock());
|
|
||||||
if (*currentJobs_ >= machine->maxJobs) continue;
|
|
||||||
}
|
|
||||||
return std::make_shared<MachineReservation>(machine);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void State::builder(Step::ptr step, MachineReservation::ptr reservation)
|
void State::builder(Step::ptr step, MachineReservation::ptr reservation)
|
||||||
{
|
{
|
||||||
bool retry = true;
|
bool retry = true;
|
||||||
|
@ -1274,9 +1297,8 @@ void State::dumpStatus()
|
||||||
{
|
{
|
||||||
auto machines_(machines.lock());
|
auto machines_(machines.lock());
|
||||||
for (auto & m : *machines_) {
|
for (auto & m : *machines_) {
|
||||||
auto currentJobs_(m->currentJobs.lock());
|
|
||||||
printMsg(lvlError, format("machine %1%: %2%/%3% active")
|
printMsg(lvlError, format("machine %1%: %2%/%3% active")
|
||||||
% m->sshName % *currentJobs_ % m->maxJobs);
|
% m->sshName % m->currentJobs % m->maxJobs);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue