forked from lix-project/hydra
Start steps in order of ascending build ID
This commit is contained in:
parent
52ec971154
commit
27182c7c1d
4 changed files with 85 additions and 23 deletions
|
@ -1,3 +1,4 @@
|
||||||
|
#include <iostream>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <thread>
|
#include <thread>
|
||||||
|
|
||||||
|
@ -53,8 +54,8 @@ void State::dispatcher()
|
||||||
|
|
||||||
system_time State::doDispatch()
|
system_time State::doDispatch()
|
||||||
{
|
{
|
||||||
|
/* Start steps until we're out of steps or slots. */
|
||||||
auto sleepUntil = system_time::max();
|
auto sleepUntil = system_time::max();
|
||||||
|
|
||||||
bool keepGoing;
|
bool keepGoing;
|
||||||
|
|
||||||
do {
|
do {
|
||||||
|
@ -105,35 +106,23 @@ system_time State::doDispatch()
|
||||||
a.currentJobs > b.currentJobs;
|
a.currentJobs > b.currentJobs;
|
||||||
});
|
});
|
||||||
|
|
||||||
/* Find a machine with a free slot and find a step to run
|
/* Sort the runnable steps by priority. FIXME: O(n lg n);
|
||||||
on it. Once we find such a pair, we restart the outer
|
obviously, it would be better to keep a runnable queue sorted
|
||||||
loop because the machine sorting will have changed. */
|
by priority. */
|
||||||
keepGoing = false;
|
std::vector<Step::ptr> runnableSorted;
|
||||||
|
{
|
||||||
for (auto & mi : machinesSorted) {
|
|
||||||
// FIXME: can we lose a wakeup if a builder exits concurrently?
|
|
||||||
if (mi.machine->state->currentJobs >= mi.machine->maxJobs) continue;
|
|
||||||
|
|
||||||
auto runnable_(runnable.lock());
|
auto runnable_(runnable.lock());
|
||||||
//printMsg(lvlDebug, format("%1% runnable builds") % runnable_->size());
|
runnableSorted.reserve(runnable_->size());
|
||||||
|
|
||||||
/* FIXME: we're holding the runnable lock too long
|
|
||||||
here. This could be more efficient. */
|
|
||||||
|
|
||||||
for (auto i = runnable_->begin(); i != runnable_->end(); ) {
|
for (auto i = runnable_->begin(); i != runnable_->end(); ) {
|
||||||
auto step = i->lock();
|
auto step = i->lock();
|
||||||
|
|
||||||
/* Delete dead steps. */
|
/* Remove dead steps. */
|
||||||
if (!step) {
|
if (!step) {
|
||||||
i = runnable_->erase(i);
|
i = runnable_->erase(i);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Can this machine do this step? */
|
++i;
|
||||||
if (!mi.machine->supportsStep(step)) {
|
|
||||||
++i;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Skip previously failed steps that aren't ready
|
/* Skip previously failed steps that aren't ready
|
||||||
to be retried. */
|
to be retried. */
|
||||||
|
@ -142,15 +131,52 @@ system_time State::doDispatch()
|
||||||
if (step_->tries > 0 && step_->after > now) {
|
if (step_->tries > 0 && step_->after > now) {
|
||||||
if (step_->after < sleepUntil)
|
if (step_->after < sleepUntil)
|
||||||
sleepUntil = step_->after;
|
sleepUntil = step_->after;
|
||||||
++i;
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
runnableSorted.push_back(step);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sort(runnableSorted.begin(), runnableSorted.end(),
|
||||||
|
[](const Step::ptr & a, const Step::ptr & b)
|
||||||
|
{
|
||||||
|
auto a_(a->state.lock());
|
||||||
|
auto b_(b->state.lock()); // FIXME: deadlock?
|
||||||
|
return a_->lowestBuildID < b_->lowestBuildID;
|
||||||
|
});
|
||||||
|
|
||||||
|
/* Find a machine with a free slot and find a step to run
|
||||||
|
on it. Once we find such a pair, we restart the outer
|
||||||
|
loop because the machine sorting will have changed. */
|
||||||
|
keepGoing = false;
|
||||||
|
|
||||||
|
for (auto & mi : machinesSorted) {
|
||||||
|
if (mi.machine->state->currentJobs >= mi.machine->maxJobs) continue;
|
||||||
|
|
||||||
|
for (auto & step : runnableSorted) {
|
||||||
|
|
||||||
|
/* Can this machine do this step? */
|
||||||
|
if (!mi.machine->supportsStep(step)) continue;
|
||||||
|
|
||||||
|
/* Let's do this step. Remove it from the runnable
|
||||||
|
list. FIXME: O(n). */
|
||||||
|
{
|
||||||
|
auto runnable_(runnable.lock());
|
||||||
|
bool removed = false;
|
||||||
|
for (auto i = runnable_->begin(); i != runnable_->end(); )
|
||||||
|
if (i->lock() == step) {
|
||||||
|
i = runnable_->erase(i);
|
||||||
|
removed = true;
|
||||||
|
break;
|
||||||
|
} else ++i;
|
||||||
|
assert(removed);
|
||||||
|
}
|
||||||
|
|
||||||
/* Make a slot reservation and start a thread to
|
/* Make a slot reservation and start a thread to
|
||||||
do the build. */
|
do the build. */
|
||||||
auto reservation = std::make_shared<MaintainCount>(mi.machine->state->currentJobs);
|
auto reservation = std::make_shared<MaintainCount>(mi.machine->state->currentJobs);
|
||||||
i = runnable_->erase(i);
|
|
||||||
|
|
||||||
auto builderThread = std::thread(&State::builder, this, step, mi.machine, reservation);
|
auto builderThread = std::thread(&State::builder, this, step, mi.machine, reservation);
|
||||||
builderThread.detach(); // FIXME?
|
builderThread.detach(); // FIXME?
|
||||||
|
|
|
@ -204,6 +204,28 @@ void getDependents(Step::ptr step, std::set<Build::ptr> & builds, std::set<Step:
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void visitDependencies(std::function<void(Step::ptr)> visitor, Step::ptr start)
|
||||||
|
{
|
||||||
|
std::set<Step::ptr> queued;
|
||||||
|
std::queue<Step::ptr> todo;
|
||||||
|
todo.push(start);
|
||||||
|
|
||||||
|
while (!todo.empty()) {
|
||||||
|
auto step = todo.front();
|
||||||
|
todo.pop();
|
||||||
|
|
||||||
|
visitor(step);
|
||||||
|
|
||||||
|
auto state(step->state.lock());
|
||||||
|
for (auto & dep : state->deps)
|
||||||
|
if (queued.find(dep) == queued.end()) {
|
||||||
|
queued.insert(dep);
|
||||||
|
todo.push(dep);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void State::markSucceededBuild(pqxx::work & txn, Build::ptr build,
|
void State::markSucceededBuild(pqxx::work & txn, Build::ptr build,
|
||||||
const BuildOutput & res, bool isCachedBuild, time_t startTime, time_t stopTime)
|
const BuildOutput & res, bool isCachedBuild, time_t startTime, time_t stopTime)
|
||||||
{
|
{
|
||||||
|
|
|
@ -228,6 +228,14 @@ void State::getQueuedBuilds(Connection & conn, std::shared_ptr<StoreAPI> store,
|
||||||
throw;
|
throw;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Update the lowest build ID field of each dependency. This
|
||||||
|
is used by the dispatcher to start steps in order of build
|
||||||
|
ID. */
|
||||||
|
visitDependencies([&](const Step::ptr & step) {
|
||||||
|
auto step_(step->state.lock());
|
||||||
|
step_->lowestBuildID = std::min(step_->lowestBuildID, build->id);
|
||||||
|
}, build->toplevel);
|
||||||
|
|
||||||
/* Add the new runnable build steps to ‘runnable’ and wake up
|
/* Add the new runnable build steps to ‘runnable’ and wake up
|
||||||
the builder threads. */
|
the builder threads. */
|
||||||
printMsg(lvlChatty, format("got %1% new runnable steps from %2% new builds") % newRunnable.size() % nrAdded);
|
printMsg(lvlChatty, format("got %1% new runnable steps from %2% new builds") % newRunnable.size() % nrAdded);
|
||||||
|
|
|
@ -112,6 +112,9 @@ struct Step
|
||||||
|
|
||||||
/* Point in time after which the step can be retried. */
|
/* Point in time after which the step can be retried. */
|
||||||
system_time after;
|
system_time after;
|
||||||
|
|
||||||
|
/* The lowest build ID depending on this step. */
|
||||||
|
BuildID lowestBuildID{std::numeric_limits<BuildID>::max()};
|
||||||
};
|
};
|
||||||
|
|
||||||
std::atomic_bool finished{false}; // debugging
|
std::atomic_bool finished{false}; // debugging
|
||||||
|
@ -127,6 +130,9 @@ struct Step
|
||||||
|
|
||||||
void getDependents(Step::ptr step, std::set<Build::ptr> & builds, std::set<Step::ptr> & steps);
|
void getDependents(Step::ptr step, std::set<Build::ptr> & builds, std::set<Step::ptr> & steps);
|
||||||
|
|
||||||
|
/* Call ‘visitor’ for a step and all its dependencies. */
|
||||||
|
void visitDependencies(std::function<void(Step::ptr)> visitor, Step::ptr step);
|
||||||
|
|
||||||
|
|
||||||
struct Machine
|
struct Machine
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in a new issue