forked from lix-project/hydra
Revive jobset scheduling
(I.e. taking the jobset scheduling share into account.)
This commit is contained in:
parent
08739a2a5a
commit
97f11baa8d
|
@ -131,6 +131,16 @@ bool State::doBuildStep(std::shared_ptr<StoreAPI> store, Step::ptr step,
|
||||||
time_t stepStopTime = time(0);
|
time_t stepStopTime = time(0);
|
||||||
if (!result.stopTime) result.stopTime = stepStopTime;
|
if (!result.stopTime) result.stopTime = stepStopTime;
|
||||||
|
|
||||||
|
/* Account the time we spent building this step by dividing it
|
||||||
|
among the jobsets that depend on it. */
|
||||||
|
{
|
||||||
|
auto step_(step->state.lock());
|
||||||
|
// FIXME: loss of precision.
|
||||||
|
time_t charge = (result.stopTime - result.startTime) / step_->jobsets.size();
|
||||||
|
for (auto & jobset : step_->jobsets)
|
||||||
|
jobset->addStep(result.startTime, charge);
|
||||||
|
}
|
||||||
|
|
||||||
/* Asynchronously compress the log. */
|
/* Asynchronously compress the log. */
|
||||||
if (result.logFile != "") {
|
if (result.logFile != "") {
|
||||||
{
|
{
|
||||||
|
|
|
@ -1,4 +1,3 @@
|
||||||
#include <iostream>
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <thread>
|
#include <thread>
|
||||||
|
|
||||||
|
@ -54,6 +53,19 @@ void State::dispatcher()
|
||||||
|
|
||||||
system_time State::doDispatch()
|
system_time State::doDispatch()
|
||||||
{
|
{
|
||||||
|
/* Prune old historical build step info from the jobsets. */
|
||||||
|
{
|
||||||
|
auto jobsets_(jobsets.lock());
|
||||||
|
for (auto & jobset : *jobsets_) {
|
||||||
|
auto s1 = jobset.second->shareUsed();
|
||||||
|
jobset.second->pruneSteps();
|
||||||
|
auto s2 = jobset.second->shareUsed();
|
||||||
|
if (s1 != s2)
|
||||||
|
printMsg(lvlDebug, format("pruned scheduling window of ‘%1%:%2%’ from %3% to %4%")
|
||||||
|
% jobset.first.first % jobset.first.second % s1 % s2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* Start steps until we're out of steps or slots. */
|
/* Start steps until we're out of steps or slots. */
|
||||||
auto sleepUntil = system_time::max();
|
auto sleepUntil = system_time::max();
|
||||||
bool keepGoing;
|
bool keepGoing;
|
||||||
|
@ -139,6 +151,13 @@ system_time State::doDispatch()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (auto & step : runnableSorted) {
|
||||||
|
auto step_(step->state.lock());
|
||||||
|
step_->lowestShareUsed = 1e9;
|
||||||
|
for (auto & jobset : step_->jobsets)
|
||||||
|
step_->lowestShareUsed = std::min(step_->lowestShareUsed, jobset->shareUsed());
|
||||||
|
}
|
||||||
|
|
||||||
sort(runnableSorted.begin(), runnableSorted.end(),
|
sort(runnableSorted.begin(), runnableSorted.end(),
|
||||||
[](const Step::ptr & a, const Step::ptr & b)
|
[](const Step::ptr & a, const Step::ptr & b)
|
||||||
{
|
{
|
||||||
|
@ -146,6 +165,7 @@ system_time State::doDispatch()
|
||||||
auto b_(b->state.lock()); // FIXME: deadlock?
|
auto b_(b->state.lock()); // FIXME: deadlock?
|
||||||
return
|
return
|
||||||
a_->highestGlobalPriority != b_->highestGlobalPriority ? a_->highestGlobalPriority > b_->highestGlobalPriority :
|
a_->highestGlobalPriority != b_->highestGlobalPriority ? a_->highestGlobalPriority > b_->highestGlobalPriority :
|
||||||
|
a_->lowestShareUsed != b_->lowestShareUsed ? a_->lowestShareUsed < b_->lowestShareUsed :
|
||||||
a_->lowestBuildID < b_->lowestBuildID;
|
a_->lowestBuildID < b_->lowestBuildID;
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -204,3 +224,24 @@ void State::wakeDispatcher()
|
||||||
}
|
}
|
||||||
dispatcherWakeupCV.notify_one();
|
dispatcherWakeupCV.notify_one();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void Jobset::addStep(time_t startTime, time_t duration)
|
||||||
|
{
|
||||||
|
auto steps_(steps.lock());
|
||||||
|
(*steps_)[startTime] = duration;
|
||||||
|
seconds += duration;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void Jobset::pruneSteps()
|
||||||
|
{
|
||||||
|
time_t now = time(0);
|
||||||
|
auto steps_(steps.lock());
|
||||||
|
while (!steps_->empty()) {
|
||||||
|
auto i = steps_->begin();
|
||||||
|
if (i->first > now - schedulingWindow) break;
|
||||||
|
seconds -= i->second;
|
||||||
|
steps_->erase(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -462,6 +462,17 @@ void State::dumpStatus(Connection & conn, bool log)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
{
|
||||||
|
root.attr("jobsets");
|
||||||
|
JSONObject nested(out);
|
||||||
|
auto jobsets_(jobsets.lock());
|
||||||
|
for (auto & jobset : *jobsets_) {
|
||||||
|
nested.attr(jobset.first.first + ":" + jobset.first.second);
|
||||||
|
JSONObject nested2(out);
|
||||||
|
nested2.attr("shareUsed"); out << jobset.second->shareUsed();
|
||||||
|
nested2.attr("seconds", jobset.second->getSeconds());
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (log) printMsg(lvlInfo, format("status: %1%") % out.str());
|
if (log) printMsg(lvlInfo, format("status: %1%") % out.str());
|
||||||
|
|
|
@ -49,7 +49,6 @@ void State::queueMonitorLoop()
|
||||||
printMsg(lvlTalkative, "got notification: builds cancelled or bumped");
|
printMsg(lvlTalkative, "got notification: builds cancelled or bumped");
|
||||||
processQueueChange(*conn);
|
processQueueChange(*conn);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -84,6 +83,7 @@ void State::getQueuedBuilds(Connection & conn, std::shared_ptr<StoreAPI> store,
|
||||||
build->buildTimeout = row["timeout"].as<int>();
|
build->buildTimeout = row["timeout"].as<int>();
|
||||||
build->timestamp = row["timestamp"].as<time_t>();
|
build->timestamp = row["timestamp"].as<time_t>();
|
||||||
build->globalPriority = row["globalPriority"].as<int>();
|
build->globalPriority = row["globalPriority"].as<int>();
|
||||||
|
build->jobset = createJobset(txn, build->projectName, build->jobsetName);
|
||||||
|
|
||||||
newBuilds.emplace(std::make_pair(build->drvPath, build));
|
newBuilds.emplace(std::make_pair(build->drvPath, build));
|
||||||
}
|
}
|
||||||
|
@ -210,6 +210,8 @@ void State::getQueuedBuilds(Connection & conn, std::shared_ptr<StoreAPI> store,
|
||||||
build->toplevel = step;
|
build->toplevel = step;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
build->propagatePriorities();
|
||||||
|
|
||||||
printMsg(lvlChatty, format("added build %1% (top-level step %2%, %3% new steps)")
|
printMsg(lvlChatty, format("added build %1% (top-level step %2%, %3% new steps)")
|
||||||
% build->id % step->drvPath % newSteps.size());
|
% build->id % step->drvPath % newSteps.size());
|
||||||
};
|
};
|
||||||
|
@ -230,8 +232,6 @@ void State::getQueuedBuilds(Connection & conn, std::shared_ptr<StoreAPI> store,
|
||||||
throw;
|
throw;
|
||||||
}
|
}
|
||||||
|
|
||||||
build->propagatePriorities();
|
|
||||||
|
|
||||||
/* Add the new runnable build steps to ‘runnable’ and wake up
|
/* Add the new runnable build steps to ‘runnable’ and wake up
|
||||||
the builder threads. */
|
the builder threads. */
|
||||||
printMsg(lvlChatty, format("got %1% new runnable steps from %2% new builds") % newRunnable.size() % nrAdded);
|
printMsg(lvlChatty, format("got %1% new runnable steps from %2% new builds") % newRunnable.size() % nrAdded);
|
||||||
|
@ -253,6 +253,7 @@ void Build::propagatePriorities()
|
||||||
auto step_(step->state.lock());
|
auto step_(step->state.lock());
|
||||||
step_->highestGlobalPriority = std::max(step_->highestGlobalPriority, globalPriority);
|
step_->highestGlobalPriority = std::max(step_->highestGlobalPriority, globalPriority);
|
||||||
step_->lowestBuildID = std::min(step_->lowestBuildID, id);
|
step_->lowestBuildID = std::min(step_->lowestBuildID, id);
|
||||||
|
step_->jobsets.insert(jobset);
|
||||||
}, toplevel);
|
}, toplevel);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -395,3 +396,39 @@ Step::ptr State::createStep(std::shared_ptr<StoreAPI> store, const Path & drvPat
|
||||||
|
|
||||||
return step;
|
return step;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Jobset::ptr State::createJobset(pqxx::work & txn,
|
||||||
|
const std::string & projectName, const std::string & jobsetName)
|
||||||
|
{
|
||||||
|
auto jobsets_(jobsets.lock());
|
||||||
|
|
||||||
|
auto p = std::make_pair(projectName, jobsetName);
|
||||||
|
|
||||||
|
auto i = jobsets_->find(p);
|
||||||
|
if (i != jobsets_->end()) return i->second;
|
||||||
|
|
||||||
|
auto res = txn.parameterized
|
||||||
|
("select schedulingShares from Jobsets where project = $1 and name = $2")
|
||||||
|
(projectName)(jobsetName).exec();
|
||||||
|
if (res.empty()) throw Error("missing jobset - can't happen");
|
||||||
|
|
||||||
|
auto shares = res[0]["schedulingShares"].as<unsigned int>();
|
||||||
|
if (shares == 0) shares = 1;
|
||||||
|
|
||||||
|
auto jobset = std::make_shared<Jobset>(shares);
|
||||||
|
|
||||||
|
/* Load the build steps from the last 24 hours. */
|
||||||
|
res = txn.parameterized
|
||||||
|
("select s.startTime, s.stopTime from BuildSteps s join Builds b on build = id "
|
||||||
|
"where s.startTime is not null and s.stopTime > $1 and project = $2 and jobset = $3")
|
||||||
|
(time(0) - Jobset::schedulingWindow * 10)(projectName)(jobsetName).exec();
|
||||||
|
for (auto const & row : res) {
|
||||||
|
time_t startTime = row["startTime"].as<time_t>();
|
||||||
|
time_t stopTime = row["stopTime"].as<time_t>();
|
||||||
|
jobset->addStep(startTime, stopTime - startTime);
|
||||||
|
}
|
||||||
|
|
||||||
|
(*jobsets_)[p] = jobset;
|
||||||
|
return jobset;
|
||||||
|
}
|
||||||
|
|
|
@ -60,6 +60,40 @@ struct Step;
|
||||||
struct BuildOutput;
|
struct BuildOutput;
|
||||||
|
|
||||||
|
|
||||||
|
class Jobset
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
|
||||||
|
typedef std::shared_ptr<Jobset> ptr;
|
||||||
|
typedef std::weak_ptr<Jobset> wptr;
|
||||||
|
|
||||||
|
Jobset(unsigned int shares) : shares(shares) { }
|
||||||
|
|
||||||
|
static const time_t schedulingWindow = 24 * 60 * 60;
|
||||||
|
|
||||||
|
private:
|
||||||
|
|
||||||
|
std::atomic<time_t> seconds{0};
|
||||||
|
std::atomic<unsigned int> shares;
|
||||||
|
|
||||||
|
/* The start time and duration of the most recent build steps. */
|
||||||
|
Sync<std::map<time_t, time_t>> steps;
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
double shareUsed()
|
||||||
|
{
|
||||||
|
return (double) seconds / shares;
|
||||||
|
}
|
||||||
|
|
||||||
|
time_t getSeconds() { return seconds; }
|
||||||
|
|
||||||
|
void addStep(time_t startTime, time_t duration);
|
||||||
|
|
||||||
|
void pruneSteps();
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
struct Build
|
struct Build
|
||||||
{
|
{
|
||||||
typedef std::shared_ptr<Build> ptr;
|
typedef std::shared_ptr<Build> ptr;
|
||||||
|
@ -75,6 +109,8 @@ struct Build
|
||||||
|
|
||||||
std::shared_ptr<Step> toplevel;
|
std::shared_ptr<Step> toplevel;
|
||||||
|
|
||||||
|
Jobset::ptr jobset;
|
||||||
|
|
||||||
std::atomic_bool finishedInDB{false};
|
std::atomic_bool finishedInDB{false};
|
||||||
|
|
||||||
std::string fullJobName()
|
std::string fullJobName()
|
||||||
|
@ -110,6 +146,10 @@ struct Step
|
||||||
/* Builds that have this step as the top-level derivation. */
|
/* Builds that have this step as the top-level derivation. */
|
||||||
std::vector<Build::wptr> builds;
|
std::vector<Build::wptr> builds;
|
||||||
|
|
||||||
|
/* Jobsets to which this step belongs. Used for determining
|
||||||
|
scheduling priority. */
|
||||||
|
std::set<Jobset::ptr> jobsets;
|
||||||
|
|
||||||
/* Number of times we've tried this step. */
|
/* Number of times we've tried this step. */
|
||||||
unsigned int tries = 0;
|
unsigned int tries = 0;
|
||||||
|
|
||||||
|
@ -120,6 +160,10 @@ struct Step
|
||||||
step. */
|
step. */
|
||||||
int highestGlobalPriority{0};
|
int highestGlobalPriority{0};
|
||||||
|
|
||||||
|
/* The lowest share used of any jobset depending on this
|
||||||
|
step. */
|
||||||
|
double lowestShareUsed;
|
||||||
|
|
||||||
/* The lowest ID of any build depending on this step. */
|
/* The lowest ID of any build depending on this step. */
|
||||||
BuildID lowestBuildID{std::numeric_limits<BuildID>::max()};
|
BuildID lowestBuildID{std::numeric_limits<BuildID>::max()};
|
||||||
};
|
};
|
||||||
|
@ -203,6 +247,10 @@ private:
|
||||||
typedef std::map<BuildID, Build::ptr> Builds;
|
typedef std::map<BuildID, Build::ptr> Builds;
|
||||||
Sync<Builds> builds;
|
Sync<Builds> builds;
|
||||||
|
|
||||||
|
/* The jobsets. */
|
||||||
|
typedef std::map<std::pair<std::string, std::string>, Jobset::ptr> Jobsets;
|
||||||
|
Sync<Jobsets> jobsets;
|
||||||
|
|
||||||
/* All active or pending build steps (i.e. dependencies of the
|
/* All active or pending build steps (i.e. dependencies of the
|
||||||
queued builds). Note that these are weak pointers. Steps are
|
queued builds). Note that these are weak pointers. Steps are
|
||||||
kept alive by being reachable from Builds or by being in
|
kept alive by being reachable from Builds or by being in
|
||||||
|
@ -299,6 +347,9 @@ private:
|
||||||
Build::ptr referringBuild, Step::ptr referringStep, std::set<nix::Path> & finishedDrvs,
|
Build::ptr referringBuild, Step::ptr referringStep, std::set<nix::Path> & finishedDrvs,
|
||||||
std::set<Step::ptr> & newSteps, std::set<Step::ptr> & newRunnable);
|
std::set<Step::ptr> & newSteps, std::set<Step::ptr> & newRunnable);
|
||||||
|
|
||||||
|
Jobset::ptr createJobset(pqxx::work & txn,
|
||||||
|
const std::string & projectName, const std::string & jobsetName);
|
||||||
|
|
||||||
void makeRunnable(Step::ptr step);
|
void makeRunnable(Step::ptr step);
|
||||||
|
|
||||||
/* The thread that selects and starts runnable builds. */
|
/* The thread that selects and starts runnable builds. */
|
||||||
|
|
Loading…
Reference in a new issue