From d571e44b86f8b166d835f6fd413fbf612dda8e26 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 17 Aug 2015 13:50:41 +0200 Subject: [PATCH] Keep stats for the Hydra auto scaler "hydra-queue-runner --status" now prints how many runnable and running build steps exist for each machine type. This allows additional machines to be provisioned based on the Hydra load. --- src/hydra-queue-runner/builder.cc | 8 +-- src/hydra-queue-runner/counter.hh | 2 +- src/hydra-queue-runner/dispatcher.cc | 52 ++++++++++++++++++-- src/hydra-queue-runner/hydra-queue-runner.cc | 15 ++++++ src/hydra-queue-runner/state.hh | 22 ++++++++- 5 files changed, 91 insertions(+), 8 deletions(-) diff --git a/src/hydra-queue-runner/builder.cc b/src/hydra-queue-runner/builder.cc index 2867f1f0..322863f8 100644 --- a/src/hydra-queue-runner/builder.cc +++ b/src/hydra-queue-runner/builder.cc @@ -6,18 +6,20 @@ using namespace nix; -void State::builder(Step::ptr step, Machine::ptr machine, std::shared_ptr reservation) +void State::builder(MachineReservation::ptr reservation) { bool retry = true; MaintainCount mc(nrActiveSteps); + auto step = reservation->step; + try { auto store = openStore(); // FIXME: pool - retry = doBuildStep(store, step, machine); + retry = doBuildStep(store, step, reservation->machine); } catch (std::exception & e) { printMsg(lvlError, format("uncaught exception building ‘%1%’ on ‘%2%’: %3%") - % step->drvPath % machine->sshName % e.what()); + % step->drvPath % reservation->machine->sshName % e.what()); } /* Release the machine and wake up the dispatcher. */ diff --git a/src/hydra-queue-runner/counter.hh b/src/hydra-queue-runner/counter.hh index 912cb499..1943d1c3 100644 --- a/src/hydra-queue-runner/counter.hh +++ b/src/hydra-queue-runner/counter.hh @@ -8,5 +8,5 @@ struct MaintainCount { counter & c; MaintainCount(counter & c) : c(c) { c++; } - ~MaintainCount() { c--; } + ~MaintainCount() { auto prev = c--; assert(prev); } }; diff --git a/src/hydra-queue-runner/dispatcher.cc b/src/hydra-queue-runner/dispatcher.cc index bb50948f..ecc68116 100644 --- a/src/hydra-queue-runner/dispatcher.cc +++ b/src/hydra-queue-runner/dispatcher.cc @@ -1,5 +1,6 @@ #include #include +#include #include "state.hh" @@ -142,6 +143,7 @@ system_time State::doDispatch() FIXME: O(n lg n); obviously, it would be better to keep a runnable queue sorted by priority. */ std::vector runnableSorted; + std::unordered_map runnablePerType; { auto runnable_(runnable.lock()); runnableSorted.reserve(runnable_->size()); @@ -156,6 +158,8 @@ system_time State::doDispatch() ++i; + runnablePerType[step->drv.platform]++; + /* Skip previously failed steps that aren't ready to be retried. */ { @@ -215,13 +219,14 @@ system_time State::doDispatch() break; } else ++i; assert(removed); + assert(runnablePerType[step->drv.platform]); + runnablePerType[step->drv.platform]--; } /* Make a slot reservation and start a thread to do the build. */ - auto reservation = std::make_shared(mi.machine->state->currentJobs); - - auto builderThread = std::thread(&State::builder, this, step, mi.machine, reservation); + auto builderThread = std::thread(&State::builder, this, + std::make_shared(*this, step, mi.machine)); builderThread.detach(); // FIXME? keepGoing = true; @@ -231,6 +236,17 @@ system_time State::doDispatch() if (keepGoing) break; } + /* Update the stats for the auto-scaler. */ + { + auto machineTypes_(machineTypes.lock()); + + for (auto & i : *machineTypes_) + i.second.runnable = 0; + + for (auto & i : runnablePerType) + (*machineTypes_)[i.first].runnable = i.second; + } + } while (keepGoing); return sleepUntil; @@ -266,3 +282,33 @@ void Jobset::pruneSteps() steps_->erase(i); } } + + +State::MachineReservation::MachineReservation(State & state, Step::ptr step, Machine::ptr machine) + : state(state), step(step), machine(machine) +{ + machine->state->currentJobs++; + + { + auto machineTypes_(state.machineTypes.lock()); + (*machineTypes_)[step->drv.platform].running++; + } +} + + +State::MachineReservation::~MachineReservation() +{ + auto prev = machine->state->currentJobs--; + assert(prev); + if (prev == 1) + machine->state->idleSince = time(0); + + { + auto machineTypes_(state.machineTypes.lock()); + auto & machineType = (*machineTypes_)[step->drv.platform]; + assert(machineType.running); + machineType.running--; + if (machineType.running == 0) + machineType.lastActive = time(0); + } +} diff --git a/src/hydra-queue-runner/hydra-queue-runner.cc b/src/hydra-queue-runner/hydra-queue-runner.cc index 3fe608fc..61508ae6 100644 --- a/src/hydra-queue-runner/hydra-queue-runner.cc +++ b/src/hydra-queue-runner/hydra-queue-runner.cc @@ -453,6 +453,8 @@ void State::dumpStatus(Connection & conn, bool log) nested.attr(m->sshName); JSONObject nested2(out); nested2.attr("currentJobs", s->currentJobs); + if (s->currentJobs == 0) + nested2.attr("idleSince", s->idleSince); nested2.attr("nrStepsDone", s->nrStepsDone); if (m->state->nrStepsDone) { nested2.attr("totalStepTime", s->totalStepTime); @@ -473,6 +475,19 @@ void State::dumpStatus(Connection & conn, bool log) nested2.attr("seconds", jobset.second->getSeconds()); } } + { + root.attr("machineTypes"); + JSONObject nested(out); + auto machineTypes_(machineTypes.lock()); + for (auto & i : *machineTypes_) { + nested.attr(i.first); + JSONObject nested2(out); + nested2.attr("runnable", i.second.runnable); + nested2.attr("running", i.second.running); + if (i.second.running == 0) + nested2.attr("lastActive", i.second.lastActive); + } + } } if (log) printMsg(lvlInfo, format("status: %1%") % out.str()); diff --git a/src/hydra-queue-runner/state.hh b/src/hydra-queue-runner/state.hh index 060e2d1c..db81a290 100644 --- a/src/hydra-queue-runner/state.hh +++ b/src/hydra-queue-runner/state.hh @@ -208,6 +208,7 @@ struct Machine counter nrStepsDone{0}; counter totalStepTime{0}; // total time for steps, including closure copying counter totalStepBuildTime{0}; // total build time for steps + std::atomic idleSince{0}; struct ConnectInfo { @@ -318,6 +319,25 @@ private: /* Specific build to do for --build-one (testing only). */ BuildID buildOne; + /* Statistics per machine type for the Hydra auto-scaler. */ + struct MachineType + { + unsigned int runnable{0}, running{0}; + time_t lastActive{0}; + }; + + Sync> machineTypes; + + struct MachineReservation + { + typedef std::shared_ptr ptr; + State & state; + Step::ptr step; + Machine::ptr machine; + MachineReservation(State & state, Step::ptr step, Machine::ptr machine); + ~MachineReservation(); + }; + public: State(); @@ -369,7 +389,7 @@ private: void wakeDispatcher(); - void builder(Step::ptr step, Machine::ptr machine, std::shared_ptr reservation); + void builder(MachineReservation::ptr reservation); /* Perform the given build step. Return true if the step is to be retried. */