forked from lix-project/hydra
queue-runner: release machine reservation while copying outputs
This allows for better builder usage when the queue runner is busy. To avoid running into uncontrollable imbalances between builder/queue runner, we only release the machine reservation after the local throttler has found a slot to start copying the outputs for that build.
This commit is contained in:
parent
026e3a3103
commit
5c3e508e55
3 changed files with 22 additions and 9 deletions
|
@ -506,6 +506,7 @@ private:
|
||||||
};
|
};
|
||||||
|
|
||||||
void State::buildRemote(ref<Store> destStore,
|
void State::buildRemote(ref<Store> destStore,
|
||||||
|
MachineReservation::ptr & reservation,
|
||||||
::Machine::ptr machine, Step::ptr step,
|
::Machine::ptr machine, Step::ptr step,
|
||||||
const BuildOptions & buildOptions,
|
const BuildOptions & buildOptions,
|
||||||
RemoteResult & result, std::shared_ptr<ActiveStep> activeStep,
|
RemoteResult & result, std::shared_ptr<ActiveStep> activeStep,
|
||||||
|
@ -630,6 +631,15 @@ void State::buildRemote(ref<Store> destStore,
|
||||||
}
|
}
|
||||||
SemaphoreReleaser releaser(&localWorkThrottler);
|
SemaphoreReleaser releaser(&localWorkThrottler);
|
||||||
|
|
||||||
|
/* Once we've started copying outputs, release the machine reservation
|
||||||
|
* so further builds can happen. We do not release the machine earlier
|
||||||
|
* to avoid situations where the queue runner is bottlenecked on
|
||||||
|
* copying outputs and we end up building too many things that we
|
||||||
|
* haven't been able to allow copy slots for. */
|
||||||
|
assert(reservation.unique());
|
||||||
|
reservation = 0;
|
||||||
|
wakeDispatcher();
|
||||||
|
|
||||||
StorePathSet outputs;
|
StorePathSet outputs;
|
||||||
for (auto & [_, realisation] : buildResult.builtOutputs)
|
for (auto & [_, realisation] : buildResult.builtOutputs)
|
||||||
outputs.insert(realisation.outPath);
|
outputs.insert(realisation.outPath);
|
||||||
|
|
|
@ -46,10 +46,12 @@ void State::builder(MachineReservation::ptr reservation)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Release the machine and wake up the dispatcher. */
|
/* If the machine hasn't been released yet, release and wake up the dispatcher. */
|
||||||
assert(reservation.unique());
|
if (reservation) {
|
||||||
reservation = 0;
|
assert(reservation.unique());
|
||||||
wakeDispatcher();
|
reservation = 0;
|
||||||
|
wakeDispatcher();
|
||||||
|
}
|
||||||
|
|
||||||
/* If there was a temporary failure, retry the step after an
|
/* If there was a temporary failure, retry the step after an
|
||||||
exponentially increasing interval. */
|
exponentially increasing interval. */
|
||||||
|
@ -72,11 +74,11 @@ void State::builder(MachineReservation::ptr reservation)
|
||||||
|
|
||||||
|
|
||||||
State::StepResult State::doBuildStep(nix::ref<Store> destStore,
|
State::StepResult State::doBuildStep(nix::ref<Store> destStore,
|
||||||
MachineReservation::ptr reservation,
|
MachineReservation::ptr & reservation,
|
||||||
std::shared_ptr<ActiveStep> activeStep)
|
std::shared_ptr<ActiveStep> activeStep)
|
||||||
{
|
{
|
||||||
auto & step(reservation->step);
|
auto step(reservation->step);
|
||||||
auto & machine(reservation->machine);
|
auto machine(reservation->machine);
|
||||||
|
|
||||||
{
|
{
|
||||||
auto step_(step->state.lock());
|
auto step_(step->state.lock());
|
||||||
|
@ -208,7 +210,7 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore,
|
||||||
|
|
||||||
try {
|
try {
|
||||||
/* FIXME: referring builds may have conflicting timeouts. */
|
/* FIXME: referring builds may have conflicting timeouts. */
|
||||||
buildRemote(destStore, machine, step, buildOptions, result, activeStep, updateStep, narMembers);
|
buildRemote(destStore, reservation, machine, step, buildOptions, result, activeStep, updateStep, narMembers);
|
||||||
} catch (Error & e) {
|
} catch (Error & e) {
|
||||||
if (activeStep->state_.lock()->cancelled) {
|
if (activeStep->state_.lock()->cancelled) {
|
||||||
printInfo("marking step %d of build %d as cancelled", stepNr, buildId);
|
printInfo("marking step %d of build %d as cancelled", stepNr, buildId);
|
||||||
|
|
|
@ -594,10 +594,11 @@ private:
|
||||||
retried. */
|
retried. */
|
||||||
enum StepResult { sDone, sRetry, sMaybeCancelled };
|
enum StepResult { sDone, sRetry, sMaybeCancelled };
|
||||||
StepResult doBuildStep(nix::ref<nix::Store> destStore,
|
StepResult doBuildStep(nix::ref<nix::Store> destStore,
|
||||||
MachineReservation::ptr reservation,
|
MachineReservation::ptr & reservation,
|
||||||
std::shared_ptr<ActiveStep> activeStep);
|
std::shared_ptr<ActiveStep> activeStep);
|
||||||
|
|
||||||
void buildRemote(nix::ref<nix::Store> destStore,
|
void buildRemote(nix::ref<nix::Store> destStore,
|
||||||
|
MachineReservation::ptr & reservation,
|
||||||
Machine::ptr machine, Step::ptr step,
|
Machine::ptr machine, Step::ptr step,
|
||||||
const BuildOptions & buildOptions,
|
const BuildOptions & buildOptions,
|
||||||
RemoteResult & result, std::shared_ptr<ActiveStep> activeStep,
|
RemoteResult & result, std::shared_ptr<ActiveStep> activeStep,
|
||||||
|
|
Loading…
Reference in a new issue