diff --git a/src/hydra-queue-runner/build-remote.cc b/src/hydra-queue-runner/build-remote.cc index 65452be3..9466f009 100644 --- a/src/hydra-queue-runner/build-remote.cc +++ b/src/hydra-queue-runner/build-remote.cc @@ -373,10 +373,13 @@ void State::buildRemote(ref destStore, % step->drvPath % machine->sshName % totalNarSize); /* Block until we have the required amount of memory - available. FIXME: only need this for binary cache - destination stores. */ + available, which is twice the NAR size (namely the + uncompressed and worst-case compressed NAR), plus 150 + MB for xz compression overhead. (The xz manpage claims + ~94 MiB, but that's not was I'm seeing.) */ auto resStart = std::chrono::steady_clock::now(); - auto memoryReservation(memoryTokens.get(totalNarSize)); + size_t compressionCost = totalNarSize + 150 * 1024 * 1024; + result.tokens = std::make_unique(memoryTokens.get(totalNarSize + compressionCost)); auto resStop = std::chrono::steady_clock::now(); auto resMs = std::chrono::duration_cast(resStop - resStart).count(); @@ -390,6 +393,11 @@ void State::buildRemote(ref destStore, to.flush(); destStore->importPaths(from, result.accessor, true); + /* Release the tokens pertaining to NAR + compression. After this we only have the uncompressed + NAR in memory. */ + result.tokens->give_back(compressionCost); + auto now2 = std::chrono::steady_clock::now(); result.overhead += std::chrono::duration_cast(now2 - now1).count(); diff --git a/src/hydra-queue-runner/builder.cc b/src/hydra-queue-runner/builder.cc index f943ea6e..4e8f6cf3 100644 --- a/src/hydra-queue-runner/builder.cc +++ b/src/hydra-queue-runner/builder.cc @@ -179,6 +179,9 @@ State::StepResult State::doBuildStep(nix::ref destStore, if (result.stepStatus == bsSuccess) res = getBuildOutput(destStore, ref(result.accessor), step->drv); + + result.accessor = 0; + result.tokens = 0; } time_t stepStopTime = time(0); diff --git a/src/hydra-queue-runner/state.hh b/src/hydra-queue-runner/state.hh index b02491f2..347bbbb7 100644 --- a/src/hydra-queue-runner/state.hh +++ b/src/hydra-queue-runner/state.hh @@ -50,6 +50,7 @@ struct RemoteResult time_t startTime = 0, stopTime = 0; unsigned int overhead = 0; nix::Path logFile; + std::unique_ptr tokens; std::shared_ptr accessor; BuildStatus buildStatus() diff --git a/src/hydra-queue-runner/token-server.hh b/src/hydra-queue-runner/token-server.hh index 6c0cf716..7c6310ef 100644 --- a/src/hydra-queue-runner/token-server.hh +++ b/src/hydra-queue-runner/token-server.hh @@ -40,6 +40,7 @@ public: { if (tokens >= ts->maxTokens) throw NoTokens(format("requesting more tokens (%d) than exist (%d)") % tokens % ts->maxTokens); + debug("acquiring %d tokens", tokens); auto inUse(ts->inUse.lock()); while (*inUse + tokens > ts->maxTokens) if (timeout) { @@ -54,21 +55,38 @@ public: public: - Token(Token && t) : ts(t.ts) { t.ts = 0; } + Token(Token && t) : ts(t.ts), tokens(t.tokens), acquired(t.acquired) + { + t.ts = 0; + t.acquired = false; + } Token(const Token & l) = delete; ~Token() { if (!ts || !acquired) return; - { - auto inUse(ts->inUse.lock()); - assert(*inUse >= tokens); - *inUse -= tokens; - } - ts->wakeup.notify_one(); + give_back(tokens); } bool operator ()() { return acquired; } + + void give_back(size_t t) + { + debug("returning %d tokens", t); + if (!t) return; + assert(acquired); + assert(t <= tokens); + { + auto inUse(ts->inUse.lock()); + assert(*inUse >= t); + *inUse -= t; + tokens -= t; + } + // FIXME: inefficient. Should wake up waiters that can + // proceed now. + ts->wakeup.notify_all(); + } + }; Token get(size_t tokens = 1, unsigned int timeout = 0)