diff --git a/src/hydra-queue-runner/build-remote.cc b/src/hydra-queue-runner/build-remote.cc
index 65452be3..9466f009 100644
--- a/src/hydra-queue-runner/build-remote.cc
+++ b/src/hydra-queue-runner/build-remote.cc
@@ -373,10 +373,13 @@ void State::buildRemote(ref<Store> destStore,
                 % step->drvPath % machine->sshName % totalNarSize);
 
             /* Block until we have the required amount of memory
-               available. FIXME: only need this for binary cache
-               destination stores. */
+               available, which is twice the NAR size (namely the
+               uncompressed and worst-case compressed NAR), plus 150
+               MB for xz compression overhead. (The xz manpage claims
+               ~94 MiB, but that's not what I'm seeing.) */
             auto resStart = std::chrono::steady_clock::now();
-            auto memoryReservation(memoryTokens.get(totalNarSize));
+            size_t compressionCost = totalNarSize + 150 * 1024 * 1024;
+            result.tokens = std::make_unique<nix::TokenServer::Token>(memoryTokens.get(totalNarSize + compressionCost));
             auto resStop = std::chrono::steady_clock::now();
 
             auto resMs = std::chrono::duration_cast<std::chrono::milliseconds>(resStop - resStart).count();
@@ -390,6 +393,11 @@ void State::buildRemote(ref<Store> destStore,
             to.flush();
             destStore->importPaths(from, result.accessor, true);
 
+            /* Release the tokens pertaining to NAR
+               compression. After this we only have the uncompressed
+               NAR in memory. */
+            result.tokens->give_back(compressionCost);
+
             auto now2 = std::chrono::steady_clock::now();
 
             result.overhead += std::chrono::duration_cast<std::chrono::milliseconds>(now2 - now1).count();
diff --git a/src/hydra-queue-runner/builder.cc b/src/hydra-queue-runner/builder.cc
index f943ea6e..4e8f6cf3 100644
--- a/src/hydra-queue-runner/builder.cc
+++ b/src/hydra-queue-runner/builder.cc
@@ -179,6 +179,9 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore,
 
         if (result.stepStatus == bsSuccess)
             res = getBuildOutput(destStore, ref<FSAccessor>(result.accessor), step->drv);
+
+        result.accessor = 0;
+        result.tokens = 0;
     }
 
     time_t stepStopTime = time(0);
diff --git a/src/hydra-queue-runner/state.hh b/src/hydra-queue-runner/state.hh
index b02491f2..347bbbb7 100644
--- a/src/hydra-queue-runner/state.hh
+++ b/src/hydra-queue-runner/state.hh
@@ -50,6 +50,7 @@ struct RemoteResult
     time_t startTime = 0, stopTime = 0;
     unsigned int overhead = 0;
     nix::Path logFile;
+    std::unique_ptr<nix::TokenServer::Token> tokens;
     std::shared_ptr<nix::FSAccessor> accessor;
 
     BuildStatus buildStatus()
diff --git a/src/hydra-queue-runner/token-server.hh b/src/hydra-queue-runner/token-server.hh
index 6c0cf716..7c6310ef 100644
--- a/src/hydra-queue-runner/token-server.hh
+++ b/src/hydra-queue-runner/token-server.hh
@@ -40,6 +40,7 @@ public:
         {
             if (tokens >= ts->maxTokens)
                 throw NoTokens(format("requesting more tokens (%d) than exist (%d)") % tokens % ts->maxTokens);
+            debug("acquiring %d tokens", tokens);
             auto inUse(ts->inUse.lock());
             while (*inUse + tokens > ts->maxTokens)
                 if (timeout) {
@@ -54,21 +55,38 @@ public:
 
     public:
 
-        Token(Token && t) : ts(t.ts) { t.ts = 0; }
+        Token(Token && t) : ts(t.ts), tokens(t.tokens), acquired(t.acquired)
+        {
+            t.ts = 0; // the moved-from token no longer refers to a server...
+            t.acquired = false; // ...so its destructor returns early and gives nothing back
+        }
         Token(const Token & l) = delete;
 
         ~Token()
         {
             if (!ts || !acquired) return;
-            {
-                auto inUse(ts->inUse.lock());
-                assert(*inUse >= tokens);
-                *inUse -= tokens;
-            }
-            ts->wakeup.notify_one();
+            give_back(tokens); // return whatever this token still holds
         }
 
         bool operator ()() { return acquired; }
+
+        void give_back(size_t t) // release `t` of the tokens held by this Token early
+        {
+            debug("returning %d tokens", t);
+            if (!t) return; // nothing to release: skip the lock and the wakeup
+            assert(acquired);
+            assert(t <= tokens); // cannot return more than this Token still holds
+            {
+                auto inUse(ts->inUse.lock());
+                assert(*inUse >= t);
+                *inUse -= t;
+                tokens -= t; // a later give_back()/destructor then returns only the remainder
+            }
+            // FIXME: inefficient. Should wake up only those waiters
+            // that can proceed now, rather than all of them.
+            ts->wakeup.notify_all();
+        }
+
     };
 
     Token get(size_t tokens = 1, unsigned int timeout = 0)