queue-runner: limit parallelism of CPU intensive operations

My current theory is that running more parallel xz than available CPU cores is reducing our overall throughput by requiring more scheduling overhead and more cache thrashing.
2024-04-11 15:03:23 +02:00 · 2024-04-11 15:03:23 +02:00 · a51bd392a2
parent a596d6c3c1
commit a51bd392a2
4 changed files with 27 additions and 0 deletions
--- a/src/hydra-queue-runner/build-remote.cc
+++ b/src/hydra-queue-runner/build-remote.cc
@ -494,6 +494,16 @@ void RemoteResult::updateWithBuildResult(const nix::BuildResult & buildResult)
 }
 /* Utility guard object to auto-release a semaphore on destruction. */
 template <typename T>
 class SemaphoreReleaser {
 public:
    SemaphoreReleaser(T* s) : sem(s) {}
    ~SemaphoreReleaser() { sem->release(); }
 private:
    T* sem;
 };
 void State::buildRemote(ref<Store> destStore,
    ::Machine::ptr machine, Step::ptr step,
@ -612,6 +622,14 @@ void State::buildRemote(ref<Store> destStore,
            result.logFile = "";
        }
        /* Throttle CPU-bound work. Opportunistically skip updating the current
         * step, since this requires a DB roundtrip. */
        if (!localWorkThrottler.try_acquire()) {
            updateStep(ssWaitingForLocalSlot);
            localWorkThrottler.acquire();
        }
        SemaphoreReleaser releaser(&localWorkThrottler);
        StorePathSet outputs;
        for (auto & [_, realisation] : buildResult.builtOutputs)
            outputs.insert(realisation.outPath);
--- a/src/hydra-queue-runner/hydra-queue-runner.cc
+++ b/src/hydra-queue-runner/hydra-queue-runner.cc
@ -85,6 +85,7 @@ State::State(std::optional<std::string> metricsAddrOpt)
    : config(std::make_unique<HydraConfig>())
    , maxUnsupportedTime(config->getIntOption("max_unsupported_time", 0))
    , dbPool(config->getIntOption("max_db_connections", 128))
    , localWorkThrottler(config->getIntOption("max_local_worker_threads", std::min(maxSupportedLocalWorkers, std::max(4u, std::thread::hardware_concurrency()) - 2)))
    , maxOutputSize(config->getIntOption("max_output_size", 2ULL << 30))
    , maxLogSize(config->getIntOption("max_log_size", 64ULL << 20))
    , uploadLogsToBinaryCache(config->getBoolOption("upload_logs_to_binary_cache", false))
--- a/src/hydra-queue-runner/state.hh
+++ b/src/hydra-queue-runner/state.hh
@ -7,6 +7,7 @@
 #include <memory>
 #include <queue>
 #include <regex>
 #include <semaphore>
 #include <prometheus/counter.h>
 #include <prometheus/gauge.h>
@ -56,6 +57,7 @@ typedef enum {
    ssConnecting = 10,
    ssSendingInputs = 20,
    ssBuilding = 30,
    ssWaitingForLocalSlot = 35,
    ssReceivingOutputs = 40,
    ssPostProcessing = 50,
 } StepState;
@ -387,6 +389,10 @@ private:
    typedef std::map<std::string, Machine::ptr> Machines;
    nix::Sync<Machines> machines; // FIXME: use atomic_shared_ptr
    /* Throttler for CPU-bound local work. */
    static constexpr unsigned int maxSupportedLocalWorkers = 1024;
    std::counting_semaphore<maxSupportedLocalWorkers> localWorkThrottler;
    /* Various stats. */
    time_t startedAt;
    counter nrBuildsRead{0};
--- a/src/root/build.tt
+++ b/src/root/build.tt
@ -69,6 +69,8 @@ END;
                  <strong>Sending inputs</strong>
                [% ELSIF step.busy == 30 %]
                  <strong>Building</strong>
                [% ELSIF step.busy == 35 %]
                  <strong>Waiting to receive outputs</strong>
                [% ELSIF step.busy == 40 %]
                  <strong>Receiving outputs</strong>
                [% ELSIF step.busy == 50 %]