diff --git a/doc/manual/src/configuration.md b/doc/manual/src/configuration.md index ec071f24..7aca17ef 100644 --- a/doc/manual/src/configuration.md +++ b/doc/manual/src/configuration.md @@ -102,6 +102,22 @@ in the hydra configuration file, as below: ``` +hydra-queue-runner's Prometheus service +--------------------------------------- + +hydra-queue-runner supports running a Prometheus webserver for metrics. The +exporter's address defaults to exposing on `127.0.0.1:9198`, but is also +configurable through the hydra configuration file and a command line argument, +as below. A port of `:0` will make the exposer choose a random, available port. + +```conf +queue_runner_metrics_address = 127.0.0.1:9198 +``` + +```shell +$ hydra-queue-runner --prometheus-address 127.0.0.1:9198 +``` + Using LDAP as authentication backend (optional) ----------------------------------------------- diff --git a/flake.lock b/flake.lock index b9dd6995..5c726a24 100644 --- a/flake.lock +++ b/flake.lock @@ -16,6 +16,22 @@ "type": "github" } }, + "newNixpkgs": { + "locked": { + "lastModified": 1647380550, + "narHash": "sha256-909TI9poX7CIUiFx203WL29YON6m/I6k0ExbZvR7bLM=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "6e3ee8957637a60f5072e33d78e05c0f65c54366", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-unstable-small", + "repo": "nixpkgs", + "type": "github" + } + }, "nix": { "inputs": { "lowdown-src": "lowdown-src", @@ -67,6 +83,7 @@ }, "root": { "inputs": { + "newNixpkgs": "newNixpkgs", "nix": "nix", "nixpkgs": [ "nix", diff --git a/flake.nix b/flake.nix index d2b71724..b41dc8e0 100644 --- a/flake.nix +++ b/flake.nix @@ -1,10 +1,13 @@ { description = "A Nix-based continuous build system"; + # FIXME: All the pinned versions of nix/nixpkgs have a broken foreman (yes, + # even 2.7.0's Nixpkgs pin).
+ inputs.newNixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable-small"; inputs.nixpkgs.follows = "nix/nixpkgs"; #inputs.nix.url = github:NixOS/nix/2.7.0; - outputs = { self, nixpkgs, nix }: + outputs = { self, newNixpkgs, nixpkgs, nix }: let version = "${builtins.readFile ./version.txt}.${builtins.substring 0 8 (self.lastModifiedDate or "19700101")}.${self.shortRev or "DIRTY"}"; @@ -38,6 +41,12 @@ # A Nixpkgs overlay that provides a 'hydra' package. overlay = final: prev: { + # Overlay these packages to use dependencies from the Nixpkgs everything + # else uses, to side-step the version difference: glibc is 2.32 in the + # nix-pinned Nixpkgs, but 2.33 in the newNixpkgs commit. + civetweb = final.callPackage "${newNixpkgs}/pkgs/development/libraries/civetweb" { }; + prometheus-cpp = final.callPackage "${newNixpkgs}/pkgs/development/libraries/prometheus-cpp" { }; + # Add LDAP dependencies that aren't currently found within nixpkgs. perlPackages = prev.perlPackages // { TestPostgreSQL = final.perlPackages.buildPerlModule { @@ -61,7 +70,7 @@ }; }; - FunctionParameters = final.buildPerlPackage { + FunctionParameters = final.perlPackages.buildPerlPackage { pname = "Function-Parameters"; version = "2.001003"; src = final.fetchurl { @@ -75,7 +84,7 @@ }; }; - CatalystPluginPrometheusTiny = final.buildPerlPackage { + CatalystPluginPrometheusTiny = final.perlPackages.buildPerlPackage { pname = "Catalyst-Plugin-PrometheusTiny"; version = "0.005"; src = final.fetchurl { @@ -104,7 +113,7 @@ }; }; - CryptPassphrase = final.buildPerlPackage { + CryptPassphrase = final.perlPackages.buildPerlPackage { pname = "Crypt-Passphrase"; version = "0.003"; src = final.fetchurl { @@ -117,7 +126,7 @@ }; }; - CryptPassphraseArgon2 = final.buildPerlPackage { + CryptPassphraseArgon2 = final.perlPackages.buildPerlPackage { pname = "Crypt-Passphrase-Argon2"; version = "0.002"; src = final.fetchurl { @@ -131,7 +140,7 @@ }; }; - DataRandom = final.buildPerlPackage { + DataRandom = 
final.perlPackages.buildPerlPackage { pname = "Data-Random"; version = "0.13"; src = final.fetchurl { @@ -145,7 +154,7 @@ }; }; - DirSelf = final.buildPerlPackage { + DirSelf = final.perlPackages.buildPerlPackage { pname = "Dir-Self"; version = "0.11"; src = final.fetchurl { @@ -173,7 +182,7 @@ }; }; - PrometheusTiny = final.buildPerlPackage { + PrometheusTiny = final.perlPackages.buildPerlPackage { pname = "Prometheus-Tiny"; version = "0.007"; src = final.fetchurl { @@ -188,7 +197,7 @@ }; }; - PrometheusTinyShared = final.buildPerlPackage { + PrometheusTinyShared = final.perlPackages.buildPerlPackage { pname = "Prometheus-Tiny-Shared"; version = "0.023"; src = final.fetchurl { @@ -219,7 +228,7 @@ }; }; - TieHashMethod = final.buildPerlPackage { + TieHashMethod = final.perlPackages.buildPerlPackage { pname = "Tie-Hash-Method"; version = "0.02"; src = final.fetchurl { @@ -232,7 +241,7 @@ }; }; - Test2Harness = final.buildPerlPackage { + Test2Harness = final.perlPackages.buildPerlPackage { pname = "Test2-Harness"; version = "1.000042"; src = final.fetchurl { @@ -281,7 +290,7 @@ }; }; - LongJump = final.buildPerlPackage { + LongJump = final.perlPackages.buildPerlPackage { pname = "Long-Jump"; version = "0.000001"; src = final.fetchurl { @@ -295,7 +304,7 @@ }; }; - gotofile = final.buildPerlPackage { + gotofile = final.perlPackages.buildPerlPackage { pname = "goto-file"; version = "0.005"; src = final.fetchurl { @@ -425,7 +434,7 @@ }; }; - StringCompareConstantTime = final.buildPerlPackage { + StringCompareConstantTime = final.perlPackages.buildPerlPackage { pname = "String-Compare-ConstantTime"; version = "0.321"; src = final.fetchurl { @@ -438,7 +447,7 @@ }; }; - UUID4Tiny = final.buildPerlPackage { + UUID4Tiny = final.perlPackages.buildPerlPackage { pname = "UUID4-Tiny"; version = "0.002"; src = final.fetchurl { @@ -562,13 +571,16 @@ (if lib.versionAtLeast lib.version "20.03pre" then nlohmann_json else nlohmann_json.override { multipleHeaders = true; }) + 
prometheus-cpp ]; checkInputs = [ cacert - foreman + # FIXME: foreman is broken on all nix/nixpkgs pin, up to and + # including 2.7.0 + newNixpkgs.legacyPackages.${final.system}.foreman glibcLocales - netcat-openbsd + libressl.nc openldap python3 ]; diff --git a/src/hydra-queue-runner/Makefile.am b/src/hydra-queue-runner/Makefile.am index 57808608..117112f6 100644 --- a/src/hydra-queue-runner/Makefile.am +++ b/src/hydra-queue-runner/Makefile.am @@ -4,5 +4,5 @@ hydra_queue_runner_SOURCES = hydra-queue-runner.cc queue-monitor.cc dispatcher.c builder.cc build-result.cc build-remote.cc \ hydra-build-result.hh counter.hh state.hh db.hh \ nar-extractor.cc nar-extractor.hh -hydra_queue_runner_LDADD = $(NIX_LIBS) -lpqxx +hydra_queue_runner_LDADD = $(NIX_LIBS) -lpqxx -lprometheus-cpp-pull -lprometheus-cpp-core hydra_queue_runner_CXXFLAGS = $(NIX_CFLAGS) -Wall -I ../libhydra -Wno-deprecated-declarations diff --git a/src/hydra-queue-runner/hydra-queue-runner.cc b/src/hydra-queue-runner/hydra-queue-runner.cc index 615e470b..723bf223 100644 --- a/src/hydra-queue-runner/hydra-queue-runner.cc +++ b/src/hydra-queue-runner/hydra-queue-runner.cc @@ -6,6 +6,8 @@ #include #include +#include <prometheus/exposer.h> + #include "state.hh" #include "hydra-build-result.hh" #include "store-api.hh" @@ -36,8 +38,55 @@ std::string getEnvOrDie(const std::string & key) return *value; } +State::PromMetrics::PromMetrics() + : registry(std::make_shared<prometheus::Registry>()) + , queue_checks_started( + prometheus::BuildCounter() + .Name("hydraqueuerunner_queue_checks_started_total") + .Help("Number of times State::getQueuedBuilds() was started") + .Register(*registry) + .Add({}) + ) + , queue_build_loads( + prometheus::BuildCounter() + .Name("hydraqueuerunner_queue_build_loads_total") + .Help("Number of builds loaded") + .Register(*registry) + .Add({}) + ) + , queue_steps_created( + prometheus::BuildCounter() + .Name("hydraqueuerunner_queue_steps_created_total") + .Help("Number of steps created") + .Register(*registry) + .Add({}) + ) + ,
queue_checks_early_exits( + prometheus::BuildCounter() + .Name("hydraqueuerunner_queue_checks_early_exits_total") + .Help("Number of times State::getQueuedBuilds() yielded to potential bumps") + .Register(*registry) + .Add({}) + ) + , queue_checks_finished( + prometheus::BuildCounter() + .Name("hydraqueuerunner_queue_checks_finished_total") + .Help("Number of times State::getQueuedBuilds() was completed") + .Register(*registry) + .Add({}) + ) + , queue_max_id( + prometheus::BuildGauge() + .Name("hydraqueuerunner_queue_max_build_id_info") + .Help("Maximum build record ID in the queue") + .Register(*registry) + .Add({}) + ) +{ -State::State() +} + +State::State(std::optional<std::string> metricsAddrOpt) : config(std::make_unique<HydraConfig>()) , maxUnsupportedTime(config->getIntOption("max_unsupported_time", 0)) , dbPool(config->getIntOption("max_db_connections", 128)) @@ -45,11 +94,16 @@ State::State() , maxLogSize(config->getIntOption("max_log_size", 64ULL << 20)) , uploadLogsToBinaryCache(config->getBoolOption("upload_logs_to_binary_cache", false)) , rootsDir(config->getStrOption("gc_roots_dir", fmt("%s/gcroots/per-user/%s/hydra-roots", settings.nixStateDir, getEnvOrDie("LOGNAME")))) + , metricsAddr(config->getStrOption("queue_runner_metrics_address", std::string{"127.0.0.1:9198"})) { hydraData = getEnvOrDie("HYDRA_DATA"); logDir = canonPath(hydraData + "/build-logs"); + if (metricsAddrOpt.has_value()) { + metricsAddr = metricsAddrOpt.value(); + } + /* handle deprecated store specification */ if (config->getStrOption("store_mode") != "") throw Error("store_mode in hydra.conf is deprecated, please use store_uri"); @@ -754,6 +808,18 @@ void State::run(BuildID buildOne) if (!lock) throw Error("hydra-queue-runner is already running"); + std::cout << "Starting the Prometheus exporter on " << metricsAddr << std::endl; + + /* Set up simple exporter, to show that we're still alive.
*/ + prometheus::Exposer promExposer{metricsAddr}; + auto exposerPort = promExposer.GetListeningPorts().front(); + + promExposer.RegisterCollectable(prom.registry); + + std::cout << "Started the Prometheus exporter, listening on " + << metricsAddr << "/metrics (port " << exposerPort << ")" + << std::endl; + Store::Params localParams; localParams["max-connections"] = "16"; localParams["max-connection-age"] = "600"; @@ -864,6 +930,7 @@ int main(int argc, char * * argv) bool unlock = false; bool status = false; BuildID buildOne = 0; + std::optional<std::string> metricsAddrOpt = std::nullopt; parseCmdLine(argc, argv, [&](Strings::iterator & arg, const Strings::iterator & end) { if (*arg == "--unlock") @@ -875,6 +942,8 @@ int main(int argc, char * * argv) buildOne = *b; else throw Error("‘--build-one’ requires a build ID"); + } else if (*arg == "--prometheus-address") { + metricsAddrOpt = getArg(*arg, arg, end); } else return false; return true; @@ -883,7 +952,7 @@ int main(int argc, char * * argv) settings.verboseBuild = true; settings.lockCPU = false; - State state; + State state{metricsAddrOpt}; if (status) state.showStatus(); else if (unlock) diff --git a/src/hydra-queue-runner/queue-monitor.cc b/src/hydra-queue-runner/queue-monitor.cc index 6a5a82db..3bde0d99 100644 --- a/src/hydra-queue-runner/queue-monitor.cc +++ b/src/hydra-queue-runner/queue-monitor.cc @@ -82,6 +82,8 @@ struct PreviousFailure : public std::exception { bool State::getQueuedBuilds(Connection & conn, ref<Store> destStore, unsigned int & lastBuildId) { + prom.queue_checks_started.Increment(); + printInfo("checking the queue for builds > %d...", lastBuildId); /* Grab the queued builds from the database, but don't process @@ -107,7 +109,10 @@ bool State::getQueuedBuilds(Connection & conn, auto builds_(builds.lock()); BuildID id = row["id"].as<BuildID>(); if (buildOne && id != buildOne) continue; - if (id > newLastBuildId) newLastBuildId = id; + if (id > newLastBuildId) { + newLastBuildId = id; + prom.queue_max_id.Set(id); + }
if (builds_->count(id)) continue; auto build = std::make_shared<Build>( @@ -136,6 +141,7 @@ bool State::getQueuedBuilds(Connection & conn, std::set<StorePath> finishedDrvs; createBuild = [&](Build::ptr build) { + prom.queue_build_loads.Increment(); printMsg(lvlTalkative, format("loading build %1% (%2%)") % build->id % build->fullJobName()); nrAdded++; newBuildsByID.erase(build->id); @@ -306,9 +312,14 @@ bool State::getQueuedBuilds(Connection & conn, /* Stop after a certain time to allow priority bumps to be processed. */ - if (std::chrono::system_clock::now() > start + std::chrono::seconds(600)) break; + if (std::chrono::system_clock::now() > start + std::chrono::seconds(600)) { + prom.queue_checks_early_exits.Increment(); + break; + } } + prom.queue_checks_finished.Increment(); + lastBuildId = newBuildsByID.empty() ? newLastBuildId : newBuildsByID.begin()->first - 1; return newBuildsByID.empty(); } @@ -437,6 +448,8 @@ Step::ptr State::createStep(ref<Store> destStore, if (!isNew) return step; + prom.queue_steps_created.Increment(); + printMsg(lvlDebug, "considering derivation ‘%1%’", localStore->printStorePath(drvPath)); /* Initialize the step. Note that the step may be visible in diff --git a/src/hydra-queue-runner/state.hh b/src/hydra-queue-runner/state.hh index 7c375cb9..47e74f55 100644 --- a/src/hydra-queue-runner/state.hh +++ b/src/hydra-queue-runner/state.hh @@ -7,6 +7,10 @@ #include #include +#include <prometheus/counter.h> +#include <prometheus/gauge.h> +#include <prometheus/registry.h> + #include "db.hh" #include "parsed-derivations.hh" @@ -433,8 +437,25 @@ private: via gc_roots_dir.
*/ nix::Path rootsDir; + std::string metricsAddr; + + struct PromMetrics + { + std::shared_ptr<prometheus::Registry> registry; + + prometheus::Counter& queue_checks_started; + prometheus::Counter& queue_build_loads; + prometheus::Counter& queue_steps_created; + prometheus::Counter& queue_checks_early_exits; + prometheus::Counter& queue_checks_finished; + prometheus::Gauge& queue_max_id; + + PromMetrics(); + }; + PromMetrics prom; + public: - State(); + State(std::optional<std::string> metricsAddrOpt); private: @@ -544,6 +565,8 @@ private: void addRoot(const nix::StorePath & storePath); + void runMetricsExporter(); + public: void showStatus(); diff --git a/t/Hydra/Config/include.t b/t/Hydra/Config/include.t index fe2dd1ed..14f657ff 100644 --- a/t/Hydra/Config/include.t +++ b/t/Hydra/Config/include.t @@ -20,6 +20,7 @@ write_file($ctx{'tmpdir'} . "/bar.conf", q| |); is(getHydraConfig(), { + queue_runner_metrics_address => "127.0.0.1:0", foo => { bar => "baz" } }, "Nested includes work."); diff --git a/t/lib/HydraTestContext.pm b/t/lib/HydraTestContext.pm index 237fcbe4..badb3728 100644 --- a/t/lib/HydraTestContext.pm +++ b/t/lib/HydraTestContext.pm @@ -51,6 +51,7 @@ sub new { $ENV{'HYDRA_CONFIG'} = "$dir/hydra.conf"; my $hydra_config = $opts{'hydra_config'} || ""; + $hydra_config = "queue_runner_metrics_address = 127.0.0.1:0\n" . $hydra_config; if ($opts{'use_external_destination_store'} // 1) { $hydra_config = "store_uri = file:$dir/nix/dest-store\n" . $hydra_config; }