From 5bbaa18a8f0e4271a3ae8a1a8adda40142ec2b4b Mon Sep 17 00:00:00 2001 From: Cole Helbling Date: Thu, 10 Mar 2022 12:20:44 -0800 Subject: [PATCH 01/24] flake: fix foreman execution [vin@scadrial:~/workspace/vcs/hydra]$ foreman -h Warning: the running version of Bundler (2.1.4) is older than the version that created the lockfile (2.2.20). We suggest you to upgrade to the version that created the lockfile by running `gem install bundler:2.2.20`. Traceback (most recent call last): 2: from /nix/store/ycshcdssxcj9sjf6yzb1ydw4fcglf66y-foreman-0.87.2/bin/foreman:20:in `
' 1: from /nix/store/ggqacj06n6qfm1iww0bih9ph0j89wcna-bundler-2.1.4/lib/ruby/gems/2.7.0/gems/bundler-2.1.4/lib/bundler/rubygems_integration.rb:413:in `block in replace_bin_path' /nix/store/ggqacj06n6qfm1iww0bih9ph0j89wcna-bundler-2.1.4/lib/ruby/gems/2.7.0/gems/bundler-2.1.4/lib/bundler/rubygems_integration.rb:374:in `block in replace_bin_path': can't find executable foreman for gem foreman. foreman is not currently included in the bundle, perhaps you meant to add it to your Gemfile? (Gem::Exception) --- flake.lock | 17 +++++++++++++++++ flake.nix | 9 +++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/flake.lock b/flake.lock index e4bf8c71..684243af 100644 --- a/flake.lock +++ b/flake.lock @@ -16,6 +16,22 @@ "type": "github" } }, + "newNixpkgs": { + "locked": { + "lastModified": 1646588256, + "narHash": "sha256-ZHljmNlt19nSm0Mz8fx6QEhddKUkU4hhwFmfNmGn+EY=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "2ebb6c1e5ae402ba35cca5eec58385e5f1adea04", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-21.11", + "repo": "nixpkgs", + "type": "github" + } + }, "nix": { "inputs": { "lowdown-src": "lowdown-src", @@ -69,6 +85,7 @@ }, "root": { "inputs": { + "newNixpkgs": "newNixpkgs", "nix": "nix", "nixpkgs": [ "nix", diff --git a/flake.nix b/flake.nix index d69048ed..2c4a4ddb 100644 --- a/flake.nix +++ b/flake.nix @@ -1,10 +1,13 @@ { description = "A Nix-based continuous build system"; + # FIXME: All the pinned versions of nix/nixpkgs have a broken foreman (yes, + # even 2.7.0's Nixpkgs pin). 
+ inputs.newNixpkgs.url = "github:NixOS/nixpkgs/nixos-21.11"; inputs.nixpkgs.follows = "nix/nixpkgs"; inputs.nix.url = github:NixOS/nix/2.6.0; - outputs = { self, nixpkgs, nix }: + outputs = { self, newNixpkgs, nixpkgs, nix }: let version = "${builtins.readFile ./version.txt}.${builtins.substring 0 8 self.lastModifiedDate}.${self.shortRev or "DIRTY"}"; @@ -566,7 +569,9 @@ checkInputs = [ cacert - foreman + # FIXME: foreman is broken on all nix/nixpkgs pin, up to and + # including 2.7.0 + newNixpkgs.legacyPackages.${final.system}.foreman glibcLocales netcat-openbsd openldap From 3bf31bd6a6e27be79245496598b64fabb01bcf41 Mon Sep 17 00:00:00 2001 From: Cole Helbling Date: Thu, 10 Mar 2022 12:21:30 -0800 Subject: [PATCH 02/24] hydra-queue-runner: add simple "up" exporter There are probably better ways to achieve this (and will likely need to be refactored a bit to support further metrics). --- flake.nix | 1 + src/hydra-queue-runner/Makefile.am | 2 +- src/hydra-queue-runner/hydra-queue-runner.cc | 23 ++++++++++++++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/flake.nix b/flake.nix index 2c4a4ddb..175aa406 100644 --- a/flake.nix +++ b/flake.nix @@ -565,6 +565,7 @@ (if lib.versionAtLeast lib.version "20.03pre" then nlohmann_json else nlohmann_json.override { multipleHeaders = true; }) + prometheus-cpp ]; checkInputs = [ diff --git a/src/hydra-queue-runner/Makefile.am b/src/hydra-queue-runner/Makefile.am index ea852334..6e01ef85 100644 --- a/src/hydra-queue-runner/Makefile.am +++ b/src/hydra-queue-runner/Makefile.am @@ -4,5 +4,5 @@ hydra_queue_runner_SOURCES = hydra-queue-runner.cc queue-monitor.cc dispatcher.c builder.cc build-result.cc build-remote.cc \ build-result.hh counter.hh state.hh db.hh \ nar-extractor.cc nar-extractor.hh -hydra_queue_runner_LDADD = $(NIX_LIBS) -lpqxx +hydra_queue_runner_LDADD = $(NIX_LIBS) -lpqxx -lprometheus-cpp-pull -lprometheus-cpp-core hydra_queue_runner_CXXFLAGS = $(NIX_CFLAGS) -Wall -I ../libhydra 
-Wno-deprecated-declarations diff --git a/src/hydra-queue-runner/hydra-queue-runner.cc b/src/hydra-queue-runner/hydra-queue-runner.cc index 3297730c..af7ec28d 100644 --- a/src/hydra-queue-runner/hydra-queue-runner.cc +++ b/src/hydra-queue-runner/hydra-queue-runner.cc @@ -6,6 +6,10 @@ #include #include +#include +#include +#include + #include "state.hh" #include "build-result.hh" #include "store-api.hh" @@ -854,6 +858,25 @@ int main(int argc, char * * argv) return handleExceptions(argv[0], [&]() { initNix(); + /* Export a simple "up" metric, to allow monitoring that we're + still alive. */ + std::thread([&]() { + prometheus::Exposer exposer{"127.0.0.1:8080"}; + + // @note it's the users responsibility to keep the object alive + auto registry = std::make_shared(); + + auto& running = prometheus::BuildGauge() + .Name("hydra_queue_runner_running") + .Help("Whether the queue runner is currently running") + .Register(*registry); + + exposer.RegisterCollectable(registry); + running.Add({}).Set(1); + + while (true) { } + }).detach(); + signal(SIGINT, SIG_DFL); signal(SIGTERM, SIG_DFL); signal(SIGHUP, SIG_DFL); From a0cb73579d48aca3431a9294088a3e37f2c52323 Mon Sep 17 00:00:00 2001 From: Cole Helbling Date: Fri, 11 Mar 2022 11:50:44 -0800 Subject: [PATCH 03/24] flake: update newNixpkgs for newer prometheus-cpp --- flake.lock | 8 ++++---- flake.nix | 9 ++++++++- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/flake.lock b/flake.lock index 684243af..586c08e0 100644 --- a/flake.lock +++ b/flake.lock @@ -18,17 +18,17 @@ }, "newNixpkgs": { "locked": { - "lastModified": 1646588256, - "narHash": "sha256-ZHljmNlt19nSm0Mz8fx6QEhddKUkU4hhwFmfNmGn+EY=", + "lastModified": 1647023429, + "narHash": "sha256-LdMTXEgW+G1LXrGrME1b1CpTC6/r+meFZDHeXR2Ps40=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "2ebb6c1e5ae402ba35cca5eec58385e5f1adea04", + "rev": "9b095223a5dc9a6bce6ec54477f31194871eca8e", "type": "github" }, "original": { "owner": "NixOS", - "ref": "nixos-21.11", 
"repo": "nixpkgs", + "rev": "9b095223a5dc9a6bce6ec54477f31194871eca8e", "type": "github" } }, diff --git a/flake.nix b/flake.nix index 175aa406..7ea0ce35 100644 --- a/flake.nix +++ b/flake.nix @@ -3,7 +3,8 @@ # FIXME: All the pinned versions of nix/nixpkgs have a broken foreman (yes, # even 2.7.0's Nixpkgs pin). - inputs.newNixpkgs.url = "github:NixOS/nixpkgs/nixos-21.11"; + # FIXME: has updated prometheus-cpp: https://github.com/NixOS/nixpkgs/pull/163695 + inputs.newNixpkgs.url = "github:NixOS/nixpkgs/9b095223a5dc9a6bce6ec54477f31194871eca8e"; inputs.nixpkgs.follows = "nix/nixpkgs"; inputs.nix.url = github:NixOS/nix/2.6.0; @@ -41,6 +42,12 @@ # A Nixpkgs overlay that provides a 'hydra' package. overlay = final: prev: { + # Overlay these packages to use dependencies from the Nixpkgs everything + # else uses, to side-step the version difference: glibc is 2.32 in the + # nix-pinned Nixpkgs, but 2.33 in the newNixpkgs commit. + civetweb = final.callPackage "${newNixpkgs}/pkgs/development/libraries/civetweb" { }; + prometheus-cpp = final.callPackage "${newNixpkgs}/pkgs/development/libraries/prometheus-cpp" { }; + # Add LDAP dependencies that aren't currently found within nixpkgs. perlPackages = prev.perlPackages // { TestPostgreSQL = final.perlPackages.buildPerlModule { From 6e6475d860269b5da31f83b4954432b2d071a8d8 Mon Sep 17 00:00:00 2001 From: Cole Helbling Date: Fri, 11 Mar 2022 11:51:26 -0800 Subject: [PATCH 04/24] flake: replace aliases with their proper names Newer Nixpkgs have added a throw for these aliases. 
--- flake.nix | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/flake.nix b/flake.nix index 7ea0ce35..a0c4ff74 100644 --- a/flake.nix +++ b/flake.nix @@ -71,7 +71,7 @@ }; }; - FunctionParameters = final.buildPerlPackage { + FunctionParameters = final.perlPackages.buildPerlPackage { pname = "Function-Parameters"; version = "2.001003"; src = final.fetchurl { @@ -85,7 +85,7 @@ }; }; - CatalystPluginPrometheusTiny = final.buildPerlPackage { + CatalystPluginPrometheusTiny = final.perlPackages.buildPerlPackage { pname = "Catalyst-Plugin-PrometheusTiny"; version = "0.005"; src = final.fetchurl { @@ -114,7 +114,7 @@ }; }; - CryptPassphrase = final.buildPerlPackage { + CryptPassphrase = final.perlPackages.buildPerlPackage { pname = "Crypt-Passphrase"; version = "0.003"; src = final.fetchurl { @@ -127,7 +127,7 @@ }; }; - CryptPassphraseArgon2 = final.buildPerlPackage { + CryptPassphraseArgon2 = final.perlPackages.buildPerlPackage { pname = "Crypt-Passphrase-Argon2"; version = "0.002"; src = final.fetchurl { @@ -141,7 +141,7 @@ }; }; - DataRandom = final.buildPerlPackage { + DataRandom = final.perlPackages.buildPerlPackage { pname = "Data-Random"; version = "0.13"; src = final.fetchurl { @@ -155,7 +155,7 @@ }; }; - DirSelf = final.buildPerlPackage { + DirSelf = final.perlPackages.buildPerlPackage { pname = "Dir-Self"; version = "0.11"; src = final.fetchurl { @@ -183,7 +183,7 @@ }; }; - PrometheusTiny = final.buildPerlPackage { + PrometheusTiny = final.perlPackages.buildPerlPackage { pname = "Prometheus-Tiny"; version = "0.007"; src = final.fetchurl { @@ -198,7 +198,7 @@ }; }; - PrometheusTinyShared = final.buildPerlPackage { + PrometheusTinyShared = final.perlPackages.buildPerlPackage { pname = "Prometheus-Tiny-Shared"; version = "0.023"; src = final.fetchurl { @@ -229,7 +229,7 @@ }; }; - TieHashMethod = final.buildPerlPackage { + TieHashMethod = final.perlPackages.buildPerlPackage { pname = "Tie-Hash-Method"; version = "0.02"; 
src = final.fetchurl { @@ -242,7 +242,7 @@ }; }; - Test2Harness = final.buildPerlPackage { + Test2Harness = final.perlPackages.buildPerlPackage { pname = "Test2-Harness"; version = "1.000042"; src = final.fetchurl { @@ -291,7 +291,7 @@ }; }; - LongJump = final.buildPerlPackage { + LongJump = final.perlPackages.buildPerlPackage { pname = "Long-Jump"; version = "0.000001"; src = final.fetchurl { @@ -305,7 +305,7 @@ }; }; - gotofile = final.buildPerlPackage { + gotofile = final.perlPackages.buildPerlPackage { pname = "goto-file"; version = "0.005"; src = final.fetchurl { @@ -435,7 +435,7 @@ }; }; - StringCompareConstantTime = final.buildPerlPackage { + StringCompareConstantTime = final.perlPackages.buildPerlPackage { pname = "String-Compare-ConstantTime"; version = "0.321"; src = final.fetchurl { @@ -448,7 +448,7 @@ }; }; - UUID4Tiny = final.buildPerlPackage { + UUID4Tiny = final.perlPackages.buildPerlPackage { pname = "UUID4-Tiny"; version = "0.002"; src = final.fetchurl { @@ -581,7 +581,7 @@ # including 2.7.0 newNixpkgs.legacyPackages.${final.system}.foreman glibcLocales - netcat-openbsd + libressl.nc openldap python3 ]; From 52a29d43e65d8d7078589269ab854faad0a3bc63 Mon Sep 17 00:00:00 2001 From: Cole Helbling Date: Fri, 11 Mar 2022 11:52:43 -0800 Subject: [PATCH 05/24] hydra-queue-runner: make registry member of State, configurable metrics port Thanks to the updated prometheus-cpp library, specifying a port of 0 will cause it to pick a random (available) port -- ideal for tests. 
--- src/hydra-queue-runner/hydra-queue-runner.cc | 46 ++++++++++---------- src/hydra-queue-runner/state.hh | 8 +++- 2 files changed, 31 insertions(+), 23 deletions(-) diff --git a/src/hydra-queue-runner/hydra-queue-runner.cc b/src/hydra-queue-runner/hydra-queue-runner.cc index af7ec28d..fd4ba50d 100644 --- a/src/hydra-queue-runner/hydra-queue-runner.cc +++ b/src/hydra-queue-runner/hydra-queue-runner.cc @@ -7,7 +7,6 @@ #include #include -#include #include #include "state.hh" @@ -41,7 +40,7 @@ std::string getEnvOrDie(const std::string & key) } -State::State() +State::State(uint16_t metricsPort) : config(std::make_unique()) , maxUnsupportedTime(config->getIntOption("max_unsupported_time", 0)) , dbPool(config->getIntOption("max_db_connections", 128)) @@ -49,6 +48,8 @@ State::State() , maxLogSize(config->getIntOption("max_log_size", 64ULL << 20)) , uploadLogsToBinaryCache(config->getBoolOption("upload_logs_to_binary_cache", false)) , rootsDir(config->getStrOption("gc_roots_dir", fmt("%s/gcroots/per-user/%s/hydra-roots", settings.nixStateDir, getEnvOrDie("LOGNAME")))) + , registry(std::make_shared()) + , metricsPort(metricsPort) { hydraData = getEnvOrDie("HYDRA_DATA"); @@ -758,6 +759,15 @@ void State::run(BuildID buildOne) if (!lock) throw Error("hydra-queue-runner is already running"); + /* Set up simple exporter, to show that we're still alive. */ + std::string metricsAddress{"127.0.0.1:" + std::to_string(metricsPort)}; + prometheus::Exposer exposer{metricsAddress}; + exposer.RegisterCollectable(registry); + + std::cout << "Starting the Prometheus exporter, listening on " + << "http://" << metricsAddress << "/metrics" + << std::endl; + Store::Params localParams; localParams["max-connections"] = "16"; localParams["max-connection-age"] = "600"; @@ -858,25 +868,6 @@ int main(int argc, char * * argv) return handleExceptions(argv[0], [&]() { initNix(); - /* Export a simple "up" metric, to allow monitoring that we're - still alive. 
*/ - std::thread([&]() { - prometheus::Exposer exposer{"127.0.0.1:8080"}; - - // @note it's the users responsibility to keep the object alive - auto registry = std::make_shared(); - - auto& running = prometheus::BuildGauge() - .Name("hydra_queue_runner_running") - .Help("Whether the queue runner is currently running") - .Register(*registry); - - exposer.RegisterCollectable(registry); - running.Add({}).Set(1); - - while (true) { } - }).detach(); - signal(SIGINT, SIG_DFL); signal(SIGTERM, SIG_DFL); signal(SIGHUP, SIG_DFL); @@ -887,6 +878,7 @@ int main(int argc, char * * argv) bool unlock = false; bool status = false; BuildID buildOne = 0; + uint16_t metricsPort = 0; parseCmdLine(argc, argv, [&](Strings::iterator & arg, const Strings::iterator & end) { if (*arg == "--unlock") @@ -898,6 +890,16 @@ int main(int argc, char * * argv) buildOne = *b; else throw Error("‘--build-one’ requires a build ID"); + } else if (*arg == "--port") { + if (auto p = string2Int(getArg(*arg, arg, end))) { + if (*p > std::numeric_limits::max()) { + throw Error("'--port' has a maximum of 65535"); + } else { + metricsPort = *p; + } + } else { + throw Error("'--port' requires a numeric port (0 for a random, usable port; max 65535)"); + } } else return false; return true; @@ -906,7 +908,7 @@ int main(int argc, char * * argv) settings.verboseBuild = true; settings.lockCPU = false; - State state; + State state{metricsPort}; if (status) state.showStatus(); else if (unlock) diff --git a/src/hydra-queue-runner/state.hh b/src/hydra-queue-runner/state.hh index 8f303d28..4add0dbd 100644 --- a/src/hydra-queue-runner/state.hh +++ b/src/hydra-queue-runner/state.hh @@ -7,6 +7,8 @@ #include #include +#include + #include "db.hh" #include "parsed-derivations.hh" @@ -432,8 +434,12 @@ private: via gc_roots_dir. 
*/ nix::Path rootsDir; + std::shared_ptr registry; + + uint16_t metricsPort; + public: - State(); + State(uint16_t metricsPort); private: From c0f826b92d5cad9f7633c0a847a5e2c4a892c4dc Mon Sep 17 00:00:00 2001 From: Cole Helbling Date: Mon, 14 Mar 2022 08:41:45 -0700 Subject: [PATCH 06/24] hydra-queue-runner: get the listening port from the exposer itself Otherwise, when the port is randomly chosen (e.g. by specifying no port, or a port of 0), it will just show that the port is 0 and not the port that is actually serving the metrics. --- src/hydra-queue-runner/hydra-queue-runner.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/hydra-queue-runner/hydra-queue-runner.cc b/src/hydra-queue-runner/hydra-queue-runner.cc index fd4ba50d..727a728f 100644 --- a/src/hydra-queue-runner/hydra-queue-runner.cc +++ b/src/hydra-queue-runner/hydra-queue-runner.cc @@ -760,12 +760,13 @@ void State::run(BuildID buildOne) throw Error("hydra-queue-runner is already running"); /* Set up simple exporter, to show that we're still alive. */ - std::string metricsAddress{"127.0.0.1:" + std::to_string(metricsPort)}; - prometheus::Exposer exposer{metricsAddress}; + std::string metricsAddress{"127.0.0.1"}; + prometheus::Exposer exposer{metricsAddress + ":" + std::to_string(metricsPort)}; + auto exposerPort = exposer.GetListeningPorts().front(); exposer.RegisterCollectable(registry); std::cout << "Starting the Prometheus exporter, listening on " - << "http://" << metricsAddress << "/metrics" + << "http://" << metricsAddress << ":" << exposerPort << "/metrics" << std::endl; Store::Params localParams; From b0c17112c92c4dc3df688b77784468cc2f03c127 Mon Sep 17 00:00:00 2001 From: Cole Helbling Date: Fri, 18 Mar 2022 11:10:57 -0700 Subject: [PATCH 07/24] flake: update to nixos-unstable-small https://github.com/NixOS/nixpkgs/pull/163695 was merged, so no longer need to use my commit! 
--- flake.lock | 8 ++++---- flake.nix | 3 +-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/flake.lock b/flake.lock index 586c08e0..4dc1b240 100644 --- a/flake.lock +++ b/flake.lock @@ -18,17 +18,17 @@ }, "newNixpkgs": { "locked": { - "lastModified": 1647023429, - "narHash": "sha256-LdMTXEgW+G1LXrGrME1b1CpTC6/r+meFZDHeXR2Ps40=", + "lastModified": 1647380550, + "narHash": "sha256-909TI9poX7CIUiFx203WL29YON6m/I6k0ExbZvR7bLM=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "9b095223a5dc9a6bce6ec54477f31194871eca8e", + "rev": "6e3ee8957637a60f5072e33d78e05c0f65c54366", "type": "github" }, "original": { "owner": "NixOS", + "ref": "nixos-unstable-small", "repo": "nixpkgs", - "rev": "9b095223a5dc9a6bce6ec54477f31194871eca8e", "type": "github" } }, diff --git a/flake.nix b/flake.nix index a0c4ff74..e9cc4d2b 100644 --- a/flake.nix +++ b/flake.nix @@ -3,8 +3,7 @@ # FIXME: All the pinned versions of nix/nixpkgs have a broken foreman (yes, # even 2.7.0's Nixpkgs pin). - # FIXME: has updated prometheus-cpp: https://github.com/NixOS/nixpkgs/pull/163695 - inputs.newNixpkgs.url = "github:NixOS/nixpkgs/9b095223a5dc9a6bce6ec54477f31194871eca8e"; + inputs.newNixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable-small"; inputs.nixpkgs.follows = "nix/nixpkgs"; inputs.nix.url = github:NixOS/nix/2.6.0; From 8503a7917b65eb77c3267312dbbaae686f91a1ec Mon Sep 17 00:00:00 2001 From: Cole Helbling Date: Tue, 22 Mar 2022 13:38:13 -0700 Subject: [PATCH 08/24] fixup! 
hydra-queue-runner: make registry member of State, configurable metrics port --- src/hydra-queue-runner/hydra-queue-runner.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/hydra-queue-runner/hydra-queue-runner.cc b/src/hydra-queue-runner/hydra-queue-runner.cc index 727a728f..f20c8e73 100644 --- a/src/hydra-queue-runner/hydra-queue-runner.cc +++ b/src/hydra-queue-runner/hydra-queue-runner.cc @@ -7,7 +7,6 @@ #include #include -#include #include "state.hh" #include "build-result.hh" From 9cdc5aceed59d96a41ab866d2e8f575bdca64d70 Mon Sep 17 00:00:00 2001 From: Cole Helbling Date: Tue, 29 Mar 2022 08:41:19 -0700 Subject: [PATCH 09/24] hydra-queue-runner: log message before and after exporter is started This way, if something goes wrong between the two, it's easier to narrow down where the issue lies. --- src/hydra-queue-runner/hydra-queue-runner.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/hydra-queue-runner/hydra-queue-runner.cc b/src/hydra-queue-runner/hydra-queue-runner.cc index f20c8e73..062b0644 100644 --- a/src/hydra-queue-runner/hydra-queue-runner.cc +++ b/src/hydra-queue-runner/hydra-queue-runner.cc @@ -758,13 +758,15 @@ void State::run(BuildID buildOne) if (!lock) throw Error("hydra-queue-runner is already running"); + std::cout << "Starting the Prometheus exporter on port " << exposerPort << std::endl; + /* Set up simple exporter, to show that we're still alive. 
*/ std::string metricsAddress{"127.0.0.1"}; prometheus::Exposer exposer{metricsAddress + ":" + std::to_string(metricsPort)}; auto exposerPort = exposer.GetListeningPorts().front(); exposer.RegisterCollectable(registry); - std::cout << "Starting the Prometheus exporter, listening on " + std::cout << "Started the Prometheus exporter, listening on " << "http://" << metricsAddress << ":" << exposerPort << "/metrics" << std::endl; From 905a7a7bebb1be02dd0b86735407912b54bfcda1 Mon Sep 17 00:00:00 2001 From: Cole Helbling Date: Tue, 29 Mar 2022 08:46:43 -0700 Subject: [PATCH 10/24] hydra-queue-runner: read metrics port from `queue_runner_metrics_port` config --- src/hydra-queue-runner/hydra-queue-runner.cc | 2 +- src/hydra-queue-runner/state.hh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/hydra-queue-runner/hydra-queue-runner.cc b/src/hydra-queue-runner/hydra-queue-runner.cc index 062b0644..40babf33 100644 --- a/src/hydra-queue-runner/hydra-queue-runner.cc +++ b/src/hydra-queue-runner/hydra-queue-runner.cc @@ -47,8 +47,8 @@ State::State(uint16_t metricsPort) , maxLogSize(config->getIntOption("max_log_size", 64ULL << 20)) , uploadLogsToBinaryCache(config->getBoolOption("upload_logs_to_binary_cache", false)) , rootsDir(config->getStrOption("gc_roots_dir", fmt("%s/gcroots/per-user/%s/hydra-roots", settings.nixStateDir, getEnvOrDie("LOGNAME")))) + , metricsPort(config->getIntOption("queue_runner_metrics_port", metricsPort)) , registry(std::make_shared()) - , metricsPort(metricsPort) { hydraData = getEnvOrDie("HYDRA_DATA"); diff --git a/src/hydra-queue-runner/state.hh b/src/hydra-queue-runner/state.hh index 4add0dbd..5299edea 100644 --- a/src/hydra-queue-runner/state.hh +++ b/src/hydra-queue-runner/state.hh @@ -434,10 +434,10 @@ private: via gc_roots_dir. 
*/ nix::Path rootsDir; - std::shared_ptr registry; - uint16_t metricsPort; + std::shared_ptr registry; + public: State(uint16_t metricsPort); From 5ddb9a98ca674195d53c6461fe4b165692588ad8 Mon Sep 17 00:00:00 2001 From: Cole Helbling Date: Tue, 29 Mar 2022 08:47:41 -0700 Subject: [PATCH 11/24] fixup! hydra-queue-runner: log message before and after exporter is started --- src/hydra-queue-runner/hydra-queue-runner.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hydra-queue-runner/hydra-queue-runner.cc b/src/hydra-queue-runner/hydra-queue-runner.cc index 40babf33..c7d3b9b2 100644 --- a/src/hydra-queue-runner/hydra-queue-runner.cc +++ b/src/hydra-queue-runner/hydra-queue-runner.cc @@ -758,7 +758,7 @@ void State::run(BuildID buildOne) if (!lock) throw Error("hydra-queue-runner is already running"); - std::cout << "Starting the Prometheus exporter on port " << exposerPort << std::endl; + std::cout << "Starting the Prometheus exporter on port " << metricsPort << std::endl; /* Set up simple exporter, to show that we're still alive. 
*/ std::string metricsAddress{"127.0.0.1"}; From 928b3b8268fc2f1d2534f1cf7e524d05803105fd Mon Sep 17 00:00:00 2001 From: Cole Helbling Date: Tue, 29 Mar 2022 10:42:07 -0700 Subject: [PATCH 12/24] hydra-queue-runner: fix priority of flag over config file --- src/hydra-queue-runner/hydra-queue-runner.cc | 14 +++++++++----- src/hydra-queue-runner/state.hh | 2 +- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/hydra-queue-runner/hydra-queue-runner.cc b/src/hydra-queue-runner/hydra-queue-runner.cc index c7d3b9b2..3834f625 100644 --- a/src/hydra-queue-runner/hydra-queue-runner.cc +++ b/src/hydra-queue-runner/hydra-queue-runner.cc @@ -39,7 +39,7 @@ std::string getEnvOrDie(const std::string & key) } -State::State(uint16_t metricsPort) +State::State(std::optional metricsPortOpt) : config(std::make_unique()) , maxUnsupportedTime(config->getIntOption("max_unsupported_time", 0)) , dbPool(config->getIntOption("max_db_connections", 128)) @@ -47,13 +47,17 @@ State::State(uint16_t metricsPort) , maxLogSize(config->getIntOption("max_log_size", 64ULL << 20)) , uploadLogsToBinaryCache(config->getBoolOption("upload_logs_to_binary_cache", false)) , rootsDir(config->getStrOption("gc_roots_dir", fmt("%s/gcroots/per-user/%s/hydra-roots", settings.nixStateDir, getEnvOrDie("LOGNAME")))) - , metricsPort(config->getIntOption("queue_runner_metrics_port", metricsPort)) + , metricsPort(config->getIntOption("queue_runner_metrics_port", 9099)) , registry(std::make_shared()) { hydraData = getEnvOrDie("HYDRA_DATA"); logDir = canonPath(hydraData + "/build-logs"); + if (metricsPortOpt.has_value()) { + metricsPort = metricsPortOpt.value(); + } + /* handle deprecated store specification */ if (config->getStrOption("store_mode") != "") throw Error("store_mode in hydra.conf is deprecated, please use store_uri"); @@ -880,7 +884,7 @@ int main(int argc, char * * argv) bool unlock = false; bool status = false; BuildID buildOne = 0; - uint16_t metricsPort = 0; + std::optional metricsPortOpt 
= std::nullopt; parseCmdLine(argc, argv, [&](Strings::iterator & arg, const Strings::iterator & end) { if (*arg == "--unlock") @@ -897,7 +901,7 @@ int main(int argc, char * * argv) if (*p > std::numeric_limits::max()) { throw Error("'--port' has a maximum of 65535"); } else { - metricsPort = *p; + metricsPortOpt = *p; } } else { throw Error("'--port' requires a numeric port (0 for a random, usable port; max 65535)"); @@ -910,7 +914,7 @@ int main(int argc, char * * argv) settings.verboseBuild = true; settings.lockCPU = false; - State state{metricsPort}; + State state{metricsPortOpt}; if (status) state.showStatus(); else if (unlock) diff --git a/src/hydra-queue-runner/state.hh b/src/hydra-queue-runner/state.hh index 5299edea..63112c16 100644 --- a/src/hydra-queue-runner/state.hh +++ b/src/hydra-queue-runner/state.hh @@ -439,7 +439,7 @@ private: std::shared_ptr registry; public: - State(uint16_t metricsPort); + State(std::optional metricsPortOpt); private: From 4789eba92c65df52a204e4d7a4d3ee6f512df28e Mon Sep 17 00:00:00 2001 From: Cole Helbling Date: Tue, 29 Mar 2022 10:55:28 -0700 Subject: [PATCH 13/24] hydra-queue-runner: split metrics functionality into its own function --- src/hydra-queue-runner/hydra-queue-runner.cc | 30 +++++++++++++------- src/hydra-queue-runner/state.hh | 2 ++ 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/src/hydra-queue-runner/hydra-queue-runner.cc b/src/hydra-queue-runner/hydra-queue-runner.cc index 3834f625..992b9995 100644 --- a/src/hydra-queue-runner/hydra-queue-runner.cc +++ b/src/hydra-queue-runner/hydra-queue-runner.cc @@ -750,6 +750,24 @@ void State::unlock() } +void State::runMetricsExporter() +{ + std::cout << "Starting the Prometheus exporter on port " << metricsPort << std::endl; + + /* Set up simple exporter, to show that we're still alive. 
*/ + std::string metricsAddress{"127.0.0.1"}; + prometheus::Exposer exposer{metricsAddress + ":" + std::to_string(metricsPort)}; + auto exposerPort = exposer.GetListeningPorts().front(); + exposer.RegisterCollectable(registry); + + std::cout << "Started the Prometheus exporter, listening on " + << "http://" << metricsAddress << ":" << exposerPort << "/metrics" + << std::endl; + + while (true) {}; +} + + void State::run(BuildID buildOne) { /* Can't be bothered to shut down cleanly. Goodbye! */ @@ -762,17 +780,7 @@ void State::run(BuildID buildOne) if (!lock) throw Error("hydra-queue-runner is already running"); - std::cout << "Starting the Prometheus exporter on port " << metricsPort << std::endl; - - /* Set up simple exporter, to show that we're still alive. */ - std::string metricsAddress{"127.0.0.1"}; - prometheus::Exposer exposer{metricsAddress + ":" + std::to_string(metricsPort)}; - auto exposerPort = exposer.GetListeningPorts().front(); - exposer.RegisterCollectable(registry); - - std::cout << "Started the Prometheus exporter, listening on " - << "http://" << metricsAddress << ":" << exposerPort << "/metrics" - << std::endl; + std::thread(&State::runMetricsExporter, this).detach(); Store::Params localParams; localParams["max-connections"] = "16"; diff --git a/src/hydra-queue-runner/state.hh b/src/hydra-queue-runner/state.hh index 63112c16..fb533559 100644 --- a/src/hydra-queue-runner/state.hh +++ b/src/hydra-queue-runner/state.hh @@ -549,6 +549,8 @@ private: void addRoot(const nix::StorePath & storePath); + void runMetricsExporter(); + public: void showStatus(); From 9c1f36c47c2ff19ab8d47760f33cd9d5c32cb229 Mon Sep 17 00:00:00 2001 From: Cole Helbling Date: Tue, 29 Mar 2022 11:33:40 -0700 Subject: [PATCH 14/24] t/lib/HydraTestContext: set queue runner port to 0 This makes the exposer choose a random, available port. 
--- t/Hydra/Config/include.t | 1 + t/lib/HydraTestContext.pm | 1 + 2 files changed, 2 insertions(+) diff --git a/t/Hydra/Config/include.t b/t/Hydra/Config/include.t index fe2dd1ed..63186f87 100644 --- a/t/Hydra/Config/include.t +++ b/t/Hydra/Config/include.t @@ -20,6 +20,7 @@ write_file($ctx{'tmpdir'} . "/bar.conf", q| |); is(getHydraConfig(), { + queue_runner_metrics_port => 0, foo => { bar => "baz" } }, "Nested includes work."); diff --git a/t/lib/HydraTestContext.pm b/t/lib/HydraTestContext.pm index ade12280..ce933c09 100644 --- a/t/lib/HydraTestContext.pm +++ b/t/lib/HydraTestContext.pm @@ -51,6 +51,7 @@ sub new { $ENV{'HYDRA_CONFIG'} = "$dir/hydra.conf"; my $hydra_config = $opts{'hydra_config'} || ""; + $hydra_config = "queue_runner_metrics_port = 0\n" . $hydra_config; if ($opts{'use_external_destination_store'} // 1) { $hydra_config = "store_uri = file:$dir/nix/dest-store\n" . $hydra_config; } From 8c5636fe187ae29f962a5a27eeefbb5bd34ef91f Mon Sep 17 00:00:00 2001 From: Cole Helbling Date: Sat, 2 Apr 2022 17:32:14 -0700 Subject: [PATCH 15/24] hydra-queue-runner: use port 9198 by default Co-authored-by: Graham Christensen --- src/hydra-queue-runner/hydra-queue-runner.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hydra-queue-runner/hydra-queue-runner.cc b/src/hydra-queue-runner/hydra-queue-runner.cc index 992b9995..87361aca 100644 --- a/src/hydra-queue-runner/hydra-queue-runner.cc +++ b/src/hydra-queue-runner/hydra-queue-runner.cc @@ -47,7 +47,7 @@ State::State(std::optional metricsPortOpt) , maxLogSize(config->getIntOption("max_log_size", 64ULL << 20)) , uploadLogsToBinaryCache(config->getBoolOption("upload_logs_to_binary_cache", false)) , rootsDir(config->getStrOption("gc_roots_dir", fmt("%s/gcroots/per-user/%s/hydra-roots", settings.nixStateDir, getEnvOrDie("LOGNAME")))) - , metricsPort(config->getIntOption("queue_runner_metrics_port", 9099)) + , metricsPort(config->getIntOption("queue_runner_metrics_port", 9198)) , 
registry(std::make_shared()) { hydraData = getEnvOrDie("HYDRA_DATA"); From 33bc60b83c1d74da1ccbb6691135ff5176e4af7d Mon Sep 17 00:00:00 2001 From: Cole Helbling Date: Wed, 6 Apr 2022 10:46:56 -0700 Subject: [PATCH 16/24] hydra-queue-runner: move exporter back to State::run It's (arguably) better than risking pinning the thread at 100% due to the busy `while` loop. --- src/hydra-queue-runner/hydra-queue-runner.cc | 30 +++++++------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/src/hydra-queue-runner/hydra-queue-runner.cc b/src/hydra-queue-runner/hydra-queue-runner.cc index 87361aca..6a84749f 100644 --- a/src/hydra-queue-runner/hydra-queue-runner.cc +++ b/src/hydra-queue-runner/hydra-queue-runner.cc @@ -750,24 +750,6 @@ void State::unlock() } -void State::runMetricsExporter() -{ - std::cout << "Starting the Prometheus exporter on port " << metricsPort << std::endl; - - /* Set up simple exporter, to show that we're still alive. */ - std::string metricsAddress{"127.0.0.1"}; - prometheus::Exposer exposer{metricsAddress + ":" + std::to_string(metricsPort)}; - auto exposerPort = exposer.GetListeningPorts().front(); - exposer.RegisterCollectable(registry); - - std::cout << "Started the Prometheus exporter, listening on " - << "http://" << metricsAddress << ":" << exposerPort << "/metrics" - << std::endl; - - while (true) {}; -} - - void State::run(BuildID buildOne) { /* Can't be bothered to shut down cleanly. Goodbye! */ @@ -780,7 +762,17 @@ void State::run(BuildID buildOne) if (!lock) throw Error("hydra-queue-runner is already running"); - std::thread(&State::runMetricsExporter, this).detach(); + std::cout << "Starting the Prometheus exporter on port " << metricsPort << std::endl; + + /* Set up simple exporter, to show that we're still alive. 
*/ + std::string metricsAddress{"127.0.0.1"}; // FIXME: configurable + prometheus::Exposer promExposer{metricsAddress + ":" + std::to_string(metricsPort)}; + auto exposerPort = promExposer.GetListeningPorts().front(); + promExposer.RegisterCollectable(registry); + + std::cout << "Started the Prometheus exporter, listening on " + << "http://" << metricsAddress << ":" << exposerPort << "/metrics" + << std::endl; Store::Params localParams; localParams["max-connections"] = "16"; From edf3c348f2156827a43639c8acac7051b87dec98 Mon Sep 17 00:00:00 2001 From: Cole Helbling Date: Wed, 6 Apr 2022 10:58:57 -0700 Subject: [PATCH 17/24] hydra-queue-runner: make entire address configurable --- src/hydra-queue-runner/hydra-queue-runner.cc | 31 +++++++------------- src/hydra-queue-runner/state.hh | 4 +-- t/Hydra/Config/include.t | 2 +- t/lib/HydraTestContext.pm | 2 +- 4 files changed, 15 insertions(+), 24 deletions(-) diff --git a/src/hydra-queue-runner/hydra-queue-runner.cc b/src/hydra-queue-runner/hydra-queue-runner.cc index 6a84749f..5ad1a9d9 100644 --- a/src/hydra-queue-runner/hydra-queue-runner.cc +++ b/src/hydra-queue-runner/hydra-queue-runner.cc @@ -39,7 +39,7 @@ std::string getEnvOrDie(const std::string & key) } -State::State(std::optional metricsPortOpt) +State::State(std::optional metricsAddrOpt) : config(std::make_unique()) , maxUnsupportedTime(config->getIntOption("max_unsupported_time", 0)) , dbPool(config->getIntOption("max_db_connections", 128)) @@ -47,15 +47,15 @@ State::State(std::optional metricsPortOpt) , maxLogSize(config->getIntOption("max_log_size", 64ULL << 20)) , uploadLogsToBinaryCache(config->getBoolOption("upload_logs_to_binary_cache", false)) , rootsDir(config->getStrOption("gc_roots_dir", fmt("%s/gcroots/per-user/%s/hydra-roots", settings.nixStateDir, getEnvOrDie("LOGNAME")))) - , metricsPort(config->getIntOption("queue_runner_metrics_port", 9198)) + , metricsAddr(config->getStrOption("queue_runner_metrics_address", std::string{"127.0.0.1:9198"})) , 
registry(std::make_shared()) { hydraData = getEnvOrDie("HYDRA_DATA"); logDir = canonPath(hydraData + "/build-logs"); - if (metricsPortOpt.has_value()) { - metricsPort = metricsPortOpt.value(); + if (metricsAddrOpt.has_value()) { + metricsAddr = metricsAddrOpt.value(); } /* handle deprecated store specification */ @@ -762,16 +762,15 @@ void State::run(BuildID buildOne) if (!lock) throw Error("hydra-queue-runner is already running"); - std::cout << "Starting the Prometheus exporter on port " << metricsPort << std::endl; + std::cout << "Starting the Prometheus exporter on " << metricsAddr << std::endl; /* Set up simple exporter, to show that we're still alive. */ - std::string metricsAddress{"127.0.0.1"}; // FIXME: configurable - prometheus::Exposer promExposer{metricsAddress + ":" + std::to_string(metricsPort)}; + prometheus::Exposer promExposer{metricsAddr}; auto exposerPort = promExposer.GetListeningPorts().front(); promExposer.RegisterCollectable(registry); std::cout << "Started the Prometheus exporter, listening on " - << "http://" << metricsAddress << ":" << exposerPort << "/metrics" + << metricsAddr << "/metrics (port " << exposerPort << ")" << std::endl; Store::Params localParams; @@ -884,7 +883,7 @@ int main(int argc, char * * argv) bool unlock = false; bool status = false; BuildID buildOne = 0; - std::optional metricsPortOpt = std::nullopt; + std::optional metricsAddrOpt = std::nullopt; parseCmdLine(argc, argv, [&](Strings::iterator & arg, const Strings::iterator & end) { if (*arg == "--unlock") @@ -896,16 +895,8 @@ int main(int argc, char * * argv) buildOne = *b; else throw Error("‘--build-one’ requires a build ID"); - } else if (*arg == "--port") { - if (auto p = string2Int(getArg(*arg, arg, end))) { - if (*p > std::numeric_limits::max()) { - throw Error("'--port' has a maximum of 65535"); - } else { - metricsPortOpt = *p; - } - } else { - throw Error("'--port' requires a numeric port (0 for a random, usable port; max 65535)"); - } + } else if (*arg == 
"--prometheus-address") { + metricsAddrOpt = getArg(*arg, arg, end); } else return false; return true; @@ -914,7 +905,7 @@ int main(int argc, char * * argv) settings.verboseBuild = true; settings.lockCPU = false; - State state{metricsPortOpt}; + State state{metricsAddrOpt}; if (status) state.showStatus(); else if (unlock) diff --git a/src/hydra-queue-runner/state.hh b/src/hydra-queue-runner/state.hh index fb533559..a37548a3 100644 --- a/src/hydra-queue-runner/state.hh +++ b/src/hydra-queue-runner/state.hh @@ -434,12 +434,12 @@ private: via gc_roots_dir. */ nix::Path rootsDir; - uint16_t metricsPort; + std::string metricsAddr;; std::shared_ptr registry; public: - State(std::optional metricsPortOpt); + State(std::optional metricsAddrOpt); private: diff --git a/t/Hydra/Config/include.t b/t/Hydra/Config/include.t index 63186f87..14f657ff 100644 --- a/t/Hydra/Config/include.t +++ b/t/Hydra/Config/include.t @@ -20,7 +20,7 @@ write_file($ctx{'tmpdir'} . "/bar.conf", q| |); is(getHydraConfig(), { - queue_runner_metrics_port => 0, + queue_runner_metrics_address => "127.0.0.1:0", foo => { bar => "baz" } }, "Nested includes work."); diff --git a/t/lib/HydraTestContext.pm b/t/lib/HydraTestContext.pm index ce933c09..2bb1478c 100644 --- a/t/lib/HydraTestContext.pm +++ b/t/lib/HydraTestContext.pm @@ -51,7 +51,7 @@ sub new { $ENV{'HYDRA_CONFIG'} = "$dir/hydra.conf"; my $hydra_config = $opts{'hydra_config'} || ""; - $hydra_config = "queue_runner_metrics_port = 0\n" . $hydra_config; + $hydra_config = "queue_runner_metrics_address = 127.0.0.1:0\n" . $hydra_config; if ($opts{'use_external_destination_store'} // 1) { $hydra_config = "store_uri = file:$dir/nix/dest-store\n" . 
$hydra_config; } From 15e8fa8aff1421c99218da58597cd84d0c67afc7 Mon Sep 17 00:00:00 2001 From: Cole Helbling Date: Wed, 6 Apr 2022 11:41:18 -0700 Subject: [PATCH 18/24] doc/manual: document queue-runner prometheus exporter configuration --- doc/manual/src/configuration.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/doc/manual/src/configuration.md b/doc/manual/src/configuration.md index ec071f24..7aca17ef 100644 --- a/doc/manual/src/configuration.md +++ b/doc/manual/src/configuration.md @@ -102,6 +102,22 @@ in the hydra configuration file, as below: ``` +hydra-queue-runner's Prometheus service +--------------------------------------- + +hydra-queue-runner supports running a Prometheus webserver for metrics. The +exporter's address defaults to exposing on `127.0.0.1:9198`, but is also +configurable through the hydra configuration file and a command line argument, +as below. A port of `:0` will make the exposer choose a random, available port. + +```conf +queue_runner_metrics_address = 127.0.0.1:9198 +``` + +```shell +$ hydra-queue-runner --prometheus-address 127.0.0.1:9198 +``` + Using LDAP as authentication backend (optional) ----------------------------------------------- From 5bff730f2c4378bda6bbb46b5274dd4004a28b97 Mon Sep 17 00:00:00 2001 From: Cole Helbling Date: Wed, 6 Apr 2022 11:41:04 -0700 Subject: [PATCH 19/24] WIP: I love it when they delete the assignment operator :) --- src/hydra-queue-runner/hydra-queue-runner.cc | 15 +++++++++++++++ src/hydra-queue-runner/queue-monitor.cc | 2 ++ src/hydra-queue-runner/state.hh | 6 +++++- 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/hydra-queue-runner/hydra-queue-runner.cc b/src/hydra-queue-runner/hydra-queue-runner.cc index 5ad1a9d9..f00049a1 100644 --- a/src/hydra-queue-runner/hydra-queue-runner.cc +++ b/src/hydra-queue-runner/hydra-queue-runner.cc @@ -49,7 +49,20 @@ State::State(std::optional metricsAddrOpt) , rootsDir(config->getStrOption("gc_roots_dir", 
fmt("%s/gcroots/per-user/%s/hydra-roots", settings.nixStateDir, getEnvOrDie("LOGNAME")))) , metricsAddr(config->getStrOption("queue_runner_metrics_address", std::string{"127.0.0.1:9198"})) , registry(std::make_shared()) + // , call_ctr_family(prometheus::BuildCounter().Name("queue_queued_builds_calls_total").Help("Number of times State::getQueuedBuilds() was called").Register(*registry)) + // , call_ctr(call_ctr_family.Add({})) { + // call_ctr_family(prometheus::BuildCounter().Name("queue_queued_builds_calls_total").Help("Number of times State::getQueuedBuilds() was called").Register(*registry)); + // call_ctr(call_ctr_family.Add({})); + auto& fam = prometheus::BuildCounter() + .Name("queue_queued_builds_calls_total") + .Help("Number of times State::getQueuedBuilds() was called") + .Register(*registry) + .Add({}); + + // call_ctr_family(fam); + // call_ctr(call_ctr_family.Add({})); + hydraData = getEnvOrDie("HYDRA_DATA"); logDir = canonPath(hydraData + "/build-logs"); @@ -58,6 +71,7 @@ State::State(std::optional metricsAddrOpt) metricsAddr = metricsAddrOpt.value(); } + /* handle deprecated store specification */ if (config->getStrOption("store_mode") != "") throw Error("store_mode in hydra.conf is deprecated, please use store_uri"); @@ -767,6 +781,7 @@ void State::run(BuildID buildOne) /* Set up simple exporter, to show that we're still alive. 
*/ prometheus::Exposer promExposer{metricsAddr}; auto exposerPort = promExposer.GetListeningPorts().front(); + promExposer.RegisterCollectable(registry); std::cout << "Started the Prometheus exporter, listening on " diff --git a/src/hydra-queue-runner/queue-monitor.cc b/src/hydra-queue-runner/queue-monitor.cc index 49caf8e3..8fb06f45 100644 --- a/src/hydra-queue-runner/queue-monitor.cc +++ b/src/hydra-queue-runner/queue-monitor.cc @@ -82,6 +82,8 @@ struct PreviousFailure : public std::exception { bool State::getQueuedBuilds(Connection & conn, ref destStore, unsigned int & lastBuildId) { + call_ctr.Increment(); + printInfo("checking the queue for builds > %d...", lastBuildId); /* Grab the queued builds from the database, but don't process diff --git a/src/hydra-queue-runner/state.hh b/src/hydra-queue-runner/state.hh index a37548a3..9e89df52 100644 --- a/src/hydra-queue-runner/state.hh +++ b/src/hydra-queue-runner/state.hh @@ -7,6 +7,7 @@ #include #include +#include #include #include "db.hh" @@ -434,10 +435,13 @@ private: via gc_roots_dir. 
*/ nix::Path rootsDir; - std::string metricsAddr;; + std::string metricsAddr; std::shared_ptr registry; + // prometheus::Family& call_ctr_family; + prometheus::Counter& call_ctr; + public: State(std::optional metricsAddrOpt); From 46f52b4c4e94ae9d8699f8f857e6e8fcc4c39336 Mon Sep 17 00:00:00 2001 From: Graham Christensen Date: Wed, 6 Apr 2022 15:49:38 -0400 Subject: [PATCH 20/24] bring back the working version Cole made --- src/hydra-queue-runner/hydra-queue-runner.cc | 18 +++++------------- src/hydra-queue-runner/queue-monitor.cc | 2 +- src/hydra-queue-runner/state.hh | 4 ++-- 3 files changed, 8 insertions(+), 16 deletions(-) diff --git a/src/hydra-queue-runner/hydra-queue-runner.cc b/src/hydra-queue-runner/hydra-queue-runner.cc index f00049a1..0400b61b 100644 --- a/src/hydra-queue-runner/hydra-queue-runner.cc +++ b/src/hydra-queue-runner/hydra-queue-runner.cc @@ -49,20 +49,12 @@ State::State(std::optional metricsAddrOpt) , rootsDir(config->getStrOption("gc_roots_dir", fmt("%s/gcroots/per-user/%s/hydra-roots", settings.nixStateDir, getEnvOrDie("LOGNAME")))) , metricsAddr(config->getStrOption("queue_runner_metrics_address", std::string{"127.0.0.1:9198"})) , registry(std::make_shared()) - // , call_ctr_family(prometheus::BuildCounter().Name("queue_queued_builds_calls_total").Help("Number of times State::getQueuedBuilds() was called").Register(*registry)) - // , call_ctr(call_ctr_family.Add({})) + , call_ctr(prometheus::BuildCounter() + .Name("queue_queued_builds_calls_total") + .Help("Number of times State::getQueuedBuilds() was called") + .Register(*registry)) + , queue_queued_builds_calls(call_ctr.Add({})) // FIXME: add the proper arguments { - // call_ctr_family(prometheus::BuildCounter().Name("queue_queued_builds_calls_total").Help("Number of times State::getQueuedBuilds() was called").Register(*registry)); - // call_ctr(call_ctr_family.Add({})); - auto& fam = prometheus::BuildCounter() - .Name("queue_queued_builds_calls_total") - .Help("Number of times 
State::getQueuedBuilds() was called") - .Register(*registry) - .Add({}); - - // call_ctr_family(fam); - // call_ctr(call_ctr_family.Add({})); - hydraData = getEnvOrDie("HYDRA_DATA"); logDir = canonPath(hydraData + "/build-logs"); diff --git a/src/hydra-queue-runner/queue-monitor.cc b/src/hydra-queue-runner/queue-monitor.cc index 8fb06f45..bcef4e2f 100644 --- a/src/hydra-queue-runner/queue-monitor.cc +++ b/src/hydra-queue-runner/queue-monitor.cc @@ -82,7 +82,7 @@ struct PreviousFailure : public std::exception { bool State::getQueuedBuilds(Connection & conn, ref destStore, unsigned int & lastBuildId) { - call_ctr.Increment(); + queue_queued_builds_calls.Increment(); printInfo("checking the queue for builds > %d...", lastBuildId); diff --git a/src/hydra-queue-runner/state.hh b/src/hydra-queue-runner/state.hh index 9e89df52..56e01a0e 100644 --- a/src/hydra-queue-runner/state.hh +++ b/src/hydra-queue-runner/state.hh @@ -439,8 +439,8 @@ private: std::shared_ptr registry; - // prometheus::Family& call_ctr_family; - prometheus::Counter& call_ctr; + prometheus::Family& call_ctr; + prometheus::Counter& queue_queued_builds_calls; public: State(std::optional metricsAddrOpt); From 5de08d412ea1642be71849d3cd52410c64084958 Mon Sep 17 00:00:00 2001 From: Graham Christensen Date: Wed, 6 Apr 2022 19:59:53 -0400 Subject: [PATCH 21/24] queue metrics: refactor the metrics into a struct --- src/hydra-queue-runner/hydra-queue-runner.cc | 20 +++++++++++++------- src/hydra-queue-runner/queue-monitor.cc | 2 +- src/hydra-queue-runner/state.hh | 11 ++++++++--- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/src/hydra-queue-runner/hydra-queue-runner.cc b/src/hydra-queue-runner/hydra-queue-runner.cc index 0400b61b..b540bfe6 100644 --- a/src/hydra-queue-runner/hydra-queue-runner.cc +++ b/src/hydra-queue-runner/hydra-queue-runner.cc @@ -38,6 +38,18 @@ std::string getEnvOrDie(const std::string & key) return *value; } +State::PromMetrics::PromMetrics() + : 
registry(std::make_shared()) + , queue_checks_started( + prometheus::BuildCounter() + .Name("hydraqueuerunner_queue_checks_started_total") + .Help("Number of times State::getQueuedBuilds() was started") + .Register(*registry) + .Add({}) + ) +{ + +} State::State(std::optional metricsAddrOpt) : config(std::make_unique()) @@ -48,12 +60,6 @@ State::State(std::optional metricsAddrOpt) , uploadLogsToBinaryCache(config->getBoolOption("upload_logs_to_binary_cache", false)) , rootsDir(config->getStrOption("gc_roots_dir", fmt("%s/gcroots/per-user/%s/hydra-roots", settings.nixStateDir, getEnvOrDie("LOGNAME")))) , metricsAddr(config->getStrOption("queue_runner_metrics_address", std::string{"127.0.0.1:9198"})) - , registry(std::make_shared()) - , call_ctr(prometheus::BuildCounter() - .Name("queue_queued_builds_calls_total") - .Help("Number of times State::getQueuedBuilds() was called") - .Register(*registry)) - , queue_queued_builds_calls(call_ctr.Add({})) // FIXME: add the proper arguments { hydraData = getEnvOrDie("HYDRA_DATA"); @@ -774,7 +780,7 @@ void State::run(BuildID buildOne) prometheus::Exposer promExposer{metricsAddr}; auto exposerPort = promExposer.GetListeningPorts().front(); - promExposer.RegisterCollectable(registry); + promExposer.RegisterCollectable(prom.registry); std::cout << "Started the Prometheus exporter, listening on " << metricsAddr << "/metrics (port " << exposerPort << ")" diff --git a/src/hydra-queue-runner/queue-monitor.cc b/src/hydra-queue-runner/queue-monitor.cc index bcef4e2f..c4873cc5 100644 --- a/src/hydra-queue-runner/queue-monitor.cc +++ b/src/hydra-queue-runner/queue-monitor.cc @@ -82,7 +82,7 @@ struct PreviousFailure : public std::exception { bool State::getQueuedBuilds(Connection & conn, ref destStore, unsigned int & lastBuildId) { - queue_queued_builds_calls.Increment(); + prom.queue_checks_started.Increment(); printInfo("checking the queue for builds > %d...", lastBuildId); diff --git a/src/hydra-queue-runner/state.hh 
b/src/hydra-queue-runner/state.hh index 56e01a0e..6d1e45c4 100644 --- a/src/hydra-queue-runner/state.hh +++ b/src/hydra-queue-runner/state.hh @@ -437,10 +437,15 @@ private: std::string metricsAddr; - std::shared_ptr registry; + struct PromMetrics + { + std::shared_ptr registry; - prometheus::Family& call_ctr; - prometheus::Counter& queue_queued_builds_calls; + prometheus::Counter& queue_checks_started; + + PromMetrics(); + }; + PromMetrics prom; public: State(std::optional metricsAddrOpt); From 1c12c5882f61cc490a589a02475cbf304a58048a Mon Sep 17 00:00:00 2001 From: Graham Christensen Date: Wed, 6 Apr 2022 20:18:29 -0400 Subject: [PATCH 22/24] hydra queue runner: instrument the process of loading new builds with prom --- src/hydra-queue-runner/hydra-queue-runner.cc | 28 ++++++++++++++++++++ src/hydra-queue-runner/queue-monitor.cc | 13 +++++++-- src/hydra-queue-runner/state.hh | 5 ++++ 3 files changed, 44 insertions(+), 2 deletions(-) diff --git a/src/hydra-queue-runner/hydra-queue-runner.cc b/src/hydra-queue-runner/hydra-queue-runner.cc index b540bfe6..bf25258d 100644 --- a/src/hydra-queue-runner/hydra-queue-runner.cc +++ b/src/hydra-queue-runner/hydra-queue-runner.cc @@ -47,6 +47,34 @@ State::PromMetrics::PromMetrics() .Register(*registry) .Add({}) ) + , queue_build_loads( + prometheus::BuildCounter() + .Name("hydraqueuerunner_queue_build_loads_total") + .Help("Number of builds loaded") + .Register(*registry) + .Add({}) + ) + , queue_checks_early_exits( + prometheus::BuildCounter() + .Name("hydraqueuerunner_queue_checks_early_exits_total") + .Help("Number of times State::getQueuedBuilds() yielded to potential bumps") + .Register(*registry) + .Add({}) + ) + , queue_checks_finished( + prometheus::BuildCounter() + .Name("hydraqueuerunner_queue_checks_finished_total") + .Help("Number of times State::getQueuedBuilds() was completed") + .Register(*registry) + .Add({}) + ) + , queue_max_id( + prometheus::BuildGauge() + .Name("hydraqueuerunner_queue_max_build_id_info") + 
.Help("Maximum build record ID in the queue") + .Register(*registry) + .Add({}) + ) { } diff --git a/src/hydra-queue-runner/queue-monitor.cc b/src/hydra-queue-runner/queue-monitor.cc index c4873cc5..f2f3d59f 100644 --- a/src/hydra-queue-runner/queue-monitor.cc +++ b/src/hydra-queue-runner/queue-monitor.cc @@ -109,7 +109,10 @@ bool State::getQueuedBuilds(Connection & conn, auto builds_(builds.lock()); BuildID id = row["id"].as(); if (buildOne && id != buildOne) continue; - if (id > newLastBuildId) newLastBuildId = id; + if (id > newLastBuildId) { + newLastBuildId = id; + prom.queue_max_id.Set(id); + } if (builds_->count(id)) continue; auto build = std::make_shared( @@ -138,6 +141,7 @@ bool State::getQueuedBuilds(Connection & conn, std::set finishedDrvs; createBuild = [&](Build::ptr build) { + prom.queue_build_loads.Increment(); printMsg(lvlTalkative, format("loading build %1% (%2%)") % build->id % build->fullJobName()); nrAdded++; newBuildsByID.erase(build->id); @@ -308,9 +312,14 @@ bool State::getQueuedBuilds(Connection & conn, /* Stop after a certain time to allow priority bumps to be processed. */ - if (std::chrono::system_clock::now() > start + std::chrono::seconds(600)) break; + if (std::chrono::system_clock::now() > start + std::chrono::seconds(600)) { + prom.queue_checks_early_exits.Increment(); + break; + } } + prom.queue_checks_finished.Increment(); + lastBuildId = newBuildsByID.empty() ? 
newLastBuildId : newBuildsByID.begin()->first - 1; return newBuildsByID.empty(); } diff --git a/src/hydra-queue-runner/state.hh b/src/hydra-queue-runner/state.hh index 6d1e45c4..61954848 100644 --- a/src/hydra-queue-runner/state.hh +++ b/src/hydra-queue-runner/state.hh @@ -8,6 +8,7 @@ #include #include +#include #include #include "db.hh" @@ -442,6 +443,10 @@ private: std::shared_ptr registry; prometheus::Counter& queue_checks_started; + prometheus::Counter& queue_build_loads; + prometheus::Counter& queue_checks_early_exits; + prometheus::Counter& queue_checks_finished; + prometheus::Gauge& queue_max_id; PromMetrics(); }; From 59ac96a99c25726092bef4b17d3ef284266b8c66 Mon Sep 17 00:00:00 2001 From: Graham Christensen Date: Wed, 6 Apr 2022 20:23:02 -0400 Subject: [PATCH 23/24] Track the number of steps created --- src/hydra-queue-runner/hydra-queue-runner.cc | 7 +++++++ src/hydra-queue-runner/queue-monitor.cc | 2 ++ src/hydra-queue-runner/state.hh | 1 + 3 files changed, 10 insertions(+) diff --git a/src/hydra-queue-runner/hydra-queue-runner.cc b/src/hydra-queue-runner/hydra-queue-runner.cc index bf25258d..96dacbd4 100644 --- a/src/hydra-queue-runner/hydra-queue-runner.cc +++ b/src/hydra-queue-runner/hydra-queue-runner.cc @@ -54,6 +54,13 @@ State::PromMetrics::PromMetrics() .Register(*registry) .Add({}) ) + , queue_steps_created( + prometheus::BuildCounter() + .Name("hydraqueuerunner_queue_steps_created_total") + .Help("Number of steps created") + .Register(*registry) + .Add({}) + ) , queue_checks_early_exits( prometheus::BuildCounter() .Name("hydraqueuerunner_queue_checks_early_exits_total") diff --git a/src/hydra-queue-runner/queue-monitor.cc b/src/hydra-queue-runner/queue-monitor.cc index f2f3d59f..748df37f 100644 --- a/src/hydra-queue-runner/queue-monitor.cc +++ b/src/hydra-queue-runner/queue-monitor.cc @@ -448,6 +448,8 @@ Step::ptr State::createStep(ref destStore, if (!isNew) return step; + prom.queue_steps_created.Increment(); + printMsg(lvlDebug, "considering 
derivation ‘%1%’", localStore->printStorePath(drvPath)); /* Initialize the step. Note that the step may be visible in diff --git a/src/hydra-queue-runner/state.hh b/src/hydra-queue-runner/state.hh index 61954848..a8f64b4d 100644 --- a/src/hydra-queue-runner/state.hh +++ b/src/hydra-queue-runner/state.hh @@ -444,6 +444,7 @@ private: prometheus::Counter& queue_checks_started; prometheus::Counter& queue_build_loads; + prometheus::Counter& queue_steps_created; prometheus::Counter& queue_checks_early_exits; prometheus::Counter& queue_checks_finished; prometheus::Gauge& queue_max_id; From f8dc48f171b34b7b03bd08331c03f8e2c5a91991 Mon Sep 17 00:00:00 2001 From: Cole Helbling Date: Wed, 6 Apr 2022 17:53:11 -0700 Subject: [PATCH 24/24] hydra-queue-runner: fixup: remove extraneous newline --- src/hydra-queue-runner/hydra-queue-runner.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/hydra-queue-runner/hydra-queue-runner.cc b/src/hydra-queue-runner/hydra-queue-runner.cc index 96dacbd4..e3f5b772 100644 --- a/src/hydra-queue-runner/hydra-queue-runner.cc +++ b/src/hydra-queue-runner/hydra-queue-runner.cc @@ -104,7 +104,6 @@ State::State(std::optional metricsAddrOpt) metricsAddr = metricsAddrOpt.value(); } - /* handle deprecated store specification */ if (config->getStrOption("store_mode") != "") throw Error("store_mode in hydra.conf is deprecated, please use store_uri");