diff --git a/doc/manual/src/configuration.md b/doc/manual/src/configuration.md index 170287da..d051c00a 100644 --- a/doc/manual/src/configuration.md +++ b/doc/manual/src/configuration.md @@ -79,6 +79,22 @@ By default, Hydra will send stats to statsd at `localhost:8125`. Point Hydra to ``` +hydra-notify's Prometheus service +--------------------------------- + +hydra-notify supports running a Prometheus webserver for metrics. The +exporter does not run unless a listen address and port are specified +in the hydra configuration file, as below: + +```conf +<hydra_notify> + <prometheus> + listen_address = 127.0.0.1 + port = 9199 + </prometheus> +</hydra_notify> +``` + Using LDAP as authentication backend (optional) ----------------------------------------------- diff --git a/doc/manual/src/monitoring/README.md b/doc/manual/src/monitoring/README.md index 65872352..1f17a64d 100644 --- a/doc/manual/src/monitoring/README.md +++ b/doc/manual/src/monitoring/README.md @@ -13,3 +13,21 @@ $ curl --header "Accept: application/json" http://localhost:63333/queue-runner-s ... JSON payload ... ``` +## Notification Daemon + +The `hydra-notify` process can expose Prometheus metrics for plugin execution. See +[hydra-notify's Prometheus service](../configuration.md#hydra-notifys-prometheus-service) +for details on enabling and configuring the exporter. + +The notification exporter exposes metrics on a per-plugin, per-event-type basis: execution +durations, frequency, successes, and failures. + +### Diagnostic Dump + +The notification daemon can also dump its metrics to stderr whether or not the exporter +is configured. This is particularly useful for cases where metrics data is needed but the +exporter was not enabled. + +To trigger this diagnostic dump, send a Postgres notification on the +`hydra_notify_dump_metrics` channel with no payload. See +[Re-sending a notification](../notifications.md#re-sending-a-notification).
diff --git a/flake.nix b/flake.nix index 616fa81c..44e5898c 100644 --- a/flake.nix +++ b/flake.nix @@ -470,6 +470,7 @@ NetPrometheus NetStatsd PadWalker + ParallelForkManager PerlCriticCommunity PrometheusTinyShared Readonly diff --git a/foreman/start-hydra.sh b/foreman/start-hydra.sh index 4e348266..cedcde6e 100755 --- a/foreman/start-hydra.sh +++ b/foreman/start-hydra.sh @@ -19,6 +19,13 @@ if [ ! -f ./.hydra-data/hydra.conf ]; then cat << EOF > .hydra-data/hydra.conf # test-time instances likely don't want to bootstrap nixpkgs from scratch use-substitutes = true + +<hydra_notify> + <prometheus> + listen_address = 127.0.0.1 + port = 64445 + </prometheus> +</hydra_notify> EOF fi HYDRA_CONFIG=$(pwd)/.hydra-data/hydra.conf exec hydra-dev-server --port 63333 diff --git a/src/lib/Hydra/Helper/Nix.pm b/src/lib/Hydra/Helper/Nix.pm index 7ab4ab60..bd294b73 100644 --- a/src/lib/Hydra/Helper/Nix.pm +++ b/src/lib/Hydra/Helper/Nix.pm @@ -70,6 +70,45 @@ sub getStatsdConfig { } } +# Read the hydra-notify Prometheus exporter settings from Hydra's parsed +# configuration ($config, a hash reference). +# +# Returns a hash reference holding only "listen_address" and "port" when the +# hydra_notify.prometheus block defines both. Returns undef silently when +# either block is absent, and undef after printing an error to STDERR when a +# block is not a hash or one of the two settings is missing. +# +# Failures use "return undef" rather than a bare "return" on purpose: the +# result is consumed in list context (for example as an argument to is() in +# t/Config/hydra-notify.t), where an empty list would shift the arguments. +sub getHydraNotifyPrometheusConfig { + my ($config) = @_; + + my $notifyCfg = $config->{hydra_notify}; + if (!defined($notifyCfg)) { + return undef; + } + + if (ref $notifyCfg ne "HASH") { + print STDERR "Error reading Hydra's configuration file: hydra_notify should be a block.\n"; + return undef; + } + + my $promCfg = $notifyCfg->{prometheus}; + if (!defined($promCfg)) { + return undef; + } + + if (ref $promCfg ne "HASH") { + print STDERR "Error reading Hydra's configuration file: hydra_notify.prometheus should be a block.\n"; + return undef; + } + + if (!defined($promCfg->{"listen_address"}) || !defined($promCfg->{"port"})) { + print STDERR "Error reading Hydra's configuration file: hydra_notify.prometheus should include listen_address and port.\n"; + return undef; + } + + return { + "listen_address" => $promCfg->{'listen_address'}, + "port" => $promCfg->{'port'}, + }; +} + sub getBaseUrl { my ($config) = @_; diff --git a/src/script/hydra-notify b/src/script/hydra-notify index 181fba36..770f0620 100755 --- a/src/script/hydra-notify +++ b/src/script/hydra-notify @@ -3,24 +3,92 @@
use strict; use utf8; use Getopt::Long; +use HTTP::Server::PSGI; use Hydra::Event; use Hydra::Event::BuildFinished; use Hydra::Helper::AddBuilds; use Hydra::Helper::Nix; use Hydra::Plugin; use Hydra::PostgresListener; +use Parallel::ForkManager; +use Prometheus::Tiny::Shared; +use Time::HiRes qw( gettimeofday tv_interval ); STDERR->autoflush(1); STDOUT->autoflush(1); binmode STDERR, ":encoding(utf8)"; +my $config = getHydraConfig(); + +my $prom = Prometheus::Tiny::Shared->new; +# Note: It is very important to pre-declare any metrics before using them. +# Add a new declaration for any new metrics you create. Metrics which are +# not pre-declared disappear when their value is null. See: +# https://metacpan.org/pod/Prometheus::Tiny#declare +$prom->declare( + "notify_plugin_executions", + type => "counter", + help => "Number of times each plugin has been called by channel." +); +$prom->declare( + "notify_plugin_runtime", + type => "histogram", + help => "Number of seconds spent executing each plugin by channel." +); +$prom->declare( + "notify_plugin_success", + type => "counter", + help => "Number of successful executions of this plugin on this channel." +); +$prom->declare( + "notify_plugin_error", + type => "counter", + help => "Number of failed executions of this plugin on this channel." +); +$prom->declare( + "event_loop_iterations", + type => "counter", + help => "Number of iterations through the event loop. Incremented at the start of the event loop." +); +$prom->declare( + "event_received", + type => "gauge", + help => "Timestamp of the last time a new event was received." +); +$prom->declare( + "notify_event", + type => "counter", + help => "Number of events received on the given channel." +); +$prom->declare( + "notify_event_error", + type => "counter", + help => "Number of events received that were unprocessable by channel."
+); + +my $promCfg = Hydra::Helper::Nix::getHydraNotifyPrometheusConfig($config); +if (defined($promCfg)) { + print STDERR "Starting the Prometheus exporter, listening on http://${\$promCfg->{'listen_address'}}:${\$promCfg->{'port'}}/metrics.\n"; + my $fork_manager = Parallel::ForkManager->new(1); + $fork_manager->start_child("metrics_exporter", sub { + my $server = HTTP::Server::PSGI->new( + host => $promCfg->{"listen_address"}, + port => $promCfg->{"port"}, + timeout => 1, + ); + + $server->run($prom->psgi); + }); +} else { + print STDERR "Not starting the hydra-notify Prometheus exporter.\n"; +} + my $queued_only; GetOptions( "queued-only" => \$queued_only ) or exit 1; -my $config = getHydraConfig(); my $db = Hydra::Model::DB->new(); @@ -32,15 +100,24 @@ my $listener = Hydra::PostgresListener->new($dbh); $listener->subscribe("build_started"); $listener->subscribe("build_finished"); $listener->subscribe("step_finished"); +$listener->subscribe("hydra_notify_dump_metrics"); sub runPluginsForEvent { my ($event) = @_; + my $channelName = $event->{'channel_name'}; + foreach my $plugin (@plugins) { + $prom->inc("notify_plugin_executions", { channel => $channelName, plugin => ref $plugin }); eval { + my $startTime = [gettimeofday()]; $event->execute($db, $plugin); + + $prom->histogram_observe("notify_plugin_runtime", tv_interval($startTime), { channel => $channelName, plugin => ref $plugin }); + $prom->inc("notify_plugin_success", { channel => $channelName, plugin => ref $plugin }); 1; } or do { + $prom->inc("notify_plugin_error", { channel => $channelName, plugin => ref $plugin }); print STDERR "error running $event->{'channel_name'} hooks: $@\n"; } } @@ -60,19 +137,28 @@ for my $build ($db->resultset('Builds')->search( # Process incoming notifications.
while (!$queued_only) { + $prom->inc("event_loop_iterations"); my $messages = $listener->block_for_messages(); while (my $message = $messages->()) { - + $prom->set("event_received", time()); my $channelName = $message->{"channel"}; my $pid = $message->{"pid"}; my $payload = $message->{"payload"}; + $prom->inc("notify_event", { channel => $channelName }); + + if ($channelName eq "hydra_notify_dump_metrics") { + print STDERR "Dumping prometheus metrics:\n${\$prom->format}\n"; + next; + } + eval { my $event = Hydra::Event->new_event($channelName, $message->{"payload"}); runPluginsForEvent($event); 1; } or do { + $prom->inc("notify_event_error", { channel => $channelName }); print STDERR "error processing message '$payload' on channel '$channelName': $@\n"; } } diff --git a/t/Config/hydra-notify.t b/t/Config/hydra-notify.t new file mode 100644 index 00000000..7fc8aa63 --- /dev/null +++ b/t/Config/hydra-notify.t @@ -0,0 +1,87 @@ +use strict; +use Setup; + +my %ctx = test_init(hydra_config => q| +<hydra_notify> + <prometheus> + listen_address = 127.0.0.1 + port = 9199 + </prometheus> +</hydra_notify> +|); + +require Hydra::Helper::Nix; +use Test2::V0; + +is(Hydra::Helper::Nix::getHydraNotifyPrometheusConfig(Hydra::Helper::Nix::getHydraConfig()), { + 'listen_address' => "127.0.0.1", + 'port' => 9199 +}, "Reading specific configuration from the hydra.conf works"); + + +is(Hydra::Helper::Nix::getHydraNotifyPrometheusConfig({ + "hydra_notify" => ":)" +}), undef, "Invalid (hydra_notify is a string) configuration options are undef"); + +is(Hydra::Helper::Nix::getHydraNotifyPrometheusConfig({ + "hydra_notify" => [] +}), undef, "Invalid (hydra_notify is a list) configuration options are undef"); + +is(Hydra::Helper::Nix::getHydraNotifyPrometheusConfig({ + "hydra_notify" => {} +}), undef, "Invalid (hydra_notify is an empty hash) configuration options are undef"); + +is(Hydra::Helper::Nix::getHydraNotifyPrometheusConfig({ + "hydra_notify" => { + "prometheus" => ":)" + } +}), undef, "Invalid (hydra_notify.prometheus is a string)
configuration options are undef"); + +is(Hydra::Helper::Nix::getHydraNotifyPrometheusConfig({ + "hydra_notify" => { + "prometheus" => {} + } +}), undef, "Invalid (hydra_notify.prometheus is an empty hash) configuration options are undef"); + +is(Hydra::Helper::Nix::getHydraNotifyPrometheusConfig({ + "hydra_notify" => { + "prometheus" => { + "listen_address" => "0.0.0.0" + } + } +}), undef, "Invalid (hydra_notify.prometheus.port is missing) configuration options are undef"); + +is(Hydra::Helper::Nix::getHydraNotifyPrometheusConfig({ + "hydra_notify" => { + "prometheus" => { + "port" => 1234 + } + } +}), undef, "Invalid (hydra_notify.prometheus.listen_address is missing) configuration options are undef"); + +is(Hydra::Helper::Nix::getHydraNotifyPrometheusConfig({ + "hydra_notify" => { + "prometheus" => { + "listen_address" => "127.0.0.1", + "port" => 1234 + } + } +}), { + "listen_address" => "127.0.0.1", + "port" => 1234 +}, "Fully specified hydra_notify.prometheus config is valid and returned"); + +is(Hydra::Helper::Nix::getHydraNotifyPrometheusConfig({ + "hydra_notify" => { + "prometheus" => { + "listen_address" => "127.0.0.1", + "port" => 1234, + "extra_keys" => "meh", + } + } +}), { + "listen_address" => "127.0.0.1", + "port" => 1234 +}, "extra configuration in hydra_notify.prometheus is not returned"); + +done_testing;