Merge pull request #1001 from DeterminateSystems/hydra-notify-prometheus

hydra-notify: export prometheus metrics
This commit is contained in:
Graham Christensen 2021-08-25 11:50:43 -04:00 committed by GitHub
commit da55838703
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 253 additions and 2 deletions

View file

@ -79,6 +79,22 @@ By default, Hydra will send stats to statsd at `localhost:8125`. Point Hydra to
</statsd> </statsd>
``` ```
hydra-notify's Prometheus service
---------------------------------
hydra-notify supports running a Prometheus webserver for metrics. The
exporter does not run unless a listen address and port are specified
in the hydra configuration file, as below:
```conf
<hydra_notify>
<prometheus>
listen_address = 127.0.0.1
port = 9199
</prometheus>
</hydra_notify>
```
Using LDAP as authentication backend (optional) Using LDAP as authentication backend (optional)
----------------------------------------------- -----------------------------------------------

View file

@ -13,3 +13,21 @@ $ curl --header "Accept: application/json" http://localhost:63333/queue-runner-s
... JSON payload ... ... JSON payload ...
``` ```
## Notification Daemon
The `hydra-notify` process can expose Prometheus metrics for plugin execution. See
[hydra-notify's Prometheus service](../configuration.md#hydra-notifys-prometheus-service)
for details on enabling and configuring the exporter.
The notification exporter exposes metrics on a per-plugin, per-event-type basis: execution
durations, frequency, successes, and failures.
### Diagnostic Dump
The notification daemon can also dump its metrics to stderr whether or not the exporter
is configured. This is particularly useful for cases where metrics data is needed but the
exporter was not enabled.
To trigger this diagnostic dump, send a Postgres notification on the
`hydra_notify_dump_metrics` channel with no payload. See
[Re-sending a notification](../notifications.md#re-sending-a-notification).

View file

@ -470,6 +470,7 @@
NetPrometheus NetPrometheus
NetStatsd NetStatsd
PadWalker PadWalker
ParallelForkManager
PerlCriticCommunity PerlCriticCommunity
PrometheusTinyShared PrometheusTinyShared
Readonly Readonly

View file

@ -19,6 +19,13 @@ if [ ! -f ./.hydra-data/hydra.conf ]; then
cat << EOF > .hydra-data/hydra.conf cat << EOF > .hydra-data/hydra.conf
# test-time instances likely don't want to bootstrap nixpkgs from scratch # test-time instances likely don't want to bootstrap nixpkgs from scratch
use-substitutes = true use-substitutes = true
<hydra_notify>
<prometheus>
listen_address = 127.0.0.1
port = 64445
</prometheus>
</hydra_notify>
EOF EOF
fi fi
HYDRA_CONFIG=$(pwd)/.hydra-data/hydra.conf exec hydra-dev-server --port 63333 HYDRA_CONFIG=$(pwd)/.hydra-data/hydra.conf exec hydra-dev-server --port 63333

View file

@ -70,6 +70,42 @@ sub getStatsdConfig {
} }
} }
# Extract the hydra-notify Prometheus exporter settings from a parsed Hydra
# configuration.
#
# Params:
#   $config - hashref holding the parsed Hydra configuration.
#
# Returns:
#   A new hashref containing only the `listen_address` and `port` keys when
#   the hydra_notify.prometheus block defines both of them; any other keys
#   in that block are dropped.
#   undef when hydra_notify or hydra_notify.prometheus is absent (silently),
#   or when either is not a block or a required key is missing (after
#   printing an explanation to STDERR).
#
# `return undef` (rather than a bare `return`) is used on purpose so that
# callers which pass the result straight into an argument list, i.e. call
# this sub in list context, still receive exactly one value.
sub getHydraNotifyPrometheusConfig {
    my ($config) = @_;
    my $cfg = $config->{hydra_notify};

    if (!defined($cfg)) {
        return undef;
    }

    if (ref $cfg ne "HASH") {
        print STDERR "Error reading Hydra's configuration file: hydra_notify should be a block.\n";
        return undef;
    }

    # Use a distinct name for the nested block: declaring `my $cfg` a second
    # time in this scope makes Perl warn that it masks the earlier one.
    my $promCfg = $cfg->{prometheus};

    if (!defined($promCfg)) {
        return undef;
    }

    if (ref $promCfg ne "HASH") {
        print STDERR "Error reading Hydra's configuration file: hydra_notify.prometheus should be a block.\n";
        return undef;
    }

    if (defined($promCfg->{"listen_address"}) && defined($promCfg->{"port"})) {
        return {
            "listen_address" => $promCfg->{'listen_address'},
            "port" => $promCfg->{'port'},
        };
    }

    # Reaching this point means at least one of the two required keys is missing.
    print STDERR "Error reading Hydra's configuration file: hydra_notify.prometheus should include listen_address and port.\n";
    return undef;
}
sub getBaseUrl { sub getBaseUrl {
my ($config) = @_; my ($config) = @_;

View file

@ -3,24 +3,92 @@
use strict; use strict;
use utf8; use utf8;
use Getopt::Long; use Getopt::Long;
use HTTP::Server::PSGI;
use Hydra::Event; use Hydra::Event;
use Hydra::Event::BuildFinished; use Hydra::Event::BuildFinished;
use Hydra::Helper::AddBuilds; use Hydra::Helper::AddBuilds;
use Hydra::Helper::Nix; use Hydra::Helper::Nix;
use Hydra::Plugin; use Hydra::Plugin;
use Hydra::PostgresListener; use Hydra::PostgresListener;
use Parallel::ForkManager;
use Prometheus::Tiny::Shared;
use Time::HiRes qw( gettimeofday tv_interval );
STDERR->autoflush(1); STDERR->autoflush(1);
STDOUT->autoflush(1); STDOUT->autoflush(1);
binmode STDERR, ":encoding(utf8)"; binmode STDERR, ":encoding(utf8)";
# Load the Hydra configuration up front; the Prometheus exporter settings
# are read from it below.
my $config = getHydraConfig();

# Metrics registry used throughout this script. The same object is served
# over HTTP by the exporter child process started further down.
# NOTE(review): presumably the "Shared" variant is what lets the forked
# exporter observe updates made by this process -- confirm against the
# Prometheus::Tiny::Shared documentation.
my $prom = Prometheus::Tiny::Shared->new;

# Note: It is very important to pre-declare any metrics before using them.
# Add a new declaration for any new metrics you create. Metrics which are
# not pre-declared disappear when their value is null. See:
# https://metacpan.org/pod/Prometheus::Tiny#declare

# Per-plugin, per-channel execution metrics.
$prom->declare(
    "notify_plugin_executions",
    type => "counter",
    help => "Number of times each plugin has been called by channel."
);
$prom->declare(
    "notify_plugin_runtime",
    type => "histogram",
    help => "Number of seconds spent executing each plugin by channel."
);
$prom->declare(
    "notify_plugin_success",
    type => "counter",
    help => "Number of successful executions of this plugin on this channel."
);
$prom->declare(
    "notify_plugin_error",
    type => "counter",
    help => "Number of failed executions of this plugin on this channel."
);

# Event loop metrics.
$prom->declare(
    "event_loop_iterations",
    type => "counter",
    help => "Number of iterations through the event loop. Incremented at the start of the event loop."
);
# This metric is overwritten with the current time via $prom->set(...) each
# time a message arrives, so it holds a point-in-time value rather than an
# accumulating count. That is a gauge, not a counter.
$prom->declare(
    "event_received",
    type => "gauge",
    help => "Timestamp of the last time a new event was received."
);
$prom->declare(
    "notify_event",
    type => "counter",
    help => "Number of events received on the given channel."
);
$prom->declare(
    "notify_event_error",
    type => "counter",
    help => "Number of events received that were unprocessable by channel."
);

# The exporter is optional: it only starts when the configuration provides
# both a listen address and a port (getHydraNotifyPrometheusConfig returns
# undef otherwise).
my $promCfg = Hydra::Helper::Nix::getHydraNotifyPrometheusConfig($config);
if (defined($promCfg)) {
    print STDERR "Starting the Prometheus exporter, listening on http://${\$promCfg->{'listen_address'}}:${\$promCfg->{'port'}}/metrics.\n";

    # Run the HTTP server in a child process so that serving metrics does
    # not block the notification loop in this process.
    my $fork_manager = Parallel::ForkManager->new(1);
    $fork_manager->start_child("metrics_exporter", sub {
        my $server = HTTP::Server::PSGI->new(
            host => $promCfg->{"listen_address"},
            port => $promCfg->{"port"},
            timeout => 1,
        );

        $server->run($prom->psgi);
    });
} else {
    print STDERR "Not starting the hydra-notify Prometheus exporter.\n";
}
my $queued_only; my $queued_only;
GetOptions( GetOptions(
"queued-only" => \$queued_only "queued-only" => \$queued_only
) or exit 1; ) or exit 1;
my $config = getHydraConfig();
my $db = Hydra::Model::DB->new(); my $db = Hydra::Model::DB->new();
@ -32,15 +100,24 @@ my $listener = Hydra::PostgresListener->new($dbh);
$listener->subscribe("build_started"); $listener->subscribe("build_started");
$listener->subscribe("build_finished"); $listener->subscribe("build_finished");
$listener->subscribe("step_finished"); $listener->subscribe("step_finished");
$listener->subscribe("hydra_notify_dump_metrics");
sub runPluginsForEvent { sub runPluginsForEvent {
my ($event) = @_; my ($event) = @_;
my $channelName = $event->{'channel_name'};
foreach my $plugin (@plugins) { foreach my $plugin (@plugins) {
$prom->inc("notify_plugin_executions", { channel => $channelName, plugin => ref $plugin });
eval { eval {
my $startTime = [gettimeofday()];
$event->execute($db, $plugin); $event->execute($db, $plugin);
$prom->histogram_observe("notify_plugin_runtime", tv_interval($startTime), { channel => $channelName, plugin => ref $plugin });
$prom->inc("notify_plugin_success", { channel => $channelName, plugin => ref $plugin });
1; 1;
} or do { } or do {
$prom->inc("notify_plugin_error", { channel => $channelName, plugin => ref $plugin });
print STDERR "error running $event->{'channel_name'} hooks: $@\n"; print STDERR "error running $event->{'channel_name'} hooks: $@\n";
} }
} }
@ -60,19 +137,28 @@ for my $build ($db->resultset('Builds')->search(
# Process incoming notifications. # Process incoming notifications.
while (!$queued_only) { while (!$queued_only) {
$prom->inc("event_loop_iterations");
my $messages = $listener->block_for_messages(); my $messages = $listener->block_for_messages();
while (my $message = $messages->()) { while (my $message = $messages->()) {
$prom->set("event_received", time());
my $channelName = $message->{"channel"}; my $channelName = $message->{"channel"};
my $pid = $message->{"pid"}; my $pid = $message->{"pid"};
my $payload = $message->{"payload"}; my $payload = $message->{"payload"};
$prom->inc("notify_event", { channel => $channelName });
if ($channelName eq "hydra_notify_dump_metrics") {
print STDERR "Dumping prometheus metrics:\n${\$prom->format}\n";
next;
}
eval { eval {
my $event = Hydra::Event->new_event($channelName, $message->{"payload"}); my $event = Hydra::Event->new_event($channelName, $message->{"payload"});
runPluginsForEvent($event); runPluginsForEvent($event);
1; 1;
} or do { } or do {
$prom->inc("notify_event_error", { channel => $channelName });
print STDERR "error processing message '$payload' on channel '$channelName': $@\n"; print STDERR "error processing message '$payload' on channel '$channelName': $@\n";
} }
} }

87
t/Config/hydra-notify.t Normal file
View file

@ -0,0 +1,87 @@
use strict;
use Setup;

# Hand test_init a hydra.conf snippet that configures the hydra-notify
# Prometheus exporter; the first case below reads it back through
# getHydraConfig().
my %ctx = test_init(hydra_config => q|
<hydra_notify>
<prometheus>
listen_address = 127.0.0.1
port = 9199
</prometheus>
</hydra_notify>
|);

# Loaded at runtime, after test_init has run.
require Hydra::Helper::Nix;
use Test2::V0;

# Function under test.
my $extract = \&Hydra::Helper::Nix::getHydraNotifyPrometheusConfig;

# Each case is [ input configuration, expected result, test name ].
my @cases = (
    [
        Hydra::Helper::Nix::getHydraConfig(),
        { 'listen_address' => "127.0.0.1", 'port' => 9199 },
        "Reading specific configuration from the hydra.conf works",
    ],
    [
        { "hydra_notify" => ":)" },
        undef,
        "Invalid (hydra_notify is a string) configuration options are undef",
    ],
    [
        { "hydra_notify" => [] },
        undef,
        "Invalid (hydra_notify is a list) configuration options are undef",
    ],
    [
        { "hydra_notify" => {} },
        undef,
        "Invalid (hydra_notify is an empty hash) configuration options are undef",
    ],
    [
        { "hydra_notify" => { "prometheus" => ":)" } },
        undef,
        "Invalid (hydra_notify.prometheus is a string) configuration options are undef",
    ],
    [
        { "hydra_notify" => { "prometheus" => {} } },
        undef,
        "Invalid (hydra_notify.prometheus is an empty hash) configuration options are undef",
    ],
    [
        { "hydra_notify" => { "prometheus" => { "listen_address" => "0.0.0.0" } } },
        undef,
        "Invalid (hydra_notify.prometheus.port is missing) configuration options are undef",
    ],
    [
        { "hydra_notify" => { "prometheus" => { "port" => 1234 } } },
        undef,
        "Invalid (hydra_notify.prometheus.listen_address is missing) configuration options are undef",
    ],
    [
        { "hydra_notify" => { "prometheus" => { "listen_address" => "127.0.0.1", "port" => 1234 } } },
        { "listen_address" => "127.0.0.1", "port" => 1234 },
        "Fully specified hydra_notify.prometheus config is valid and returned",
    ],
    [
        {
            "hydra_notify" => {
                "prometheus" => {
                    "listen_address" => "127.0.0.1",
                    "port" => 1234,
                    "extra_keys" => "meh",
                },
            },
        },
        { "listen_address" => "127.0.0.1", "port" => 1234 },
        "extra configuration in hydra_notify.prometheus is not returned",
    ],
);

# Run the cases in order, one assertion per case.
for my $case (@cases) {
    my ($input, $expected, $label) = @{$case};
    is($extract->($input), $expected, $label);
}

done_testing;