Merge pull request #1001 from DeterminateSystems/hydra-notify-prometheus
hydra-notify: export prometheus metrics
This commit is contained in:
commit
da55838703
|
@ -79,6 +79,22 @@ By default, Hydra will send stats to statsd at `localhost:8125`. Point Hydra to
|
||||||
</statsd>
|
</statsd>
|
||||||
```
|
```
|
||||||
|
|
||||||
|
hydra-notify's Prometheus service
|
||||||
|
---------------------------------
|
||||||
|
|
||||||
|
hydra-notify supports running a Prometheus webserver for metrics. The
|
||||||
|
exporter does not run unless a listen address and port are specified
|
||||||
|
in the hydra configuration file, as below:
|
||||||
|
|
||||||
|
```conf
|
||||||
|
<hydra_notify>
|
||||||
|
<prometheus>
|
||||||
|
listen_address = 127.0.0.1
|
||||||
|
port = 9199
|
||||||
|
</prometheus>
|
||||||
|
</hydra_notify>
|
||||||
|
```
|
||||||
|
|
||||||
Using LDAP as authentication backend (optional)
|
Using LDAP as authentication backend (optional)
|
||||||
-----------------------------------------------
|
-----------------------------------------------
|
||||||
|
|
||||||
|
|
|
@ -13,3 +13,21 @@ $ curl --header "Accept: application/json" http://localhost:63333/queue-runner-s
|
||||||
... JSON payload ...
|
... JSON payload ...
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Notification Daemon
|
||||||
|
|
||||||
|
The `hydra-notify` process can expose Prometheus metrics for plugin execution. See
|
||||||
|
[hydra-notify's Prometheus service](../configuration.md#hydra-notifys-prometheus-service)
|
||||||
|
for details on enabling and configuring the exporter.
|
||||||
|
|
||||||
|
The notification exporter exposes metrics on a per-plugin, per-event-type basis: execution
|
||||||
|
durations, frequency, successes, and failures.
|
||||||
|
|
||||||
|
### Diagnostic Dump
|
||||||
|
|
||||||
|
The notification daemon can also dump its metrics to stderr whether or not the exporter
|
||||||
|
is configured. This is particularly useful for cases where metrics data is needed but the
|
||||||
|
exporter was not enabled.
|
||||||
|
|
||||||
|
To trigger this diagnostic dump, send a Postgres notification on the
|
||||||
|
`hydra_notify_dump_metrics` channel with no payload. See
|
||||||
|
[Re-sending a notification](../notifications.md#re-sending-a-notification).
|
||||||
|
|
|
@ -470,6 +470,7 @@
|
||||||
NetPrometheus
|
NetPrometheus
|
||||||
NetStatsd
|
NetStatsd
|
||||||
PadWalker
|
PadWalker
|
||||||
|
ParallelForkManager
|
||||||
PerlCriticCommunity
|
PerlCriticCommunity
|
||||||
PrometheusTinyShared
|
PrometheusTinyShared
|
||||||
Readonly
|
Readonly
|
||||||
|
|
|
@ -19,6 +19,13 @@ if [ ! -f ./.hydra-data/hydra.conf ]; then
|
||||||
cat << EOF > .hydra-data/hydra.conf
|
cat << EOF > .hydra-data/hydra.conf
|
||||||
# test-time instances likely don't want to bootstrap nixpkgs from scratch
|
# test-time instances likely don't want to bootstrap nixpkgs from scratch
|
||||||
use-substitutes = true
|
use-substitutes = true
|
||||||
|
|
||||||
|
<hydra_notify>
|
||||||
|
<prometheus>
|
||||||
|
listen_address = 127.0.0.1
|
||||||
|
port = 64445
|
||||||
|
</prometheus>
|
||||||
|
</hydra_notify>
|
||||||
EOF
|
EOF
|
||||||
fi
|
fi
|
||||||
HYDRA_CONFIG=$(pwd)/.hydra-data/hydra.conf exec hydra-dev-server --port 63333
|
HYDRA_CONFIG=$(pwd)/.hydra-data/hydra.conf exec hydra-dev-server --port 63333
|
||||||
|
|
|
@ -70,6 +70,42 @@ sub getStatsdConfig {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Extract the hydra-notify Prometheus exporter settings from Hydra's
# configuration.
#
# Params:
#   $config - hash ref holding the parsed Hydra configuration.
#
# Returns:
#   A new hash ref containing only the "listen_address" and "port" values
#   when $config->{hydra_notify}{prometheus} is a block defining both keys;
#   any other keys in that block are not copied.
#   undef when the hydra_notify or prometheus entry is not defined.
#   undef, after printing an error to STDERR, when either entry is not a
#   hash or when listen_address / port is missing.
#
# `return undef` (not a bare `return`) is used on purpose: the result must
# stay a single scalar even when the call sits inside an argument list.
sub getHydraNotifyPrometheusConfig {
    my ($config) = @_;

    my $notifyCfg = $config->{hydra_notify};
    if (!defined($notifyCfg)) {
        return undef;
    }

    if (ref $notifyCfg ne "HASH") {
        print STDERR "Error reading Hydra's configuration file: hydra_notify should be a block.\n";
        return undef;
    }

    # A separate lexical is used for the nested block so the outer one is
    # not redeclared in the same scope.
    my $promCfg = $notifyCfg->{prometheus};
    if (!defined($promCfg)) {
        return undef;
    }

    if (ref $promCfg ne "HASH") {
        print STDERR "Error reading Hydra's configuration file: hydra_notify.prometheus should be a block.\n";
        return undef;
    }

    if (defined($promCfg->{"listen_address"}) && defined($promCfg->{"port"})) {
        return {
            "listen_address" => $promCfg->{'listen_address'},
            "port" => $promCfg->{'port'},
        };
    }

    print STDERR "Error reading Hydra's configuration file: hydra_notify.prometheus should include listen_address and port.\n";
    return undef;
}
|
||||||
|
|
||||||
|
|
||||||
sub getBaseUrl {
|
sub getBaseUrl {
|
||||||
my ($config) = @_;
|
my ($config) = @_;
|
||||||
|
|
|
@ -3,24 +3,92 @@
|
||||||
use strict;
|
use strict;
|
||||||
use utf8;
|
use utf8;
|
||||||
use Getopt::Long;
|
use Getopt::Long;
|
||||||
|
use HTTP::Server::PSGI;
|
||||||
use Hydra::Event;
|
use Hydra::Event;
|
||||||
use Hydra::Event::BuildFinished;
|
use Hydra::Event::BuildFinished;
|
||||||
use Hydra::Helper::AddBuilds;
|
use Hydra::Helper::AddBuilds;
|
||||||
use Hydra::Helper::Nix;
|
use Hydra::Helper::Nix;
|
||||||
use Hydra::Plugin;
|
use Hydra::Plugin;
|
||||||
use Hydra::PostgresListener;
|
use Hydra::PostgresListener;
|
||||||
|
use Parallel::ForkManager;
|
||||||
|
use Prometheus::Tiny::Shared;
|
||||||
|
use Time::HiRes qw( gettimeofday tv_interval );
|
||||||
|
|
||||||
STDERR->autoflush(1);
|
STDERR->autoflush(1);
|
||||||
STDOUT->autoflush(1);
|
STDOUT->autoflush(1);
|
||||||
binmode STDERR, ":encoding(utf8)";
|
binmode STDERR, ":encoding(utf8)";
|
||||||
|
|
||||||
|
my $config = getHydraConfig();
|
||||||
|
|
||||||
|
my $prom = Prometheus::Tiny::Shared->new;
|
||||||
|
# Note: It is very important to pre-declare any metrics before using them.
|
||||||
|
# Add a new declaration for any new metrics you create. Metrics which are
|
||||||
|
# not pre-declared disappear when their value is null. See:
|
||||||
|
# https://metacpan.org/pod/Prometheus::Tiny#declare
|
||||||
|
$prom->declare(
|
||||||
|
"notify_plugin_executions",
|
||||||
|
type => "counter",
|
||||||
|
help => "Number of times each plugin has been called by channel."
|
||||||
|
);
|
||||||
|
$prom->declare(
|
||||||
|
"notify_plugin_runtime",
|
||||||
|
type => "histogram",
|
||||||
|
help => "Number of seconds spent executing each plugin by channel."
|
||||||
|
);
|
||||||
|
$prom->declare(
|
||||||
|
"notify_plugin_success",
|
||||||
|
type => "counter",
|
||||||
|
help => "Number of successful executions of this plugin on this channel."
|
||||||
|
);
|
||||||
|
$prom->declare(
|
||||||
|
"notify_plugin_error",
|
||||||
|
type => "counter",
|
||||||
|
help => "Number of failed executions of this plugin on this channel."
|
||||||
|
);
|
||||||
|
$prom->declare(
|
||||||
|
"event_loop_iterations",
|
||||||
|
type => "counter",
|
||||||
|
help => "Number of iterations through the event loop. Incremented at the start of the event loop."
|
||||||
|
);
|
||||||
|
# event_received holds a point-in-time value (it is overwritten with the
# current epoch time via set() rather than incremented), so it is declared
# as a gauge instead of a counter.
$prom->declare(
    "event_received",
    type => "gauge",
    help => "Timestamp of the last time a new event was received."
);
|
||||||
|
$prom->declare(
|
||||||
|
"notify_event",
|
||||||
|
type => "counter",
|
||||||
|
help => "Number of events received on the given channel."
|
||||||
|
);
|
||||||
|
$prom->declare(
|
||||||
|
"notify_event_error",
|
||||||
|
type => "counter",
|
||||||
|
help => "Number of events received that were unprocessable by channel."
|
||||||
|
);
|
||||||
|
|
||||||
|
my $promCfg = Hydra::Helper::Nix::getHydraNotifyPrometheusConfig($config);
|
||||||
|
if (defined($promCfg)) {
|
||||||
|
print STDERR "Starting the Prometheus exporter, listening on http://${\$promCfg->{'listen_address'}}:${\$promCfg->{'port'}}/metrics.\n";
|
||||||
|
my $fork_manager = Parallel::ForkManager->new(1);
|
||||||
|
$fork_manager->start_child("metrics_exporter", sub {
|
||||||
|
my $server = HTTP::Server::PSGI->new(
|
||||||
|
host => $promCfg->{"listen_address"},
|
||||||
|
port => $promCfg->{"port"},
|
||||||
|
timeout => 1,
|
||||||
|
);
|
||||||
|
|
||||||
|
$server->run($prom->psgi);
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
print STDERR "Not starting the hydra-notify Prometheus exporter.\n";
|
||||||
|
}
|
||||||
|
|
||||||
my $queued_only;
|
my $queued_only;
|
||||||
|
|
||||||
GetOptions(
|
GetOptions(
|
||||||
"queued-only" => \$queued_only
|
"queued-only" => \$queued_only
|
||||||
) or exit 1;
|
) or exit 1;
|
||||||
|
|
||||||
my $config = getHydraConfig();
|
|
||||||
|
|
||||||
my $db = Hydra::Model::DB->new();
|
my $db = Hydra::Model::DB->new();
|
||||||
|
|
||||||
|
@ -32,15 +100,24 @@ my $listener = Hydra::PostgresListener->new($dbh);
|
||||||
$listener->subscribe("build_started");
|
$listener->subscribe("build_started");
|
||||||
$listener->subscribe("build_finished");
|
$listener->subscribe("build_finished");
|
||||||
$listener->subscribe("step_finished");
|
$listener->subscribe("step_finished");
|
||||||
|
$listener->subscribe("hydra_notify_dump_metrics");
|
||||||
|
|
||||||
sub runPluginsForEvent {
|
sub runPluginsForEvent {
|
||||||
my ($event) = @_;
|
my ($event) = @_;
|
||||||
|
|
||||||
|
my $channelName = $event->{'channel_name'};
|
||||||
|
|
||||||
foreach my $plugin (@plugins) {
|
foreach my $plugin (@plugins) {
|
||||||
|
$prom->inc("notify_plugin_executions", { channel => $channelName, plugin => ref $plugin });
|
||||||
eval {
|
eval {
|
||||||
|
my $startTime = [gettimeofday()];
|
||||||
$event->execute($db, $plugin);
|
$event->execute($db, $plugin);
|
||||||
|
|
||||||
|
$prom->histogram_observe("notify_plugin_runtime", tv_interval($startTime), { channel => $channelName, plugin => ref $plugin });
|
||||||
|
$prom->inc("notify_plugin_success", { channel => $channelName, plugin => ref $plugin });
|
||||||
1;
|
1;
|
||||||
} or do {
|
} or do {
|
||||||
|
$prom->inc("notify_plugin_error", { channel => $channelName, plugin => ref $plugin });
|
||||||
print STDERR "error running $event->{'channel_name'} hooks: $@\n";
|
print STDERR "error running $event->{'channel_name'} hooks: $@\n";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -60,19 +137,28 @@ for my $build ($db->resultset('Builds')->search(
|
||||||
|
|
||||||
# Process incoming notifications.
|
# Process incoming notifications.
|
||||||
while (!$queued_only) {
|
while (!$queued_only) {
|
||||||
|
$prom->inc("event_loop_iterations");
|
||||||
my $messages = $listener->block_for_messages();
|
my $messages = $listener->block_for_messages();
|
||||||
while (my $message = $messages->()) {
|
while (my $message = $messages->()) {
|
||||||
|
$prom->set("event_received", time());
|
||||||
my $channelName = $message->{"channel"};
|
my $channelName = $message->{"channel"};
|
||||||
my $pid = $message->{"pid"};
|
my $pid = $message->{"pid"};
|
||||||
my $payload = $message->{"payload"};
|
my $payload = $message->{"payload"};
|
||||||
|
|
||||||
|
$prom->inc("notify_event", { channel => $channelName });
|
||||||
|
|
||||||
|
if ($channelName eq "hydra_notify_dump_metrics") {
|
||||||
|
print STDERR "Dumping prometheus metrics:\n${\$prom->format}\n";
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
|
||||||
eval {
|
eval {
|
||||||
my $event = Hydra::Event->new_event($channelName, $message->{"payload"});
|
my $event = Hydra::Event->new_event($channelName, $message->{"payload"});
|
||||||
runPluginsForEvent($event);
|
runPluginsForEvent($event);
|
||||||
|
|
||||||
1;
|
1;
|
||||||
} or do {
|
} or do {
|
||||||
|
$prom->inc("notify_event_error", { channel => $channelName });
|
||||||
print STDERR "error processing message '$payload' on channel '$channelName': $@\n";
|
print STDERR "error processing message '$payload' on channel '$channelName': $@\n";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
87
t/Config/hydra-notify.t
Normal file
87
t/Config/hydra-notify.t
Normal file
|
@ -0,0 +1,87 @@
|
||||||
|
use strict;
|
||||||
|
use Setup;
|
||||||
|
|
||||||
|
my %ctx = test_init(hydra_config => q|
|
||||||
|
<hydra_notify>
|
||||||
|
<prometheus>
|
||||||
|
listen_address = 127.0.0.1
|
||||||
|
port = 9199
|
||||||
|
</prometheus>
|
||||||
|
</hydra_notify>
|
||||||
|
|);
|
||||||
|
|
||||||
|
require Hydra::Helper::Nix;
|
||||||
|
use Test2::V0;
|
||||||
|
|
||||||
|
is(Hydra::Helper::Nix::getHydraNotifyPrometheusConfig(Hydra::Helper::Nix::getHydraConfig()), {
|
||||||
|
'listen_address' => "127.0.0.1",
|
||||||
|
'port' => 9199
|
||||||
|
}, "Reading specific configuration from the hydra.conf works");
|
||||||
|
|
||||||
|
|
||||||
|
is(Hydra::Helper::Nix::getHydraNotifyPrometheusConfig({
|
||||||
|
"hydra_notify" => ":)"
|
||||||
|
}), undef, "Invalid (hydra_notify is a string) configuration options are undef");
|
||||||
|
|
||||||
|
is(Hydra::Helper::Nix::getHydraNotifyPrometheusConfig({
|
||||||
|
"hydra_notify" => []
|
||||||
|
}), undef, "Invalid (hydra_notify is a list) configuration options are undef");
|
||||||
|
|
||||||
|
is(Hydra::Helper::Nix::getHydraNotifyPrometheusConfig({
|
||||||
|
"hydra_notify" => {}
|
||||||
|
}), undef, "Invalid (hydra_notify is an empty hash) configuration options are undef");
|
||||||
|
|
||||||
|
is(Hydra::Helper::Nix::getHydraNotifyPrometheusConfig({
|
||||||
|
"hydra_notify" => {
|
||||||
|
"prometheus" => ":)"
|
||||||
|
}
|
||||||
|
}), undef, "Invalid (hydra_notify.prometheus is a string) configuration options are undef");
|
||||||
|
|
||||||
|
is(Hydra::Helper::Nix::getHydraNotifyPrometheusConfig({
|
||||||
|
"hydra_notify" => {
|
||||||
|
"prometheus" => {}
|
||||||
|
}
|
||||||
|
}), undef, "Invalid (hydra_notify.prometheus is an empty hash) configuration options are undef");
|
||||||
|
|
||||||
|
is(Hydra::Helper::Nix::getHydraNotifyPrometheusConfig({
|
||||||
|
"hydra_notify" => {
|
||||||
|
"prometheus" => {
|
||||||
|
"listen_address" => "0.0.0.0"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}), undef, "Invalid (hydra_notify.prometheus.port is missing) configuration options are undef");
|
||||||
|
|
||||||
|
is(Hydra::Helper::Nix::getHydraNotifyPrometheusConfig({
|
||||||
|
"hydra_notify" => {
|
||||||
|
"prometheus" => {
|
||||||
|
"port" => 1234
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}), undef, "Invalid (hydra_notify.prometheus.listen_address is missing) configuration options are undef");
|
||||||
|
|
||||||
|
is(Hydra::Helper::Nix::getHydraNotifyPrometheusConfig({
|
||||||
|
"hydra_notify" => {
|
||||||
|
"prometheus" => {
|
||||||
|
"listen_address" => "127.0.0.1",
|
||||||
|
"port" => 1234
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}), {
|
||||||
|
"listen_address" => "127.0.0.1",
|
||||||
|
"port" => 1234
|
||||||
|
}, "Fully specified hydra_notify.prometheus config is valid and returned");
|
||||||
|
|
||||||
|
is(Hydra::Helper::Nix::getHydraNotifyPrometheusConfig({
|
||||||
|
"hydra_notify" => {
|
||||||
|
"prometheus" => {
|
||||||
|
"listen_address" => "127.0.0.1",
|
||||||
|
"port" => 1234,
|
||||||
|
"extra_keys" => "meh",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}), {
|
||||||
|
"listen_address" => "127.0.0.1",
|
||||||
|
"port" => 1234
|
||||||
|
}, "extra configuration in hydra_notify.prometheus is not returned");
|
||||||
|
|
||||||
|
done_testing;
|
Loading…
Reference in a new issue