Compare commits

...

16 commits

Author SHA1 Message Date
emily 8d2a367e92 grafana-agent: make bagel.monitoring.grafana-agent.exporters an attrset
This allows us to use multiple jobs, one for each additional exporter,
and set their `job_name` accordingly.

`job_name` is exported as `job` label on the resulting metrics.
This allows us to quickly get an understanding of which metrics an
exporter actually provides by simply filtering all metrics by
`{job="$jobname"}`.
2024-07-08 09:34:26 +03:00
emily db8c831c2f grafana-agent: set hostname label on all metrics
This is handy for quickly seeing all metrics exported by a node, without
having to wrangle the already existing `instance` label.

`hostname` is essentially a variant of `instance` but without ports.
2024-07-08 09:34:26 +03:00
Ilya K ba0d50624d Switch to push metrics with Grafana Agent 2024-07-08 09:34:24 +03:00
Ilya K 40ba3c4ae7 Prepare for remote push metrics 2024-07-08 09:33:59 +03:00
Ilya K 346a74eabc Wire up Grafana to Alertmanager 2024-07-08 09:33:59 +03:00
Ilya K e8e262c6a4 Enable Mimir Alertmanager, add example alert
Still TODO: actually connect it to Matrix
2024-07-08 09:33:59 +03:00
Pierre Bourdon 5ebd71e4d5
tf/hydra: change Hydra URL 2024-07-08 00:01:24 +02:00
Pierre Bourdon 2700ac5efc
tf/dns: fix hydra CNAME 2024-07-08 00:01:14 +02:00
Pierre Bourdon caa1fce74e
hydra: move to hydra.forkos.org 2024-07-07 23:53:21 +02:00
Pierre Bourdon 5f8228536c
bagel-box: switch to forkos.org DNS root 2024-07-07 23:52:40 +02:00
Pierre Bourdon 078f298b8c
tf/dns: add bagel-box and hydra 2024-07-07 23:48:23 +02:00
Pierre Bourdon 4b0a2cd7e5
tf: add DNS management via Gandi 2024-07-07 20:43:05 +02:00
Pierre Bourdon dcd5f68545
tf: store hydra credentials in state via numtide/secret 2024-07-07 19:18:30 +02:00
Pierre Bourdon 7c6780a2a3
gitignore: add terraform lock file 2024-07-07 19:18:30 +02:00
Pierre Bourdon dd72904bf1
flake: replace tf wrappers with a single '.#tf' command 2024-07-07 19:18:30 +02:00
Pierre Bourdon 2e9483936e
tf/hydra: fix project owner to use an automation account 2024-07-07 18:44:17 +02:00
27 changed files with 319 additions and 246 deletions

1
.gitignore vendored
View file

@ -3,3 +3,4 @@ result
config.tf.json config.tf.json
.direnv .direnv
.terraform .terraform
.terraform.lock.hcl

View file

@ -31,9 +31,10 @@
terraform = pkgs.opentofu; terraform = pkgs.opentofu;
terraformCfg = terranix.lib.terranixConfiguration { terraformCfg = terranix.lib.terranixConfiguration {
inherit system; inherit system;
modules = [ modules = [
./terraform ./terraform
{ {
bagel.gandi.enable = true;
bagel.hydra.enable = true; bagel.hydra.enable = true;
} }
]; ];
@ -41,38 +42,16 @@
in in
{ {
apps.${system} = { apps.${system} = {
apply = { tf = {
type = "app"; type = "app";
program = toString (pkgs.writers.writeBash "apply" '' program = toString (pkgs.writers.writeBash "tf" ''
set -eo pipefail set -eo pipefail
rm -f config.tf.json ln -snf ${terraformCfg} config.tf.json
cp ${terraformCfg} config.tf.json exec ${lib.getExe terraform} "$@"
${lib.getExe terraform} init
${lib.getExe terraform} apply
''); '');
}; };
plan = {
type = "app";
program = toString (pkgs.writers.writeBash "plan" ''
set -eo pipefail
rm -f config.tf.json
cp ${terraformCfg} config.tf.json
${lib.getExe terraform} init
${lib.getExe terraform} plan
'');
};
# nix run ".#destroy"
destroy = {
type = "app";
program = toString (pkgs.writers.writeBash "destroy" ''
set -eo pipefail
ln -snf ${terraformCfg} config.tf.json
${lib.getExe terraform} init
${lib.getExe terraform} destroy
'');
};
default = self.apps.${system}.apply; default = self.apps.${system}.tf;
}; };
devShells.${system}.default = pkgs.mkShell { devShells.${system}.default = pkgs.mkShell {

View file

@ -40,12 +40,11 @@
hydra.enable = true; hydra.enable = true;
hydra.dbi = "dbi:Pg:dbname=hydra;user=hydra"; hydra.dbi = "dbi:Pg:dbname=hydra;user=hydra";
}; };
bagel.meta.monitoring.address = "bagel-box.delroth.net";
security.acme.acceptTerms = true; security.acme.acceptTerms = true;
security.acme.defaults.email = "bagel@delroth.net"; security.acme.defaults.email = "infra@forkos.org";
services.openssh.enable = true; services.openssh.enable = true;
deployment.targetHost = "bagel-box.delroth.net"; deployment.targetHost = "bagel-box.infra.forkos.org";
} }

View file

@ -24,7 +24,6 @@
}; };
}; };
}; };
bagel.meta.monitoring.address = "gerrit01.infra.forkos.org";
fileSystems."/gerrit-data" = { fileSystems."/gerrit-data" = {
device = "/dev/disk/by-uuid/d1062305-0dea-4740-9a27-b6b1691862a4"; device = "/dev/disk/by-uuid/d1062305-0dea-4740-9a27-b6b1691862a4";

View file

@ -24,8 +24,6 @@
}; };
}; };
bagel.meta.monitoring.address = "fodwatch.infra.forkos.org";
i18n.defaultLocale = "en_US.UTF-8"; i18n.defaultLocale = "en_US.UTF-8";
system.stateVersion = "24.05"; system.stateVersion = "24.05";

View file

@ -21,7 +21,6 @@
enable = true; enable = true;
domain = "netbox.forkos.org"; domain = "netbox.forkos.org";
}; };
bagel.meta.monitoring.address = "meta01.infra.forkos.org";
bagel.services.prometheus.enable = true; bagel.services.prometheus.enable = true;
bagel.services.loki.enable = true; bagel.services.loki.enable = true;
bagel.services.grafana.enable = true; bagel.services.grafana.enable = true;

View file

@ -42,10 +42,10 @@ in {
port = port; port = port;
dbi = cfg.dbi; dbi = cfg.dbi;
hydraURL = "https://hydra.bagel.delroth.net"; hydraURL = "https://hydra.forkos.org";
useSubstitutes = false; useSubstitutes = false;
notificationSender = "bagel@delroth.net"; notificationSender = "hydra@forkos.org";
# XXX: hydra overlay sets pkgs.hydra, but hydra's nixos module uses # XXX: hydra overlay sets pkgs.hydra, but hydra's nixos module uses
# pkgs.hydra_unstable... # pkgs.hydra_unstable...
@ -108,7 +108,7 @@ in {
worker_processes auto; worker_processes auto;
''; '';
virtualHosts."hydra.bagel.delroth.net" = { virtualHosts."hydra.forkos.org" = {
forceSSL = true; forceSSL = true;
enableACME = true; enableACME = true;

View file

@ -0,0 +1,105 @@
# Grafana Agent module: scrapes exporters listening on localhost and the
# systemd journal, then pushes metrics to mimir.forkos.org and logs to
# loki.forkos.org using HTTP basic auth.
{
  config,
  lib,
  ...
}:
let
  cfg = config.bagel.monitoring.grafana-agent;
  inherit (lib) mkEnableOption mkOption mkIf types;

  # The `$` is escaped, so the generated configuration contains the literal
  # text `${CREDENTIALS_DIRECTORY}/password` rather than a Nix interpolation.
  # NOTE(review): presumably expanded at runtime from the credential declared
  # via `services.grafana-agent.credentials` below; confirm against the
  # upstream NixOS module.
  passwordAsCredential = "\${CREDENTIALS_DIRECTORY}/password";
in
{
  options.bagel.monitoring.grafana-agent = {
    # mkEnableOption defaults to false; override so the agent is on by default.
    enable = (mkEnableOption "Grafana Agent") // { default = true; };

    exporters = mkOption {
      description = ''
        Set of additional exporters to scrape.
        The attribute name will be used as `job_name`
        internally, which ends up exported as `job` label
        on all metrics of that exporter.
      '';
      type = types.attrsOf (types.submodule {
        options.port = mkOption {
          description = "Exporter port";
          type = types.int;
        };
      });
      default = {};
    };
  };

  config = mkIf cfg.enable {
    age.secrets.grafana-agent-password.file = ../../secrets/metrics-push-password.age;

    services.grafana-agent = {
      enable = true;
      credentials.password = config.age.secrets.grafana-agent-password.path;

      settings = {
        metrics = {
          global.remote_write = [
            {
              url = "https://mimir.forkos.org/api/v1/push";
              basic_auth = {
                username = "promtail";
                password_file = passwordAsCredential;
              };
            }
          ];
          # Attach the node's host name to every metric.
          global.external_labels.hostname = config.networking.hostName;
          configs = [
            {
              name = config.networking.hostName;
              # One scrape job per declared exporter, named after its
              # attribute name.
              scrape_configs = lib.mapAttrsToList (name: value: {
                job_name = name;
                static_configs = [
                  # `targets` must be a list of "host:port" strings, not a
                  # bare string.
                  { targets = [ "localhost:${toString value.port}" ]; }
                ];
              }) cfg.exporters;
            }
          ];
        };

        logs = {
          global.clients = [
            {
              url = "https://loki.forkos.org/loki/api/v1/push";
              basic_auth = {
                username = "promtail";
                password_file = passwordAsCredential;
              };
            }
          ];
          configs = [
            {
              name = "journald";
              scrape_configs = [
                {
                  job_name = "system";
                  journal = {
                    max_age = "12h";
                    labels = {
                      job = "systemd-journal";
                      host = config.networking.hostName;
                    };
                  };
                  # Copy the journal's unit field into a `unit` label.
                  relabel_configs = [
                    {
                      source_labels = [ "__journal__systemd_unit" ];
                      target_label = "unit";
                    }
                  ];
                }
              ];
            }
          ];
          # `$` is escaped here as well: the literal `${STATE_DIRECTORY}` is
          # written to the configuration.
          positions_directory = "\${STATE_DIRECTORY}/positions";
        };

        integrations.node_exporter.enable_collectors = [
          "processes"
          "systemd"
        ];
      };
    };
  };
}

View file

@ -2,6 +2,6 @@
imports = [ imports = [
./exporters ./exporters
./lgtm ./lgtm
./promtail.nix ./agent.nix
]; ];
} }

View file

@ -17,6 +17,6 @@ in
listenAddress = "0.0.0.0"; listenAddress = "0.0.0.0";
}; };
bagel.meta.monitoring.exporters = [ { port = 9102; } ]; bagel.monitoring.grafana-agent.exporters.cadvisor.port = 9102;
}; };
} }

View file

@ -1,37 +1,7 @@
{
config,
lib,
...
}:
let
inherit (lib) mkOption types;
in
{ {
imports = [ imports = [
./cadvisor.nix ./cadvisor.nix
./node.nix
./nginx.nix ./nginx.nix
./postgres.nix ./postgres.nix
]; ];
options.bagel = {
meta.monitoring = {
address = mkOption {
description = "Node's public address";
type = types.str;
};
exporters = mkOption {
description = "List of all exporters to scrape";
type = types.listOf (types.submodule {
options.port = mkOption {
description = "Exporter port";
type = types.int;
};
});
default = [];
};
};
};
config.networking.firewall.allowedTCPPorts = map (e: e.port) config.bagel.meta.monitoring.exporters;
} }

View file

@ -30,8 +30,6 @@ in
]; ];
}; };
bagel.meta.monitoring.exporters = [ bagel.monitoring.grafana-agent.exporters.nginxlog.port = 9103;
{ port = 9103; }
];
}; };
} }

View file

@ -1,25 +0,0 @@
# Standard node_exporter, enabled by default and registered in the list of
# exporters to scrape.
{ config, lib, ... }:
let
  inherit (lib) mkEnableOption mkIf;

  cfg = config.bagel.monitoring.exporters.node;

  # Used both for the exporter itself and for its scrape registration.
  listenPort = 9101;
in
{
  options.bagel.monitoring.exporters.node = {
    enable = (mkEnableOption "Standard node_exporter") // { default = true; };
  };

  config = mkIf cfg.enable {
    services.prometheus.exporters.node = {
      enable = true;
      port = listenPort;
      enabledCollectors = [ "processes" "systemd" ];
    };

    bagel.meta.monitoring.exporters = [
      { port = listenPort; }
    ];
  };
}

View file

@ -24,8 +24,6 @@ in
services.postgresql.settings.shared_preload_libraries = "pg_stat_statements"; services.postgresql.settings.shared_preload_libraries = "pg_stat_statements";
bagel.meta.monitoring.exporters = [ bagel.monitoring.grafana-agent.exporters.postgres.port = 9104;
{ port = 9104; }
];
}; };
} }

View file

@ -0,0 +1,5 @@
# Example rule group containing a single demo alert.
groups:
  - name: Demo alerts
    rules:
      - alert: Demo alert
        # Constant expression; presumably makes the alert fire permanently
        # (TODO confirm against the ruler's handling of scalar expressions).
        expr: 1

View file

@ -2,6 +2,6 @@
imports = [ imports = [
./grafana.nix ./grafana.nix
./loki.nix ./loki.nix
./prometheus.nix ./mimir.nix
]; ];
} }

View file

@ -92,6 +92,7 @@ in
uid = "mimir"; uid = "mimir";
access = "proxy"; access = "proxy";
url = "http://127.0.0.1:9009/prometheus"; url = "http://127.0.0.1:9009/prometheus";
isDefault = true;
} }
{ {
name = "Loki"; name = "Loki";
@ -100,6 +101,17 @@ in
access = "proxy"; access = "proxy";
url = "http://127.0.0.1:9090/"; url = "http://127.0.0.1:9090/";
} }
{
name = "Mimir Alertmanager";
type = "alertmanager";
uid = "mimir-alertmanager";
access = "proxy";
url = "http://127.0.0.1:9009/";
jsonData = {
handleGrafanaManagedAlerts = true;
implementation = "mimir";
};
}
]; ];
}; };
}; };

View file

@ -12,8 +12,8 @@ in
config = mkIf cfg.enable { config = mkIf cfg.enable {
age.secrets = { age.secrets = {
loki-htpasswd = { metrics-push-htpasswd = {
file = ../../../secrets/loki-htpasswd.age; file = ../../../secrets/metrics-push-htpasswd.age;
owner = "nginx"; owner = "nginx";
}; };
loki-environment.file = ../../../secrets/loki-environment.age; loki-environment.file = ../../../secrets/loki-environment.age;
@ -93,7 +93,7 @@ in
forceSSL = true; forceSSL = true;
locations."/loki/api/v1/push" = { locations."/loki/api/v1/push" = {
proxyPass = "http://localhost:${toString config.services.loki.configuration.server.http_listen_port}"; proxyPass = "http://localhost:${toString config.services.loki.configuration.server.http_listen_port}";
basicAuthFile = config.age.secrets.loki-htpasswd.path; basicAuthFile = config.age.secrets.metrics-push-htpasswd.path;
}; };
}; };
}; };

View file

@ -0,0 +1,92 @@
# Mimir (with Alertmanager enabled) plus the nginx virtual host that accepts
# basic-auth protected pushes on mimir.forkos.org.
{
  config,
  lib,
  pkgs,
  ...
}:
let
  inherit (lib) mkEnableOption mkIf;

  cfg = config.bagel.services.prometheus;

  # Read the port back from the Mimir configuration so that it is only
  # spelled out once (`server.http_listen_port` below).
  httpPort = config.services.mimir.configuration.server.http_listen_port;
  localMimir = "http://localhost:${toString httpPort}";

  # Store directory holding one entry, `anonymous`, that links to ./alerts.
  # NOTE(review): presumably `anonymous` is the tenant Mimir uses while
  # multitenancy is disabled; confirm.
  rulesDirectory = pkgs.runCommand "mimir-rules" {} ''
    mkdir -p $out
    ln -s ${./alerts} $out/anonymous
  '';

  # Fallback Alertmanager configuration: everything is routed to the `matrix`
  # receiver, which has no settings of its own here.
  alertmanagerFallback = pkgs.writers.writeYAML "alertmanager.yaml" {
    route = {
      group_by = ["alertname"];
      receiver = "matrix";
    };
    receivers = [
      { name = "matrix"; }
    ];
  };
in
{
  options.bagel.services.prometheus.enable = mkEnableOption "Prometheus scraper";

  config = mkIf cfg.enable {
    age.secrets.metrics-push-htpasswd = {
      file = ../../../secrets/metrics-push-htpasswd.age;
      owner = "nginx";
    };
    age.secrets.mimir-environment.file = ../../../secrets/mimir-environment.age;

    # Environment file for the Mimir unit.
    systemd.services.mimir.serviceConfig.EnvironmentFile = [
      config.age.secrets.mimir-environment.path
    ];

    services.mimir = {
      enable = true;
      extraFlags = ["--config.expand-env=true"];

      configuration = {
        target = "all,alertmanager";
        multitenancy_enabled = false;

        common.storage.backend = "s3";
        common.storage.s3 = {
          endpoint = "s3.delroth.net";
          bucket_name = "bagel-mimir";
          # Secrets injected via environment variables; the `$` is escaped so
          # the placeholders reach the configuration verbatim.
          access_key_id = "\${S3_KEY_ID}";
          secret_access_key = "\${S3_KEY}";
        };

        server = {
          http_listen_port = 9009;
          grpc_server_max_concurrent_streams = 1000;
          grpc_server_max_recv_msg_size = 104857600;
          grpc_server_max_send_msg_size = 104857600;
        };

        ingester.ring.replication_factor = 1;
        blocks_storage.backend = "s3";

        ruler_storage.backend = "local";
        ruler_storage.local.directory = rulesDirectory;
        ruler.alertmanager_url = "${localMimir}/alertmanager";

        alertmanager = {
          sharding_ring.replication_factor = 1;
          fallback_config_file = alertmanagerFallback;
        };
        alertmanager_storage.backend = "filesystem";
      };
    };

    services.nginx.virtualHosts."mimir.forkos.org" = {
      forceSSL = true;
      enableACME = true;
      # Only the push endpoint is proxied, behind HTTP basic auth.
      locations."/api/v1/push" = {
        proxyPass = localMimir;
        basicAuthFile = config.age.secrets.metrics-push-htpasswd.path;
      };
    };
  };
}

View file

@ -1,83 +0,0 @@
# Prometheus in agent mode scraping every node's declared exporters and
# writing to a local Mimir instance.
{
  config,
  lib,
  nodes,
  ...
}:
let
  inherit (lib) mkEnableOption mkIf;

  cfg = config.bagel.services.prometheus;

  # Monitoring metadata of one machine; a missing address or exporter list
  # falls back to null / [].
  describeMachine = machine: {
    name = machine.config.networking.hostName;
    address = machine.config.bagel.meta.monitoring.address or null;
    exporters = machine.config.bagel.meta.monitoring.exporters or [];
  };

  # A machine is scraped only if it has an address and at least one exporter.
  isScrapable = meta: meta.address != null && meta.exporters != [];

  # One job per machine, with one "address:port" target per exporter.
  jobFor = meta: {
    job_name = meta.name;
    static_configs = [
      { targets = map (exporter: "${meta.address}:${toString exporter.port}") meta.exporters; }
    ];
  };

  jobConfigs =
    map jobFor
      (builtins.filter isScrapable
        (map describeMachine (builtins.attrValues nodes)));
in
{
  options.bagel.services.prometheus.enable = mkEnableOption "Prometheus scraper";

  config = mkIf cfg.enable {
    age.secrets.mimir-environment.file = ../../../secrets/mimir-environment.age;

    systemd.services.mimir.serviceConfig.EnvironmentFile = [
      config.age.secrets.mimir-environment.path
    ];

    services.prometheus = {
      enable = true;
      enableAgentMode = true;
      listenAddress = "127.0.0.1";
      port = 9001;
      globalConfig.scrape_interval = "15s";
      scrapeConfigs = jobConfigs;
      remoteWrite = [
        { url = "http://localhost:9009/api/v1/push"; }
      ];
    };

    services.mimir = {
      enable = true;
      extraFlags = ["--config.expand-env=true"];

      configuration = {
        multitenancy_enabled = false;

        common.storage.backend = "s3";
        common.storage.s3 = {
          endpoint = "s3.delroth.net";
          bucket_name = "bagel-mimir";
          # Secrets injected via environment variables; the `$` is escaped so
          # the placeholders reach the configuration verbatim.
          access_key_id = "\${S3_KEY_ID}";
          secret_access_key = "\${S3_KEY}";
        };

        server = {
          http_listen_port = 9009;
          grpc_server_max_concurrent_streams = 1000;
          grpc_server_max_recv_msg_size = 104857600;
          grpc_server_max_send_msg_size = 104857600;
        };

        ingester.ring.replication_factor = 1;
        blocks_storage.backend = "s3";

        ruler_storage.backend = "local";
        ruler_storage.local.directory = ./alerts;
      };
    };
  };
}

View file

@ -1,53 +0,0 @@
# Promtail: ships the systemd journal to loki.forkos.org. Enabled by default.
{ config, lib, ... }:
let
  inherit (lib) mkEnableOption mkIf;

  cfg = config.bagel.monitoring.promtail;

  # Push endpoint, authenticated with the age-managed password file.
  lokiClient = {
    url = "https://loki.forkos.org/loki/api/v1/push";
    basic_auth = {
      username = "promtail";
      password_file = config.age.secrets.promtail-password.path;
    };
  };

  # Journal scrape job; the journal's unit field is copied into `unit`.
  journalJob = {
    job_name = "system";
    journal = {
      max_age = "12h";
      labels = {
        host = config.networking.hostName;
        job = "systemd-journal";
      };
    };
    relabel_configs = [
      {
        source_labels = [ "__journal__systemd_unit" ];
        target_label = "unit";
      }
    ];
  };
in
{
  options.bagel.monitoring.promtail = {
    enable = (mkEnableOption "Promtail log export") // { default = true; };
  };

  config = mkIf cfg.enable {
    age.secrets.promtail-password.file = ../../secrets/promtail-password.age;
    age.secrets.promtail-password.owner = "promtail";

    services.promtail.enable = true;
    services.promtail.configuration = {
      server.disable = true;
      clients = [ lokiClient ];
      scrape_configs = [ journalJob ];
    };
  };
}

7
terraform/common.nix Normal file
View file

@ -0,0 +1,7 @@
{
  # No KMS is operational yet, so secrets are stored in the state file by way
  # of the numtide/secret provider.
  terraform.required_providers = {
    secret = {
      source = "numtide/secret";
      version = "~> 1.2.1";
    };
  };
}

View file

@ -1,5 +1,7 @@
{ {
imports = [ imports = [
./common.nix
./gandi.nix
./hydra.nix ./hydra.nix
./state.nix ./state.nix
]; ];

67
terraform/gandi.nix Normal file
View file

@ -0,0 +1,67 @@
# Gandi LiveDNS configuration: the forkos.org zone and its records.
{ lib, config, ... }:
let
  inherit (lib) mkEnableOption mkIf tf;

  cfg = config.bagel.gandi;

  # Reference to the zone resource that every record is attached to.
  zoneRef = tf.ref "resource.gandi_livedns_domain.forkos_org.id";

  # Positional shorthand for one record: name, TTL, type, list of values.
  mkRecord = name: ttl: type: values: {
    inherit name ttl type values;
  };

  # Resource name of a record: `forkos_org_<type>_<name>`, with every "." in
  # the record name replaced by "_".
  # TODO: make less fragile and have actual unique and stable names
  resourceNameFor = entry:
    "forkos_org_${entry.type}_${builtins.replaceStrings ["."] ["_"] entry.name}";

  # Turn a list of records into an attribute set keyed by resource name,
  # adding the zone reference to each record.
  toResources = entries:
    builtins.listToAttrs (map (entry: {
      name = resourceNameFor entry;
      value = entry // { zone = zoneRef; };
    }) entries);
in
{
  options.bagel.gandi.enable = mkEnableOption "the Gandi DNS configuration";

  config = mkIf cfg.enable {
    terraform.required_providers.gandi = {
      source = "go-gandi/gandi";
      version = "~> 2.3.0";
    };

    provider.gandi.personal_access_token = tf.ref "resource.secret_resource.gandi_pat.value";

    resource = {
      secret_resource.gandi_pat.lifecycle.prevent_destroy = true;

      gandi_livedns_domain.forkos_org.name = "forkos.org";

      gandi_livedns_record = toResources [
        (mkRecord "cl" 3600 "A" ["163.172.69.160"])
        (mkRecord "cl" 3600 "AAAA" ["2001:bc8:38ee:100:1000::10"])
        (mkRecord "fodwatch" 3600 "A" ["163.172.69.160"])
        (mkRecord "fodwatch" 3600 "AAAA" ["2001:bc8:38ee:100:1000::30"])
        (mkRecord "netbox" 3600 "A" ["163.172.69.160"])
        (mkRecord "netbox" 3600 "AAAA" ["2001:bc8:38ee:100:1000::20"])

        (mkRecord "bagel-box.infra" 3600 "AAAA" ["2001:bc8:38ee:100:100::1"])
        (mkRecord "gerrit01.infra" 3600 "AAAA" ["2001:bc8:38ee:100:1000::10"])
        (mkRecord "fodwatch.infra" 3600 "AAAA" ["2001:bc8:38ee:100:1000::30"])
        (mkRecord "meta01.infra" 3600 "AAAA" ["2001:bc8:38ee:100:1000::20"])

        (mkRecord "grafana" 3600 "CNAME" ["netbox"])
        (mkRecord "hydra" 3600 "CNAME" ["bagel-box.infra"])
        (mkRecord "loki" 3600 "CNAME" ["meta01.infra"])
        (mkRecord "mimir" 3600 "CNAME" ["grafana"])
      ];
    };
  };
}

View file

@ -1,6 +1,6 @@
{ lib, config, ... }: { lib, config, ... }:
let let
inherit (lib) mkEnableOption mkIf types mkOption; inherit (lib) mkEnableOption mkIf types mkOption tf;
cfg = config.bagel.hydra; cfg = config.bagel.hydra;
in in
{ {
@ -14,9 +14,12 @@ in
source = "DeterminateSystems/hydra"; source = "DeterminateSystems/hydra";
}; };
resource.secret_resource.hydra_password.lifecycle.prevent_destroy = true;
provider.hydra = { provider.hydra = {
host = "https://hydra.bagel.delroth.net"; host = "https://hydra.forkos.org";
# username/password are provided via HYDRA_USERNAME/HYDRA_PASSWORD username = "terraform";
password = tf.ref "resource.secret_resource.hydra_password.value";
}; };
resource.hydra_project.forkos = { resource.hydra_project.forkos = {
@ -24,7 +27,7 @@ in
display_name = "ForkOS"; display_name = "ForkOS";
description = "ForkOS packages collection"; description = "ForkOS packages collection";
homepage = "https://cl.forkos.org"; homepage = "https://cl.forkos.org";
owner = "raito"; owner = "terraform";
enabled = true; enabled = true;
visible = true; visible = true;
}; };