feat(alerts): add a sync failed too often alert
Signed-off-by: Raito Bezarius <masterancpp@gmail.com>
This commit is contained in:
parent
e2f5a7b0e4
commit
84efd0976d
11
services/monitoring/lgtm/alerts/forkos.yaml
Normal file
11
services/monitoring/lgtm/alerts/forkos.yaml
Normal file
|
@ -0,0 +1,11 @@
|
|||
# Alerting rules for the ForkOS automation jobs.
groups:
  - name: ForkOS automation
    rules:
      # Fires when the state="failed" series of a systemd unit whose name
      # matches ows*.service changed value more than twice within the last
      # 24 hours, and that condition has held for 30 minutes.
      # NOTE(review): changes() counts every value change of the series, so
      # entering and leaving the failed state would each count once — confirm
      # that "> 2" matches the "more than twice" wording of the annotations.
      - alert: SyncFailedTooOften
        # The dot before "service" is escaped so it only matches a literal
        # dot; "\\." is how the regex "\." is written inside a double-quoted
        # PromQL string (YAML single quotes keep the backslashes verbatim).
        expr: 'changes(node_systemd_unit_state{name=~"ows.*\\.service",state="failed"}[24h]) > 2'
        for: 30m
        labels:
          severity: critical
        annotations:
          summary: "Synchronization job {{ $labels.name }} has failed more than twice in the last 24 hours"
          # Folded scalar: the lines below are joined with single spaces into
          # one line, with no trailing newline.
          description: >-
            On {{ $labels.instance }}, the synchronization job has failed more
            than twice in the last 24 hours, check if there's a conflict or a
            stdenv change.
|
Loading…
Reference in a new issue