# Alerting rules, evaluated by vmalert against VictoriaMetrics on control.
# Everything is declared here in git. vmalert remote-writes alert state back to
# VM, so firing alerts surface as the `ALERTS{alertstate="firing"}` series and
# can be viewed in Grafana. The same VM is configured as the remote-read source
# so that vmalert restores pending/firing state (and `for` timers) when it
# restarts instead of starting every alert from scratch.
# No notifier is wired yet: notifier.blackhole makes that explicit (vmalert
# evaluates rules but sends nowhere). To deliver alerts later, drop blackhole
# and set settings."notifier.url" to an Alertmanager.
{ ... }:
let
  # Single-node VictoriaMetrics on loopback; used for querying, for persisting
  # alert state, and for restoring it.
  vmUrl = "http://127.0.0.1:8428";
in
{
  services.vmalert.instances.cnx = {
    enable = true;

    settings = {
      "datasource.url" = vmUrl;
      "remoteWrite.url" = vmUrl; # persists ALERTS / ALERTS_FOR_STATE back to VM
      # Without remoteRead.url the persisted ALERTS_FOR_STATE series is never
      # read back, so every vmalert restart would reset the `for` timers below.
      "remoteRead.url" = vmUrl;
      "notifier.blackhole" = true;
      "httpListenAddr" = "127.0.0.1:8880"; # vmalert UI/API, loopback only (like VM)
    };

    rules.groups = [
      {
        name = "dns";
        rules = [
          {
            # Fires when the nameservers report different SOA serials for the
            # same zone for 15 minutes.
            alert = "DNSSecondaryOutOfSync";
            expr = "max by (zone) (knot_zone_serial) - min by (zone) (knot_zone_serial) > 0";
            for = "15m";
            labels.severity = "warning";
            annotations.summary = "Zone {{ $labels.zone }} SOA serial differs between nameservers";
            # The "\n" keeps the line break that separates the explanation from
            # the remediation command in the rendered annotation.
            annotations.description = "The secondary is out of sync with the primary for {{ $labels.zone }}. \n`knotc zone-retransfer {{ $labels.zone }}` on ns2 forces a fresh pull.";
          }
          {
            # 3600 matches the "within 1h" wording of the summary.
            alert = "ZoneExpiryLow";
            expr = "knot_zone_status_expiration < 3600";
            for = "5m";
            labels.severity = "critical";
            annotations.summary = "Zone {{ $labels.zone }} on {{ $labels.instance }} is within 1h of expiry";
            annotations.description = "Transfers to the secondary appear to be failing; the zone stops being served when the SOA expire timer hits zero.";
          }
        ];
      }
      {
        name = "host";
        rules = [
          {
            alert = "ScrapeTargetDown";
            expr = "up == 0";
            for = "5m";
            labels.severity = "critical";
            annotations.summary = "{{ $labels.job }} exporter on {{ $labels.instance }} is down";
            annotations.description = "VictoriaMetrics cannot scrape this target; its metrics are missing.";
          }
          {
            # Used-space percentage of "/" computed from available vs. total bytes.
            alert = "RootFilesystemFull";
            expr = ''100 * (1 - node_filesystem_avail_bytes{mountpoint="/",fstype!="tmpfs"} / node_filesystem_size_bytes{mountpoint="/",fstype!="tmpfs"}) > 90'';
            for = "15m";
            labels.severity = "warning";
            annotations.summary = "Root filesystem on {{ $labels.instance }} is over 90% full";
          }
        ];
      }
    ];
  };
}