Alert on and chart blackbox DNS probe failures

DNSResolutionProbeFailed and DNSSECProbeFailed fire when the SOA or DNSKEY
probe (respectively) to a public nameserver address keeps failing for 5m. The CNX
DNS dashboard gains a "DNS probes (outside-in)" row: per-zone/server
status table, probe success, and probe latency.
This commit is contained in:
Berwn
2026-06-17 15:42:13 +07:00
parent 54f607d063
commit c7b0f206c8
3 changed files with 128 additions and 2 deletions
+23
View File
@@ -80,6 +80,29 @@ in
}
];
}
{
  # Rule group for the outside-in DNS probes (blackbox on control).
  # Each alert must hold for 5m before it fires, so a single dropped UDP
  # packet is ridden out and only a sustained failure is reported.
  name = "dns_probe";
  rules = [
    # SOA probe failing: seen from outside, the zone does not resolve at
    # this nameserver address.
    {
      alert = "DNSResolutionProbeFailed";
      expr = ''probe_success{query="SOA"} == 0'';
      for = "5m";
      labels = {
        severity = "critical";
      };
      annotations = {
        summary = "{{ $labels.zone }} is not resolving from {{ $labels.instance }}";
        description = "The blackbox SOA probe to this public nameserver address is failing; from the outside the zone looks unavailable there, which the Knot stats would not show.";
      };
    }
    # DNSKEY probe failing: the zone's keys are not being returned from
    # this nameserver address.
    {
      alert = "DNSSECProbeFailed";
      expr = ''probe_success{query="DNSKEY"} == 0'';
      for = "5m";
      labels = {
        severity = "critical";
      };
      annotations = {
        summary = "{{ $labels.zone }} DNSKEY missing from {{ $labels.instance }}";
        description = "The DNSKEY probe to this public nameserver address is failing: the zone's signing keys are not being served, so validating resolvers will treat answers as bogus.";
      };
    }
  ];
}
];
};
}