Add blackbox exporter for outside-in DNS probes

control runs blackbox_exporter on loopback, probing each nameserver's
public v4+v6 address for every zone: SOA (is the zone served?) and DNSKEY
(is it still signed? asked directly, since blackbox has no DO-bit option).
Probe definitions are
shared between the exporter config and the VictoriaMetrics scrape jobs
so they can't drift. Verified live against ns1/ns2 over v4 and v6.
This commit is contained in:
Berwn
2026-06-17 15:37:45 +07:00
parent 0544bf95e5
commit 54f607d063
6 changed files with 154 additions and 3 deletions
+11 -2
View File
@@ -37,10 +37,19 @@ deploy ns1 and control.
regenerable over time and control is the backup server, so this needs a
second client→server pair (e.g. control→ns2) rather than the same topology
## 3. Blackbox DNS probing
## 3. Blackbox DNS probing (done — pending deploy)
- [ ] `blackbox_exporter` on control doing real DNS + DNSSEC-validation queries
`blackbox_exporter` on control (loopback `:9115`), probing each nameserver's
public v4+v6 address for every zone: an SOA query (zone served?) and a DNSKEY
query (still signed?). Blackbox has no DO-bit option, so signing is checked by
asking for DNSKEY directly and asserting the RRset is present. Probe defs live
in `modules/monitoring/blackbox-probes.nix`, shared by the exporter
(`blackbox.nix`) and the VM scrape jobs (`server.nix`). Verified live against
ns1/ns2: SOA + DNSKEY succeed on both servers over v4 and v6.
- [x] `blackbox_exporter` on control doing real DNS + DNSSEC-signing (DNSKEY presence) queries
against ns1/ns2 — catches outside-in resolution failures the Knot stats miss
- [ ] still to pair (next): a `probe_success == 0` alert and a Grafana panel
## 4. Third secondary off Hetzner (resilience)
+5
View File
@@ -9,6 +9,11 @@ Metrics and dashboards live on `control`, reachable only over the ZeroTier mesh.
(`modules/monitoring/exporters.nix`).
- **knot-exporter** (`:9433`) on `ns1`/`ns2` only — reads Knot's control socket,
fed by the `mod-stats` module (query/response counters per zone).
- **blackbox_exporter** (`127.0.0.1:9115`) on `control` only — outside-in DNS
probes. For every zone it queries each nameserver's **public** address (v4 and
v6) for SOA (is the zone served?) and DNSKEY (is it still signed?). This is the
resolver's-eye view that the Knot stats can't see. Probe definitions are shared
between the exporter and the scrape jobs in `modules/monitoring/blackbox-probes.nix`.
## Storage & scraping
+1
View File
@@ -4,6 +4,7 @@
../../modules/static-ipv6.nix
../../modules/monitoring/exporters.nix
../../modules/monitoring/server.nix
../../modules/monitoring/blackbox.nix
../../modules/monitoring/alerts.nix
../../modules/docs.nix
];
+108
View File
@@ -0,0 +1,108 @@
# Blackbox DNS probe definitions, shared between the exporter module
# (modules/monitoring/blackbox.nix, which renders these into the blackbox
# config) and the scraper (modules/monitoring/server.nix, which turns them into
# VictoriaMetrics scrape jobs). Kept in one place so the module list and the
# scrape jobs can never drift apart.
#
# These query the nameservers' PUBLIC addresses, i.e. the path a real internet
# resolver takes, not the mesh — the whole point is to catch outside-in
# resolution failures the Knot stats can't see. For each zone we run two probes
# per endpoint: an SOA query (is the zone being served at all?) and a DNSKEY
# query (is it still DNSSEC-signed?). Blackbox has no DO-bit option, so we ask
# for DNSKEY directly — an authoritative signed zone returns it without EDNS0,
# and its absence means signing has broken.
{ lib }:
let
# Zones to probe, imported from ../dns/domains.nix and consumed below as a list
# of zone names.
domains = import ../dns/domains.nix;

# host:port of the blackbox exporter; every scrape job is redirected here.
blackboxAddr = "127.0.0.1:9115";

# Public endpoints of the authoritative nameservers. The v4 addresses also
# appear in the `internet` instance in clan.nix; the v6 ones in each ns
# machine's cnx.staticIPv6. IPv6 literals are bracketed for host:port.
endpoints =
  let
    # One endpoint record: `instance` is the label attached to the resulting
    # metrics, `target` is the DNS server address handed to blackbox.
    endpoint = instance: target: { inherit instance target; };
  in
  [
    (endpoint "ns1 v4" "46.224.170.206:53")
    (endpoint "ns1 v6" "[2a01:4f8:c014:b5c5::1]:53")
    (endpoint "ns2 v4" "157.180.70.82:53")
    (endpoint "ns2 v6" "[2a01:4f9:c014:6d87::1]:53")
  ];
# Record types probed for every zone. `type` is the DNS query type passed to
# blackbox; `name` is its lower-case form, used as a slug in module/job names.
queries =
  map
    (type: {
      name = lib.toLower type;
      inherit type;
    })
    [
      "SOA"
      "DNSKEY"
    ];
# Swap dots for underscores so a zone name can be embedded in an identifier.
sanitize = zone: builtins.replaceStrings [ "." ] [ "_" ] zone;
# Blackbox module name for one (zone, query) pair: dns_<query slug>_<sanitized zone>.
moduleName =
  zone: q:
  lib.concatStringsSep "_" [
    "dns"
    q.name
    (sanitize zone)
  ];
# Blackbox module set: one `dns` prober per (zone, query) pair, keyed by
# moduleName so the scrape jobs can select it via ?module=.
modules =
  let
    # name/value pair describing the prober for a single zone and query type.
    mkModule = zone: q: {
      name = moduleName zone q;
      value = {
        prober = "dns";
        timeout = "5s";
        dns = {
          # Fully-qualified zone apex.
          query_name = "${zone}.";
          query_type = q.type;
          valid_rcodes = [ "NOERROR" ];
          # Require answer RRs of the queried type: a NOERROR with an empty
          # answer (or a missing DNSKEY) still fails.
          # NOTE(review): blackbox applies this regexp to every answer RR, so
          # an unexpected extra record type in the answer section would also
          # fail the probe — confirm that is the intended strictness.
          validate_answer_rrs = {
            fail_if_not_matches_regexp = [ "\\s${q.type}\\s" ];
          };
        };
      };
    };
  in
  lib.listToAttrs (lib.concatMap (zone: map (mkModule zone) queries) domains);
# Scrape jobs, one per (zone, query) pair, each covering every endpoint.
scrapeConfigs =
  let
    # Identical for every job: hand the real DNS server to blackbox as
    # ?target=, then point the scrape at the exporter itself.
    relabel_configs = [
      {
        source_labels = [ "__address__" ];
        target_label = "__param_target";
      }
      {
        target_label = "__address__";
        replacement = blackboxAddr;
      }
    ];

    # Scrape job that runs the module for `zone`/`q` against all endpoints.
    mkJob =
      zone: q:
      let
        module = moduleName zone q;
      in
      {
        job_name = "blackbox_${module}";
        metrics_path = "/probe";
        params = {
          module = [ module ];
        };
        # One static target group per endpoint so each carries its own labels.
        static_configs = map (e: {
          targets = [ e.target ];
          labels = {
            inherit (e) instance;
            inherit zone;
            query = q.type;
          };
        }) endpoints;
        inherit relabel_configs;
      };
  in
  lib.concatMap (zone: map (mkJob zone) queries) domains;
in
{
  # blackboxAddr  — host:port the scrape jobs are redirected to
  # modules       — rendered into the exporter config by blackbox.nix
  # scrapeConfigs — appended to the VictoriaMetrics scrape jobs by server.nix
  inherit
    blackboxAddr
    modules
    scrapeConfigs
    ;
}
+24
View File
@@ -0,0 +1,24 @@
# Blackbox exporter on control: outside-in DNS probes against the public
# nameserver addresses (see blackbox-probes.nix for what and why). Bound to
# loopback — only VictoriaMetrics on the same host scrapes its /probe endpoint,
# and the scrape jobs that drive it live in server.nix. The probes leave control
# over the public internet to reach ns1/ns2, which is the path we want to test.
{
  lib,
  pkgs,
  ...
}:
let
  probes = import ./blackbox-probes.nix { inherit lib; };

  # The scrape jobs are redirected to probes.blackboxAddr ("host:port"), so the
  # listen socket is derived from that same string instead of repeating the
  # address and port here, where the two copies could silently drift apart.
  # The port is the text after the last ':'; everything before it is the host
  # (this keeps a bracketed IPv6 literal such as "[::1]" intact).
  addrParts = lib.splitString ":" probes.blackboxAddr;
  listenHost = lib.concatStringsSep ":" (lib.init addrParts);
  listenPort = lib.toInt (lib.last addrParts);
in
{
  services.prometheus.exporters.blackbox = {
    enable = true;
    listenAddress = listenHost;
    port = listenPort;
    # JSON is valid YAML; enableConfigCheck runs the exporter's own --config.check
    # against this file at build time, so a malformed prober is caught here.
    configFile = pkgs.writeText "blackbox.yml" (builtins.toJSON { inherit (probes) modules; });
    enableConfigCheck = true;
  };
}
+5 -1
View File
@@ -10,6 +10,7 @@
}:
let
mesh = import ../mesh-hosts.nix { inherit config lib; };
probes = import ./blackbox-probes.nix { inherit lib; };
vmPort = 8428;
grafanaPort = 3000;
controlV6 = mesh.hosts.control;
@@ -52,7 +53,10 @@ in
(target "ns2" (v6 mesh.hosts.ns2) 9433)
];
}
];
]
# Outside-in DNS probes via the blackbox exporter (blackbox.nix). The job
# list is generated from the same probe definitions the exporter uses.
++ probes.scrapeConfigs;
};
};