Add blackbox exporter for outside-in DNS probes
control runs blackbox_exporter on loopback, probing each nameserver's public v4+v6 address for every zone: SOA (zone served) and DNSKEY (still signed, since blackbox has no DO-bit option). Probe definitions are shared between the exporter config and the VictoriaMetrics scrape jobs so they can't drift. Verified live against ns1/ns2 over v4 and v6.
This commit is contained in:
@@ -37,10 +37,19 @@ deploy ns1 and control.
|
||||
regenerable over time and control is the backup server, so this needs a
|
||||
second client→server pair (e.g. control→ns2) rather than the same topology
|
||||
|
||||
## 3. Blackbox DNS probing
|
||||
## 3. Blackbox DNS probing (done — pending deploy)
|
||||
|
||||
- [ ] `blackbox_exporter` on control doing real DNS + DNSSEC-validation queries
|
||||
`blackbox_exporter` on control (loopback `:9115`), probing each nameserver's
|
||||
public v4+v6 address for every zone: an SOA query (zone served?) and a DNSKEY
|
||||
query (still signed?). Blackbox has no DO-bit option, so signing is checked by
|
||||
asking for DNSKEY directly and asserting the RRset is present. Probe defs live
|
||||
in `modules/monitoring/blackbox-probes.nix`, shared by the exporter
|
||||
(`blackbox.nix`) and the VM scrape jobs (`server.nix`). Verified live against
|
||||
ns1/ns2: SOA + DNSKEY succeed on both servers over v4 and v6.
|
||||
|
||||
- [x] `blackbox_exporter` on control doing real DNS + DNSSEC-signing (DNSKEY presence) queries
|
||||
against ns1/ns2 — catches outside-in resolution failures the Knot stats miss
|
||||
- [ ] still to add alongside it (next): a `probe_success == 0` alert and a Grafana panel
|
||||
|
||||
## 4. Third secondary off Hetzner (resilience)
|
||||
|
||||
|
||||
@@ -9,6 +9,11 @@ Metrics and dashboards live on `control`, reachable only over the ZeroTier mesh.
|
||||
(`modules/monitoring/exporters.nix`).
|
||||
- **knot-exporter** (`:9433`) on `ns1`/`ns2` only — reads Knot's control socket,
|
||||
fed by the `mod-stats` module (query/response counters per zone).
|
||||
- **blackbox_exporter** (`127.0.0.1:9115`) on `control` only — outside-in DNS
|
||||
probes. For every zone it queries each nameserver's **public** address (v4 and
|
||||
v6) for SOA (is the zone served?) and DNSKEY (is it still signed?). This is the
|
||||
resolver's-eye view that the Knot stats can't see. Probe definitions are shared
|
||||
between the exporter and the scrape jobs in `modules/monitoring/blackbox-probes.nix`.
|
||||
|
||||
## Storage & scraping
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
../../modules/static-ipv6.nix
|
||||
../../modules/monitoring/exporters.nix
|
||||
../../modules/monitoring/server.nix
|
||||
../../modules/monitoring/blackbox.nix
|
||||
../../modules/monitoring/alerts.nix
|
||||
../../modules/docs.nix
|
||||
];
|
||||
|
||||
@@ -0,0 +1,108 @@
|
||||
# Blackbox DNS probe definitions, shared between the exporter module
|
||||
# (modules/monitoring/blackbox.nix, which renders these into the blackbox
|
||||
# config) and the scraper (modules/monitoring/server.nix, which turns them into
|
||||
# VictoriaMetrics scrape jobs). Kept in one place so the module list and the
|
||||
# scrape jobs can never drift apart.
|
||||
#
|
||||
# These query the nameservers' PUBLIC addresses, i.e. the path a real internet
|
||||
# resolver takes, not the mesh — the whole point is to catch outside-in
|
||||
# resolution failures the Knot stats can't see. For each zone we run two probes
|
||||
# per endpoint: an SOA query (is the zone being served at all?) and a DNSKEY
|
||||
# query (is it still DNSSEC-signed?). Blackbox has no DO-bit option, so we ask
|
||||
# for DNSKEY directly — an authoritative signed zone returns it without EDNS0,
|
||||
# and its absence means signing has broken.
|
||||
{ lib }:
|
||||
let
|
||||
domains = import ../dns/domains.nix;
|
||||
|
||||
blackboxAddr = "127.0.0.1:9115";
|
||||
|
||||
# Public endpoints of the authoritative nameservers. The v4 addresses also
|
||||
# appear in the `internet` instance in clan.nix; the v6 ones in each ns
|
||||
# machine's cnx.staticIPv6. IPv6 literals are bracketed for host:port.
|
||||
endpoints = [
|
||||
{
|
||||
instance = "ns1 v4";
|
||||
target = "46.224.170.206:53";
|
||||
}
|
||||
{
|
||||
instance = "ns1 v6";
|
||||
target = "[2a01:4f8:c014:b5c5::1]:53";
|
||||
}
|
||||
{
|
||||
instance = "ns2 v4";
|
||||
target = "157.180.70.82:53";
|
||||
}
|
||||
{
|
||||
instance = "ns2 v6";
|
||||
target = "[2a01:4f9:c014:6d87::1]:53";
|
||||
}
|
||||
];
|
||||
|
||||
queries = [
|
||||
{
|
||||
name = "soa";
|
||||
type = "SOA";
|
||||
}
|
||||
{
|
||||
name = "dnskey";
|
||||
type = "DNSKEY";
|
||||
}
|
||||
];
|
||||
|
||||
sanitize = lib.replaceStrings [ "." ] [ "_" ];
|
||||
moduleName = zone: q: "dns_${q.name}_${sanitize zone}";
|
||||
|
||||
modules = lib.listToAttrs (
|
||||
lib.concatMap (
|
||||
zone:
|
||||
map (
|
||||
q:
|
||||
lib.nameValuePair (moduleName zone q) {
|
||||
prober = "dns";
|
||||
timeout = "5s";
|
||||
dns = {
|
||||
query_name = "${zone}.";
|
||||
query_type = q.type;
|
||||
valid_rcodes = [ "NOERROR" ];
|
||||
# Every answer RR must match this regexp, and an empty answer section also
|
||||
# fails: a NOERROR with no RR of the queried type (e.g. a missing DNSKEY) fails.
|
||||
validate_answer_rrs.fail_if_not_matches_regexp = [ "\\s${q.type}\\s" ];
|
||||
};
|
||||
}
|
||||
) queries
|
||||
) domains
|
||||
);
|
||||
|
||||
scrapeConfigs = lib.concatMap (
|
||||
zone:
|
||||
map (q: {
|
||||
job_name = "blackbox_${moduleName zone q}";
|
||||
metrics_path = "/probe";
|
||||
params.module = [ (moduleName zone q) ];
|
||||
static_configs = map (e: {
|
||||
targets = [ e.target ];
|
||||
labels = {
|
||||
instance = e.instance;
|
||||
zone = zone;
|
||||
query = q.type;
|
||||
};
|
||||
}) endpoints;
|
||||
# Hand the real DNS server to blackbox as ?target=, then point the scrape
|
||||
# at the exporter itself.
|
||||
relabel_configs = [
|
||||
{
|
||||
source_labels = [ "__address__" ];
|
||||
target_label = "__param_target";
|
||||
}
|
||||
{
|
||||
target_label = "__address__";
|
||||
replacement = blackboxAddr;
|
||||
}
|
||||
];
|
||||
}) queries
|
||||
) domains;
|
||||
in
|
||||
{
|
||||
inherit modules scrapeConfigs blackboxAddr;
|
||||
}
|
||||
@@ -0,0 +1,24 @@
|
||||
# Blackbox exporter on control: outside-in DNS probes against the public
|
||||
# nameserver addresses (see blackbox-probes.nix for what and why). Bound to
|
||||
# loopback — only VictoriaMetrics on the same host scrapes its /probe endpoint,
|
||||
# and the scrape jobs that drive it live in server.nix. The probes leave control
|
||||
# over the public internet to reach ns1/ns2, which is the path we want to test.
|
||||
{
|
||||
lib,
|
||||
pkgs,
|
||||
...
|
||||
}:
|
||||
let
|
||||
probes = import ./blackbox-probes.nix { inherit lib; };
|
||||
in
|
||||
{
|
||||
services.prometheus.exporters.blackbox = {
|
||||
enable = true;
|
||||
listenAddress = "127.0.0.1";
|
||||
port = 9115;
|
||||
# JSON is valid YAML; enableConfigCheck runs the exporter's own --config.check
|
||||
# against this file at build time, so a malformed prober is caught here.
|
||||
configFile = pkgs.writeText "blackbox.yml" (builtins.toJSON { inherit (probes) modules; });
|
||||
enableConfigCheck = true;
|
||||
};
|
||||
}
|
||||
@@ -10,6 +10,7 @@
|
||||
}:
|
||||
let
|
||||
mesh = import ../mesh-hosts.nix { inherit config lib; };
|
||||
probes = import ./blackbox-probes.nix { inherit lib; };
|
||||
vmPort = 8428;
|
||||
grafanaPort = 3000;
|
||||
controlV6 = mesh.hosts.control;
|
||||
@@ -52,7 +53,10 @@ in
|
||||
(target "ns2" (v6 mesh.hosts.ns2) 9433)
|
||||
];
|
||||
}
|
||||
];
|
||||
]
|
||||
# Outside-in DNS probes via the blackbox exporter (blackbox.nix). The job
|
||||
# list is generated from the same probe definitions the exporter uses.
|
||||
++ probes.scrapeConfigs;
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user