Files
cnx-network-clan/modules/monitoring/dashboards/dns.json
T
Berwn 33ac7e106b Add VictoriaMetrics + Grafana DNS monitoring over the mesh
control runs VictoriaMetrics (loopback) and Grafana; every machine exports
node metrics and the nameservers export Knot stats (mod-stats + knot-exporter).
Scraping and the Grafana UI ride the ZeroTier mesh only, scoped by nftables to
the mesh /88; the public side stays closed by the Hetzner cloud firewall. The
provisioned DNS dashboard includes a per-zone SOA serial table to catch
primary/secondary drift. ZeroTier ULAs are centralised in mesh-hosts.nix.
2026-06-17 10:17:27 +07:00

172 lines
5.5 KiB
JSON

{
"uid": "cnx-dns",
"title": "CNX DNS",
"tags": ["dns", "knot", "cnx"],
"timezone": "browser",
"schemaVersion": 39,
"version": 1,
"refresh": "30s",
"time": { "from": "now-6h", "to": "now" },
"templating": { "list": [] },
"annotations": { "list": [] },
"panels": [
{
"type": "row",
"title": "DNS / Zones",
"id": 1,
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }
},
{
"type": "table",
"title": "Zone SOA serial (per nameserver)",
"description": "ns1 and ns2 should report the same serial per zone. A divergence here is the secondary-out-of-sync condition.",
"id": 2,
"datasource": { "type": "prometheus", "uid": "victoriametrics" },
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 1 },
"options": { "showHeader": true },
"fieldConfig": {
"defaults": { "custom": { "align": "auto" } },
"overrides": []
},
"targets": [
{
"refId": "A",
"datasource": { "type": "prometheus", "uid": "victoriametrics" },
"expr": "knot_zone_serial",
"format": "table",
"instant": true,
"legendFormat": "{{zone}} @ {{instance}}"
}
]
},
{
"type": "timeseries",
"title": "Seconds until zone expiry",
"description": "On secondaries this counts down and is reset by each successful refresh (SOA serial check or zone transfer); a steady decline toward zero means refreshes are failing.",
"id": 3,
"datasource": { "type": "prometheus", "uid": "victoriametrics" },
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 1 },
"fieldConfig": { "defaults": { "unit": "s" }, "overrides": [] },
"targets": [
{
"refId": "A",
"datasource": { "type": "prometheus", "uid": "victoriametrics" },
"expr": "knot_zone_status_expiration",
"legendFormat": "{{zone}} @ {{instance}}"
}
]
},
{
"type": "timeseries",
"title": "Query rate by nameserver",
"id": 4,
"datasource": { "type": "prometheus", "uid": "victoriametrics" },
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 9 },
"fieldConfig": { "defaults": { "unit": "qps" }, "overrides": [] },
"targets": [
{
"refId": "A",
"datasource": { "type": "prometheus", "uid": "victoriametrics" },
"expr": "sum by (instance) (rate(knot_stats_request_protocol_total[5m]))",
"legendFormat": "{{instance}}"
}
]
},
{
"type": "timeseries",
"title": "Response codes",
"id": 5,
"datasource": { "type": "prometheus", "uid": "victoriametrics" },
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 9 },
"fieldConfig": { "defaults": { "unit": "qps" }, "overrides": [] },
"targets": [
{
"refId": "A",
"datasource": { "type": "prometheus", "uid": "victoriametrics" },
"expr": "sum by (type) (rate(knot_stats_response_code_total[5m]))",
"legendFormat": "{{type}}"
}
]
},
{
"type": "row",
"title": "Hosts",
"id": 6,
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 17 }
},
{
"type": "timeseries",
"title": "CPU busy %",
"id": 7,
"datasource": { "type": "prometheus", "uid": "victoriametrics" },
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 18 },
"fieldConfig": {
"defaults": { "unit": "percent", "min": 0, "max": 100 },
"overrides": []
},
"targets": [
{
"refId": "A",
"datasource": { "type": "prometheus", "uid": "victoriametrics" },
"expr": "100 - (avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100)",
"legendFormat": "{{instance}}"
}
]
},
{
"type": "timeseries",
"title": "Memory used %",
"id": 8,
"datasource": { "type": "prometheus", "uid": "victoriametrics" },
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 18 },
"fieldConfig": {
"defaults": { "unit": "percent", "min": 0, "max": 100 },
"overrides": []
},
"targets": [
{
"refId": "A",
"datasource": { "type": "prometheus", "uid": "victoriametrics" },
"expr": "100 * (1 - node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)",
"legendFormat": "{{instance}}"
}
]
},
{
"type": "timeseries",
"title": "Root filesystem used %",
"id": 9,
"datasource": { "type": "prometheus", "uid": "victoriametrics" },
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 26 },
"fieldConfig": {
"defaults": { "unit": "percent", "min": 0, "max": 100 },
"overrides": []
},
"targets": [
{
"refId": "A",
"datasource": { "type": "prometheus", "uid": "victoriametrics" },
"expr": "100 * (1 - node_filesystem_avail_bytes{mountpoint=\"/\",fstype!=\"tmpfs\"} / node_filesystem_size_bytes{mountpoint=\"/\",fstype!=\"tmpfs\"})",
"legendFormat": "{{instance}}"
}
]
},
{
"type": "timeseries",
"title": "Load average (1m)",
"id": 10,
"datasource": { "type": "prometheus", "uid": "victoriametrics" },
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 26 },
"fieldConfig": { "defaults": { "unit": "short" }, "overrides": [] },
"targets": [
{
"refId": "A",
"datasource": { "type": "prometheus", "uid": "victoriametrics" },
"expr": "node_load1",
"legendFormat": "{{instance}}"
}
]
}
]
}