d4a171640b
control runs VictoriaLogs (:9428, 30d, mesh-scoped) with a matching Grafana datasource. Each host ships journald via systemd's own journald.upload to the /insert/journald endpoint -- no extra agent. control uploads over loopback so its logs survive a mesh outage; ns1 and ns2 push over the mesh.
148 lines
4.8 KiB
Nix
148 lines
4.8 KiB
Nix
# Monitoring server, imported by control only: VictoriaMetrics (TSDB + scraper),
# VictoriaLogs (journald log store) and Grafana. VictoriaMetrics binds loopback
# (only Grafana, on the same host, reads it). Grafana and VictoriaLogs ingestion
# are reachable over the ZeroTier mesh, scoped by the firewall rule at the
# bottom; the Hetzner cloud firewall keeps them off the public net.
|
|
{ config, lib, pkgs, ... }:

let
  # Listening ports of the three services configured in this module.
  vmPort = 8428;
  logsPort = 9428;
  grafanaPort = 3000;

  # Mesh definitions; this module reads `hosts.<name>` and `subnet` from it.
  mesh = import ../mesh-hosts.nix { inherit config lib; };

  # Blackbox probe definitions; this module reads `scrapeConfigs` from it.
  probes = import ./blackbox-probes.nix { inherit lib; };

  # Mesh address of the control host, used to build Grafana's root_url.
  controlV6 = mesh.hosts.control;

  # Wraps an IPv6 address in brackets, as Prometheus-style "host:port"
  # targets (and URLs) require.
  v6 = ip: "[${ip}]";

  # Builds one static_configs entry: a single "host:port" target carrying a
  # friendly `instance` label. IPv6 hosts must already be bracketed (see v6).
  target = instance: host: port: {
    labels = { inherit instance; };
    targets = [ "${host}:${toString port}" ];
  };

  # Path of the generated Grafana admin password (clan var grafana-admin/password).
  adminPasswordFile = config.clan.core.vars.generators.grafana-admin.files."password".path;
in
|
|
{
|
|
services.victoriametrics = {
  enable = true;
  retentionPeriod = "180d";
  # Loopback only: the metrics store is not exposed beyond this host.
  listenAddress = "127.0.0.1:${toString vmPort}";
  # By default the scraper dials IPv4 only. The ns1/ns2 targets are mesh ULAs,
  # so without this flag VM drops them with
  # "no suitable address found (try -enableTCP6)".
  extraOptions = [ "-enableTCP6" ];

  prometheusConfig = {
    global = {
      scrape_interval = "30s";
    };

    scrape_configs =
      [
        {
          job_name = "node";
          # control's node_exporter is scraped over loopback so host metrics
          # keep flowing even when the mesh is down; ns1/ns2 go over the mesh.
          static_configs =
            [ (target "control" "127.0.0.1" 9100) ]
            ++ map (host: target host (v6 mesh.hosts.${host}) 9100) [ "ns1" "ns2" ];
        }
        {
          job_name = "knot";
          static_configs = map (host: target host (v6 mesh.hosts.${host}) 9433) [ "ns1" "ns2" ];
        }
      ]
      # Outside-in DNS probes through the blackbox exporter (blackbox.nix).
      # These jobs are generated from the same probe definitions the exporter uses.
      ++ probes.scrapeConfigs;
  };
};
|
|
|
|
# Centralized logs: VictoriaLogs ingests journald from all three hosts, each
# of which runs systemd-journal-upload against /insert/journald (exporters.nix).
# Binds all interfaces because ns1/ns2 push over the mesh; the firewall rule at
# the bottom scopes 9428 to the mesh subnet and the Hetzner firewall closes the
# public side.
services.victorialogs = {
  enable = true;
  listenAddress = ":${toString logsPort}";
  extraOptions = [
    # Retention has no dedicated NixOS option, so it is passed as a flag.
    "-retentionPeriod=30d"
    # VictoriaMetrics-family binaries listen on IPv4 only unless this flag is
    # set. ns1/ns2 push from IPv6 mesh addresses (the firewall rule matches
    # `ip6 saddr`), so without it their uploads cannot connect. With the flag
    # the listener accepts both IPv4 and IPv6, so the loopback datasource URL
    # (127.0.0.1) used by Grafana keeps working.
    "-enableTCP6"
  ];
};
|
|
|
|
# Grafana admin password: generated once and kept as a clan secret.
# To read it back:
#   clan vars get control grafana-admin/password
clan.core.vars.generators.grafana-admin =
  let
    # The secret file is owned by the account Grafana runs as, so the service can read it.
    grafanaAccount = "grafana";
  in
  {
    runtimeInputs = [ pkgs.openssl ];

    files.password = {
      secret = true;
      owner = grafanaAccount;
      group = grafanaAccount;
    };

    # 24 random bytes, base64-encoded; the trailing newline is stripped so the
    # file holds only the password.
    script = ''
      openssl rand -base64 24 | tr -d "\n" > "$out"/password
    '';
  };
|
|
|
|
services.grafana =
  let
    # A proxied datasource pointing at a service on this host's loopback.
    localSource = name: type: uid: port: {
      inherit name type uid;
      access = "proxy";
      url = "http://127.0.0.1:${toString port}";
    };
  in
  {
    enable = true;

    # VictoriaLogs datasource plugin, so journald can be searched from Grafana
    # next to the metrics datasource.
    declarativePlugins = [ pkgs.grafanaPlugins.victoriametrics-logs-datasource ];

    # Listen on every address; access is narrowed by the firewall rule in this module.
    settings.server = {
      http_addr = "::";
      http_port = grafanaPort;
      root_url = "http://${v6 controlV6}:${toString grafanaPort}/";
    };

    # The admin password is read from the generated secret file at runtime
    # rather than being embedded in the configuration.
    settings.security = {
      admin_user = "admin";
      admin_password = "$__file{${adminPasswordFile}}";
    };

    # No anonymous access and no self-service sign-up.
    settings."auth.anonymous".enabled = false;
    settings.users.allow_sign_up = false;

    provision.enable = true;

    provision.datasources.settings = {
      apiVersion = 1;
      datasources = [
        # Metrics: the default datasource.
        (localSource "VictoriaMetrics" "prometheus" "victoriametrics" vmPort // { isDefault = true; })
        # Logs: served by the declarative plugin above.
        (localSource "VictoriaLogs" "victoriametrics-logs-datasource" "victorialogs" logsPort)
      ];
    };

    provision.dashboards.settings = {
      apiVersion = 1;
      providers = [
        {
          name = "cnx";
          options = {
            path = ./dashboards;
            foldersFromFilesStructure = false;
          };
        }
      ];
    };
  };
|
|
|
|
# Only the ZeroTier mesh may reach Grafana (admin laptops + servers) and the
# VictoriaLogs ingestion port (ns1/ns2 push journald over the mesh).
networking.firewall.extraInputRules =
  let
    # Rendered as a comma-separated port set, e.g. "3000, 9428".
    meshPorts = lib.concatMapStringsSep ", " toString [ grafanaPort logsPort ];
  in
  ''
    ip6 saddr ${mesh.subnet} tcp dport { ${meshPorts} } accept
  '';
|
|
}
|