Add VictoriaLogs for centralized journald across all hosts
control runs VictoriaLogs (:9428, 30-day retention, mesh-scoped) with a matching Grafana datasource. Each host ships its journal via systemd's own systemd-journal-upload (NixOS `services.journald.upload`) to the /insert/journald endpoint -- no extra agent. control uploads over loopback so its logs survive a mesh outage; ns1 and ns2 push over the mesh.
This commit is contained in:
@@ -57,7 +57,13 @@ ns1/ns2: SOA + DNSKEY succeed on both servers over v4 and v6.
|
||||
- [ ] A secondary nameserver on a different provider/network so a single-provider
|
||||
outage doesn't take all authoritative DNS down (architectural — new machine)
|
||||
|
||||
## 5. Centralized logs
|
||||
## 5. Centralized logs (done — pending deploy)
|
||||
|
||||
- [ ] VictoriaLogs on control to grep journald across all three hosts, pairing
|
||||
VictoriaLogs on control (`:9428`, 30d retention, mesh-scoped) in
|
||||
`modules/monitoring/server.nix`, plus a VictoriaLogs Grafana datasource. All
|
||||
three hosts ship journald with systemd's own `services.journald.upload` to the
|
||||
`/insert/journald` endpoint (`modules/monitoring/exporters.nix`) — no extra
|
||||
agent. control uploads over loopback; ns1/ns2 over the mesh.
|
||||
|
||||
- [x] VictoriaLogs on control to grep journald across all three hosts, pairing
|
||||
with the existing VictoriaMetrics setup
|
||||
|
||||
+20
-1
@@ -42,6 +42,25 @@ Dashboards are provisioned from `modules/monitoring/dashboards/` (any JSON file
|
||||
there is picked up):
|
||||
|
||||
- **CNX DNS** (`dns.json`) — firing alerts, per-nameserver SOA serials, zone
|
||||
expiry countdowns, query/response rates, and host CPU/memory/disk/load.
|
||||
expiry countdowns, query/response rates, host CPU/memory/disk/load, and the
|
||||
outside-in DNS probes.
|
||||
- **CNX Backups** (`backups.json`) — borgbackup job health, time since the last
|
||||
run, and per-job state. See [Backups](./backups.md).
|
||||
|
||||
## Logs
|
||||
|
||||
**VictoriaLogs** on `control` (`:9428`), 30-day retention
|
||||
(`modules/monitoring/server.nix`). All three hosts ship journald to it via
|
||||
systemd's own `services.journald.upload` → the `/insert/journald` endpoint
|
||||
(`modules/monitoring/exporters.nix`); no extra agent. `control` uploads over
|
||||
loopback so its logs survive a mesh outage, `ns1`/`ns2` push over the mesh, and
|
||||
port 9428 is firewall-scoped to the mesh like everything else.
|
||||
|
||||
Query logs from Grafana via the provisioned **VictoriaLogs** datasource (Explore
|
||||
view, LogsQL), or directly in the built-in UI at `http://[<control's mesh IPv6 address>]:9428/select/vmui`.
|
||||
Logs are tagged with `_HOSTNAME` and `_SYSTEMD_UNIT`, so to follow one service
|
||||
across hosts:
|
||||
|
||||
```
|
||||
_SYSTEMD_UNIT:"knot.service"
|
||||
```
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
# Metric exporters, imported by every machine. Host metrics everywhere; Knot DNS
|
||||
# metrics on the nameservers. Everything is reachable only over the ZeroTier mesh
|
||||
# Per-host observability agents, imported by every machine. Host metrics
|
||||
# everywhere; Knot DNS metrics on the nameservers; journald shipped to
|
||||
# VictoriaLogs on control. Everything is reachable only over the ZeroTier mesh
|
||||
# (see the firewall rule at the bottom); the public side is already closed by the
|
||||
# Hetzner cloud firewall.
|
||||
{
|
||||
@@ -86,6 +87,22 @@ in
|
||||
];
|
||||
};
|
||||
|
||||
# Ship journald to VictoriaLogs on control (services.victorialogs in
|
||||
# server.nix). control uploads to loopback so its own logs survive a mesh
|
||||
# outage; ns1/ns2 push over the mesh to control's ZeroTier address.
|
||||
services.journald.upload = {
|
||||
enable = true;
|
||||
# The upload URL is assembled as http://<dest>/insert/journald, where <dest>
# (host:port) is chosen per host by the let-binding below.
settings.Upload.URL =
|
||||
let
|
||||
dest =
|
||||
# The host named "control" targets IPv4 loopback on port 9428.
if config.networking.hostName == "control" then
|
||||
"127.0.0.1:9428"
|
||||
else
|
||||
# Every other host targets control's mesh address, wrapped in brackets
# (presumably an IPv6 literal -- confirm against mesh-hosts.nix).
"[${mesh.hosts.control}]:9428";
|
||||
in
|
||||
"http://${dest}/insert/journald";
|
||||
};
|
||||
|
||||
# Scrape ports reachable only from the ZeroTier mesh.
|
||||
networking.firewall.extraInputRules = ''
|
||||
ip6 saddr ${mesh.subnet} tcp dport { ${lib.concatMapStringsSep ", " toString ports} } accept
|
||||
|
||||
@@ -12,6 +12,7 @@ let
|
||||
mesh = import ../mesh-hosts.nix { inherit config lib; };
|
||||
probes = import ./blackbox-probes.nix { inherit lib; };
|
||||
vmPort = 8428;
|
||||
logsPort = 9428;
|
||||
grafanaPort = 3000;
|
||||
controlV6 = mesh.hosts.control;
|
||||
|
||||
@@ -60,6 +61,17 @@ in
|
||||
};
|
||||
};
|
||||
|
||||
# Centralized logs: VictoriaLogs ingests journald from all three hosts, each
|
||||
# of which runs systemd-journal-upload against /insert/journald (exporters.nix).
|
||||
# Binds all interfaces because ns1/ns2 push over the mesh; the firewall rule at
|
||||
# the bottom scopes 9428 to the mesh subnet and the Hetzner firewall closes the
|
||||
# public side. Retention is set via extraOptions (no dedicated NixOS option).
|
||||
services.victorialogs = {
|
||||
enable = true;
|
||||
# No host part before the colon: only the port (logsPort) is specified.
listenAddress = ":${toString logsPort}";
|
||||
# Retention period passed straight through as a command-line flag: 30 days.
extraOptions = [ "-retentionPeriod=30d" ];
|
||||
};
|
||||
|
||||
# Admin password generated once and stored as a clan secret. Retrieve with:
|
||||
# clan vars get control grafana-admin/password
|
||||
clan.core.vars.generators.grafana-admin = {
|
||||
@@ -76,6 +88,9 @@ in
|
||||
|
||||
services.grafana = {
|
||||
enable = true;
|
||||
# VictoriaLogs datasource plugin so journald is greppable from Grafana,
|
||||
# alongside the metrics datasource.
|
||||
declarativePlugins = [ pkgs.grafanaPlugins.victoriametrics-logs-datasource ];
|
||||
settings = {
|
||||
server = {
|
||||
http_addr = "::";
|
||||
@@ -102,6 +117,13 @@ in
|
||||
url = "http://127.0.0.1:${toString vmPort}";
|
||||
isDefault = true;
|
||||
}
|
||||
{
|
||||
name = "VictoriaLogs";
|
||||
type = "victoriametrics-logs-datasource";
|
||||
uid = "victorialogs";
|
||||
access = "proxy";
|
||||
url = "http://127.0.0.1:${toString logsPort}";
|
||||
}
|
||||
];
|
||||
};
|
||||
dashboards.settings = {
|
||||
@@ -117,8 +139,9 @@ in
|
||||
};
|
||||
};
|
||||
|
||||
# Grafana reachable only from the ZeroTier mesh (admin laptops + servers).
|
||||
# Grafana (admin laptops + servers) and VictoriaLogs ingestion (ns1/ns2 push
|
||||
# journald over the mesh) reachable only from the ZeroTier mesh.
|
||||
networking.firewall.extraInputRules = ''
|
||||
ip6 saddr ${mesh.subnet} tcp dport ${toString grafanaPort} accept
|
||||
ip6 saddr ${mesh.subnet} tcp dport { ${toString grafanaPort}, ${toString logsPort} } accept
|
||||
'';
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user