Add VictoriaLogs for centralized journald across all hosts
control runs VictoriaLogs (:9428, 30d retention, mesh-scoped) with a matching Grafana datasource. Each host ships journald via systemd's own systemd-journal-upload (NixOS services.journald.upload) to the /insert/journald endpoint -- no extra agent. control uploads over loopback so its logs survive a mesh outage; ns1 and ns2 push over the mesh.
This commit is contained in:
@@ -57,7 +57,13 @@ ns1/ns2: SOA + DNSKEY succeed on both servers over v4 and v6.
|
|||||||
- [ ] A secondary nameserver on a different provider/network so a single-provider
|
- [ ] A secondary nameserver on a different provider/network so a single-provider
|
||||||
outage doesn't take all authoritative DNS down (architectural — new machine)
|
outage doesn't take all authoritative DNS down (architectural — new machine)
|
||||||
|
|
||||||
## 5. Centralized logs
|
## 5. Centralized logs (done — pending deploy)
|
||||||
|
|
||||||
- [ ] VictoriaLogs on control to grep journald across all three hosts, pairing
|
VictoriaLogs on control (`:9428`, 30d retention, mesh-scoped) in
|
||||||
|
`modules/monitoring/server.nix`, plus a VictoriaLogs Grafana datasource. All
|
||||||
|
three hosts ship journald with systemd's own `systemd-journal-upload` (enabled via `services.journald.upload`) to the
|
||||||
|
`/insert/journald` endpoint (`modules/monitoring/exporters.nix`) — no extra
|
||||||
|
agent. control uploads over loopback; ns1/ns2 over the mesh.
|
||||||
|
|
||||||
|
- [x] VictoriaLogs on control to grep journald across all three hosts, pairing
|
||||||
with the existing VictoriaMetrics setup
|
with the existing VictoriaMetrics setup
|
||||||
|
|||||||
+20
-1
@@ -42,6 +42,25 @@ Dashboards are provisioned from `modules/monitoring/dashboards/` (any JSON file
|
|||||||
there is picked up):
|
there is picked up):
|
||||||
|
|
||||||
- **CNX DNS** (`dns.json`) — firing alerts, per-nameserver SOA serials, zone
|
- **CNX DNS** (`dns.json`) — firing alerts, per-nameserver SOA serials, zone
|
||||||
expiry countdowns, query/response rates, and host CPU/memory/disk/load.
|
expiry countdowns, query/response rates, host CPU/memory/disk/load, and the
|
||||||
|
outside-in DNS probes.
|
||||||
- **CNX Backups** (`backups.json`) — borgbackup job health, time since the last
|
- **CNX Backups** (`backups.json`) — borgbackup job health, time since the last
|
||||||
run, and per-job state. See [Backups](./backups.md).
|
run, and per-job state. See [Backups](./backups.md).
|
||||||
|
|
||||||
|
## Logs
|
||||||
|
|
||||||
|
**VictoriaLogs** on `control` (`:9428`), 30-day retention
|
||||||
|
(`modules/monitoring/server.nix`). All three hosts ship journald to it via
|
||||||
|
systemd's own `systemd-journal-upload` (NixOS `services.journald.upload`) → the `/insert/journald` endpoint
|
||||||
|
(`modules/monitoring/exporters.nix`); no extra agent. `control` uploads over
|
||||||
|
loopback so its logs survive a mesh outage, `ns1`/`ns2` push over the mesh, and
|
||||||
|
port 9428 is firewall-scoped to the mesh like everything else.
|
||||||
|
|
||||||
|
Query logs from Grafana via the provisioned **VictoriaLogs** datasource (Explore
|
||||||
|
view, LogsQL), or directly in the built-in UI at `http://[<control-mesh-IPv6>]:9428/select/vmui`.
|
||||||
|
Logs are tagged with `_HOSTNAME` and `_SYSTEMD_UNIT`, so to follow one service
|
||||||
|
across hosts:
|
||||||
|
|
||||||
|
```
|
||||||
|
_SYSTEMD_UNIT:"knot.service"
|
||||||
|
```
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
# Metric exporters, imported by every machine. Host metrics everywhere; Knot DNS
|
# Per-host observability agents, imported by every machine. Host metrics
|
||||||
# metrics on the nameservers. Everything is reachable only over the ZeroTier mesh
|
# everywhere; Knot DNS metrics on the nameservers; journald shipped to
|
||||||
|
# VictoriaLogs on control. Everything is reachable only over the ZeroTier mesh
|
||||||
# (see the firewall rule at the bottom); the public side is already closed by the
|
# (see the firewall rule at the bottom); the public side is already closed by the
|
||||||
# Hetzner cloud firewall.
|
# Hetzner cloud firewall.
|
||||||
{
|
{
|
||||||
@@ -86,6 +87,22 @@ in
|
|||||||
];
|
];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
# Ship journald to VictoriaLogs on control (services.victorialogs in
|
||||||
|
# server.nix). control uploads to loopback so its own logs survive a mesh
|
||||||
|
# outage; ns1/ns2 push over the mesh to control's ZeroTier address.
|
||||||
|
services.journald.upload = {
|
||||||
|
enable = true;
|
||||||
|
settings.Upload.URL =
|
||||||
|
let
|
||||||
|
dest =
|
||||||
|
if config.networking.hostName == "control" then
|
||||||
|
"127.0.0.1:9428"
|
||||||
|
else
|
||||||
|
"[${mesh.hosts.control}]:9428";
|
||||||
|
in
|
||||||
|
"http://${dest}/insert/journald";
|
||||||
|
};
|
||||||
|
|
||||||
# Scrape ports reachable only from the ZeroTier mesh.
|
# Scrape ports reachable only from the ZeroTier mesh.
|
||||||
networking.firewall.extraInputRules = ''
|
networking.firewall.extraInputRules = ''
|
||||||
ip6 saddr ${mesh.subnet} tcp dport { ${lib.concatMapStringsSep ", " toString ports} } accept
|
ip6 saddr ${mesh.subnet} tcp dport { ${lib.concatMapStringsSep ", " toString ports} } accept
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ let
|
|||||||
mesh = import ../mesh-hosts.nix { inherit config lib; };
|
mesh = import ../mesh-hosts.nix { inherit config lib; };
|
||||||
probes = import ./blackbox-probes.nix { inherit lib; };
|
probes = import ./blackbox-probes.nix { inherit lib; };
|
||||||
vmPort = 8428;
|
vmPort = 8428;
|
||||||
|
logsPort = 9428;
|
||||||
grafanaPort = 3000;
|
grafanaPort = 3000;
|
||||||
controlV6 = mesh.hosts.control;
|
controlV6 = mesh.hosts.control;
|
||||||
|
|
||||||
@@ -60,6 +61,17 @@ in
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
# Centralized logs: VictoriaLogs ingests journald from all three hosts, each
|
||||||
|
# of which runs systemd-journal-upload against /insert/journald (exporters.nix).
|
||||||
|
# Binds all interfaces because ns1/ns2 push over the mesh; the firewall rule at
|
||||||
|
# the bottom scopes 9428 to the mesh subnet and the Hetzner firewall closes the
|
||||||
|
# public side. Retention is set via extraOptions (no dedicated NixOS option).
|
||||||
|
services.victorialogs = {
|
||||||
|
enable = true;
|
||||||
|
listenAddress = ":${toString logsPort}";
|
||||||
|
extraOptions = [ "-retentionPeriod=30d" ];
|
||||||
|
};
|
||||||
|
|
||||||
# Admin password generated once and stored as a clan secret. Retrieve with:
|
# Admin password generated once and stored as a clan secret. Retrieve with:
|
||||||
# clan vars get control grafana-admin/password
|
# clan vars get control grafana-admin/password
|
||||||
clan.core.vars.generators.grafana-admin = {
|
clan.core.vars.generators.grafana-admin = {
|
||||||
@@ -76,6 +88,9 @@ in
|
|||||||
|
|
||||||
services.grafana = {
|
services.grafana = {
|
||||||
enable = true;
|
enable = true;
|
||||||
|
# VictoriaLogs datasource plugin so journald is greppable from Grafana,
|
||||||
|
# alongside the metrics datasource.
|
||||||
|
declarativePlugins = [ pkgs.grafanaPlugins.victoriametrics-logs-datasource ];
|
||||||
settings = {
|
settings = {
|
||||||
server = {
|
server = {
|
||||||
http_addr = "::";
|
http_addr = "::";
|
||||||
@@ -102,6 +117,13 @@ in
|
|||||||
url = "http://127.0.0.1:${toString vmPort}";
|
url = "http://127.0.0.1:${toString vmPort}";
|
||||||
isDefault = true;
|
isDefault = true;
|
||||||
}
|
}
|
||||||
|
{
|
||||||
|
name = "VictoriaLogs";
|
||||||
|
type = "victoriametrics-logs-datasource";
|
||||||
|
uid = "victorialogs";
|
||||||
|
access = "proxy";
|
||||||
|
url = "http://127.0.0.1:${toString logsPort}";
|
||||||
|
}
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
dashboards.settings = {
|
dashboards.settings = {
|
||||||
@@ -117,8 +139,9 @@ in
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
# Grafana reachable only from the ZeroTier mesh (admin laptops + servers).
|
# Grafana (admin laptops + servers) and VictoriaLogs ingestion (ns1/ns2 push
|
||||||
|
# journald over the mesh) reachable only from the ZeroTier mesh.
|
||||||
networking.firewall.extraInputRules = ''
|
networking.firewall.extraInputRules = ''
|
||||||
ip6 saddr ${mesh.subnet} tcp dport ${toString grafanaPort} accept
|
ip6 saddr ${mesh.subnet} tcp dport { ${toString grafanaPort}, ${toString logsPort} } accept
|
||||||
'';
|
'';
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user