{
  "uid": "cnx-backups",
  "title": "CNX Backups",
  "tags": ["backup", "borg", "cnx"],
  "timezone": "browser",
  "schemaVersion": 39,
  "version": 1,
  "refresh": "1m",
  "time": { "from": "now-7d", "to": "now" },
  "templating": { "list": [] },
  "annotations": { "list": [] },
  "panels": [
    {
      "type": "row",
      "title": "Backups",
      "id": 1,
      "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }
    },
    {
      "type": "stat",
      "title": "Backup health",
      "description": "1 if any borgbackup job is in the failed state, 0 otherwise. A successful run leaves the oneshot unit inactive (still OK); only a real failure shows FAILED. Derived from the node_exporter systemd collector on the backup client (ns1).",
      "id": 2,
      "datasource": { "type": "prometheus", "uid": "victoriametrics" },
      "gridPos": { "h": 5, "w": 8, "x": 0, "y": 1 },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "thresholds" },
          "thresholds": {
            "mode": "absolute",
            "steps": [
              { "color": "green", "value": null }
            ]
          },
          "noValue": "no data",
          "mappings": [
            {
              "type": "value",
              "options": {
                "0": { "text": "OK", "color": "green", "index": 0 },
                "1": { "text": "FAILED", "color": "red", "index": 1 }
              }
            }
          ]
        },
        "overrides": []
      },
      "options": {
        "reduceOptions": {
          "calcs": ["lastNotNull"],
          "fields": "",
          "values": false
        },
        "colorMode": "background",
        "graphMode": "none",
        "textMode": "auto",
        "orientation": "auto"
      },
      "targets": [
        {
          "refId": "A",
          "datasource": { "type": "prometheus", "uid": "victoriametrics" },
          "expr": "max(node_systemd_unit_state{name=~\"borgbackup-job-.+\\\\.service\",state=\"failed\"})",
          "instant": true
        }
      ]
    },
    {
      "type": "stat",
      "title": "Last backup run",
      "description": "When the most recent backup timer last fired (the daily borgbackup job). Shows 'never' when the query returns no data (e.g. before the first run).",
      "id": 3,
      "datasource": { "type": "prometheus", "uid": "victoriametrics" },
      "gridPos": { "h": 5, "w": 8, "x": 8, "y": 1 },
      "fieldConfig": {
        "defaults": {
          "unit": "dateTimeFromNow",
          "color": { "mode": "fixed", "fixedColor": "text" },
          "noValue": "never"
        },
        "overrides": []
      },
      "options": {
        "reduceOptions": {
          "calcs": ["lastNotNull"],
          "fields": "",
          "values": false
        },
        "colorMode": "none",
        "graphMode": "none",
        "textMode": "auto",
        "orientation": "auto"
      },
      "targets": [
        {
          "refId": "A",
          "datasource": { "type": "prometheus", "uid": "victoriametrics" },
          "expr": "max(node_systemd_timer_last_trigger_seconds{name=~\"borgbackup-job-.+\\\\.timer\"}) * 1000",
          "instant": true
        }
      ]
    },
    {
      "type": "stat",
      "title": "Time since last backup",
      "description": "Age of the most recent backup. Backups run daily, so anything past ~25h means a run was missed. Red over 25h.",
      "id": 4,
      "datasource": { "type": "prometheus", "uid": "victoriametrics" },
      "gridPos": { "h": 5, "w": 8, "x": 16, "y": 1 },
      "fieldConfig": {
        "defaults": {
          "unit": "s",
          "color": { "mode": "thresholds" },
          "thresholds": {
            "mode": "absolute",
            "steps": [
              { "color": "green", "value": null },
              { "color": "red", "value": 90000 }
            ]
          },
          "noValue": "never"
        },
        "overrides": []
      },
      "options": {
        "reduceOptions": {
          "calcs": ["lastNotNull"],
          "fields": "",
          "values": false
        },
        "colorMode": "background",
        "graphMode": "none",
        "textMode": "auto",
        "orientation": "auto"
      },
      "targets": [
        {
          "refId": "A",
          "datasource": { "type": "prometheus", "uid": "victoriametrics" },
          "expr": "time() - max(node_systemd_timer_last_trigger_seconds{name=~\"borgbackup-job-.+\\\\.timer\"})",
          "instant": true
        }
      ]
    },
    {
      "type": "table",
      "title": "Backup jobs (current state)",
      "description": "Every borgbackup job and the systemd unit state it is currently in, per client. 'inactive' is the normal resting state of a oneshot job between runs.",
      "id": 5,
      "datasource": { "type": "prometheus", "uid": "victoriametrics" },
      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 6 },
      "options": { "showHeader": true },
      "fieldConfig": {
        "defaults": {
          "custom": { "align": "auto" }
        },
        "overrides": []
      },
      "targets": [
        {
          "refId": "A",
          "datasource": { "type": "prometheus", "uid": "victoriametrics" },
          "expr": "node_systemd_unit_state{name=~\"borgbackup-job-.+\\\\.service\"} == 1",
          "format": "table",
          "instant": true
        }
      ],
      "transformations": [
        {
          "id": "organize",
          "options": {
            "excludeByName": {
              "Time": true,
              "Value": true,
              "__name__": true,
              "job": true,
              "type": true
            }
          }
        }
      ]
    },
    {
      "type": "timeseries",
      "title": "Failed state over time",
      "description": "1 while a backup job is in the failed state. A spike here is a backup that did not complete and was not retried before the next scrape.",
      "id": 6,
      "datasource": { "type": "prometheus", "uid": "victoriametrics" },
      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 6 },
      "fieldConfig": {
        "defaults": { "unit": "short", "min": 0, "max": 1 },
        "overrides": []
      },
      "targets": [
        {
          "refId": "A",
          "datasource": { "type": "prometheus", "uid": "victoriametrics" },
          "expr": "node_systemd_unit_state{name=~\"borgbackup-job-.+\\\\.service\",state=\"failed\"}",
          "legendFormat": "{{instance}} {{name}}"
        }
      ]
    },
    {
      "type": "timeseries",
      "title": "Time since last backup (history)",
      "description": "Age of the latest backup over time. The sawtooth should reset to near zero once a day; a steady climb without a reset means backups stopped running.",
      "id": 7,
      "datasource": { "type": "prometheus", "uid": "victoriametrics" },
      "gridPos": { "h": 8, "w": 24, "x": 0, "y": 14 },
      "fieldConfig": {
        "defaults": { "unit": "s" },
        "overrides": []
      },
      "targets": [
        {
          "refId": "A",
          "datasource": { "type": "prometheus", "uid": "victoriametrics" },
          "expr": "time() - node_systemd_timer_last_trigger_seconds{name=~\"borgbackup-job-.+\\\\.timer\"}",
          "legendFormat": "{{instance}} {{name}}"
        }
      ]
    }
  ]
}