195 lines
6.0 KiB
JSON
195 lines
6.0 KiB
JSON
{
|
|
"uid": "cnx-uptime",
|
|
"title": "CNX Uptime",
|
|
"tags": ["uptime", "availability", "cnx"],
|
|
"timezone": "browser",
|
|
"schemaVersion": 39,
|
|
"version": 1,
|
|
"refresh": "30s",
|
|
"time": { "from": "now-24h", "to": "now" },
|
|
"templating": { "list": [] },
|
|
"annotations": { "list": [] },
|
|
"panels": [
|
|
{
|
|
"type": "row",
|
|
"title": "Uptime",
|
|
"id": 1,
|
|
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }
|
|
},
|
|
{
|
|
"type": "stat",
|
|
"title": "Host status",
|
|
"description": "Whether VictoriaMetrics is currently able to scrape each host's node_exporter. UP means the host (and its mesh path) is reachable; DOWN means the scrape failed. One tile per machine.",
|
|
"id": 2,
|
|
"datasource": { "type": "prometheus", "uid": "victoriametrics" },
|
|
"gridPos": { "h": 6, "w": 12, "x": 0, "y": 1 },
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": { "mode": "thresholds" },
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [{ "color": "green", "value": null }]
|
|
},
|
|
"noValue": "no data",
|
|
"mappings": [
|
|
{
|
|
"type": "value",
|
|
"options": {
|
|
"0": { "text": "DOWN", "color": "red", "index": 0 },
|
|
"1": { "text": "UP", "color": "green", "index": 1 }
|
|
}
|
|
}
|
|
]
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"reduceOptions": {
|
|
"calcs": ["lastNotNull"],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"colorMode": "background",
|
|
"graphMode": "none",
|
|
"textMode": "value_and_name",
|
|
"orientation": "auto"
|
|
},
|
|
"targets": [
|
|
{
|
|
"refId": "A",
|
|
"datasource": { "type": "prometheus", "uid": "victoriametrics" },
|
|
"expr": "up{job=\"node\"}",
|
|
"legendFormat": "{{instance}}",
|
|
"instant": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"type": "stat",
|
|
"title": "Current uptime",
|
|
"description": "Time since each host last booted (now - node_boot_time_seconds). A value that drops back to near zero means the host rebooted.",
|
|
"id": 3,
|
|
"datasource": { "type": "prometheus", "uid": "victoriametrics" },
|
|
"gridPos": { "h": 6, "w": 12, "x": 12, "y": 1 },
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "dtdurations",
|
|
"color": { "mode": "fixed", "fixedColor": "text" },
|
|
"noValue": "no data"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"reduceOptions": {
|
|
"calcs": ["lastNotNull"],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"colorMode": "none",
|
|
"graphMode": "none",
|
|
"textMode": "value_and_name",
|
|
"orientation": "auto"
|
|
},
|
|
"targets": [
|
|
{
|
|
"refId": "A",
|
|
"datasource": { "type": "prometheus", "uid": "victoriametrics" },
|
|
"expr": "time() - node_boot_time_seconds{job=\"node\"}",
|
|
"legendFormat": "{{instance}}",
|
|
"instant": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"type": "bargauge",
|
|
"title": "Availability over window",
|
|
"description": "Fraction of successful scrapes over the selected time range, per host (avg of up over $__range). 100% means every scrape in the window succeeded; dips reveal flapping or outages. Red below 99%.",
|
|
"id": 4,
|
|
"datasource": { "type": "prometheus", "uid": "victoriametrics" },
|
|
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 7 },
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "percent",
|
|
"min": 0,
|
|
"max": 100,
|
|
"color": { "mode": "thresholds" },
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{ "color": "red", "value": null },
|
|
{ "color": "yellow", "value": 99 },
|
|
{ "color": "green", "value": 99.9 }
|
|
]
|
|
},
|
|
"noValue": "no data"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"displayMode": "gradient",
|
|
"orientation": "horizontal",
|
|
"showUnfilled": true,
|
|
"reduceOptions": {
|
|
"calcs": ["lastNotNull"],
|
|
"fields": "",
|
|
"values": false
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"refId": "A",
|
|
"datasource": { "type": "prometheus", "uid": "victoriametrics" },
|
|
"expr": "avg_over_time(up{job=\"node\"}[$__range]) * 100",
|
|
"legendFormat": "{{instance}}",
|
|
"instant": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"type": "timeseries",
|
|
"title": "Uptime over time",
|
|
"description": "Host uptime across the window. The line should climb steadily; a reset to zero marks a reboot.",
|
|
"id": 5,
|
|
"datasource": { "type": "prometheus", "uid": "victoriametrics" },
|
|
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 7 },
|
|
"fieldConfig": {
|
|
"defaults": { "unit": "s", "custom": { "fillOpacity": 0 } },
|
|
"overrides": []
|
|
},
|
|
"targets": [
|
|
{
|
|
"refId": "A",
|
|
"datasource": { "type": "prometheus", "uid": "victoriametrics" },
|
|
"expr": "time() - node_boot_time_seconds{job=\"node\"}",
|
|
"legendFormat": "{{instance}}"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"type": "timeseries",
|
|
"title": "Up/down history",
|
|
"description": "1 while a host's node_exporter was scrapeable, 0 while it was not. Gaps to zero are outages or lost mesh connectivity.",
|
|
"id": 6,
|
|
"datasource": { "type": "prometheus", "uid": "victoriametrics" },
|
|
"gridPos": { "h": 6, "w": 24, "x": 0, "y": 15 },
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "short",
|
|
"min": 0,
|
|
"max": 1,
|
|
"custom": { "fillOpacity": 20, "lineInterpolation": "stepAfter" }
|
|
},
|
|
"overrides": []
|
|
},
|
|
"targets": [
|
|
{
|
|
"refId": "A",
|
|
"datasource": { "type": "prometheus", "uid": "victoriametrics" },
|
|
"expr": "up{job=\"node\"}",
|
|
"legendFormat": "{{instance}}"
|
|
}
|
|
]
|
|
}
|
|
]
|
|
}
|