mob next [ci-skip] [ci skip] [skip ci]

lastFile:modules/clan/prometheus/default.nix
This commit is contained in:
2026-06-19 15:06:27 +07:00
parent dffaf4bf0d
commit 6d5ce0a0a0
2 changed files with 57 additions and 17 deletions
+17 -17
View File
@@ -360,23 +360,23 @@
};
services.prometheus = {
# Alert rules for Prometheus, JSON-encoded into a single string entry.
rules = [
  (builtins.toJSON {
    groups = [
      {
        name = "default";
        rules = [
          {
            # Raised once a target's `up` series has stayed at 0 for the
            # whole `for` window.
            alert = "NodeDown";
            expr = "up == 0";
            for = "1m";
            # `{{ ... }}` is not Nix interpolation; the placeholders reach
            # Prometheus unchanged and are expanded there.
            annotations.summary = "Node {{ $labels.job }} has been down for more than 1 minute. {{ $labels.instance }}";
          }
        ];
      }
    ];
  })
];
# rules = [
# (builtins.toJSON {
# groups = [
# {
# name = "default";
# rules = [
# {
# alert = "NodeDown";
# expr = "up == 0";
# for = "1m";
# annotations.summary = "Node {{ $labels.job }} has been down for more than 1 minutes. {{ $labels.instance }}";
# }
# ];
# }
# ];
# })
# ];
alertmanager = {
enable = true;
+40
View File
@@ -18,6 +18,11 @@
default = "1m";
description = "How often to scrape targets. Default is 1 minutes";
};
# User-supplied alert rules. Each entry is one rule attribute set
# (alert / expr / for / labels / annotations) and is appended to the rules
# of the "default" group via `settings.extraRules`.
extraRules = lib.mkOption {
  # `attrsOf anything` instead of the legacy `attrs` type: `attrs` does not
  # merge nested definitions and does not discharge mkIf / mkDefault, so
  # rule attributes wrapped in those would be passed through unevaluated.
  type = with lib.types; listOf (attrsOf anything);
  default = [ ];
  example = [
    {
      alert = "HighLoad";
      expr = "node_load1 > 4";
      for = "10m";
      labels.severity = "warning";
      annotations.summary = "High load on {{ $labels.instance }}";
    }
  ];
  description = "Additional Prometheus alerting rules, appended to the rules of the built-in `default` rule group.";
};
};
};
@@ -78,6 +83,41 @@
) machineVal.settings.exporters;
}) roles.nodes.machines;
# Alert rules, JSON-encoded into a single string entry. The "default" group
# carries the built-in alerts followed by whatever `settings.extraRules`
# contributes.
rules = [
  (builtins.toJSON {
    groups = [
      {
        name = "default";
        rules = [
          {
            # Raised once a target's `up` series has stayed at 0 for the
            # whole `for` window.
            alert = "NodesDown";
            expr = "up == 0";
            for = "1m";
            labels = {
              severity = "critical";
            };
            # `{{ ... }}` is not Nix interpolation; the placeholders reach
            # Prometheus unchanged and are expanded there.
            annotations.summary = "Node {{ $labels.job }} has been down for more than 1 minute. {{ $labels.instance }}";
          }
          {
            # Raised when the smartctl exporter has reported a non-empty
            # device error log for five minutes.
            alert = "SmartCtlErrors";
            expr = "smartctl_device_error_log_count > 0";
            for = "5m";
            labels = {
              # NOTE(review): "medium" must match a severity the
              # Alertmanager routes actually handle - confirm.
              severity = "medium";
            };
            # Indented string: Nix strips the common leading whitespace, so
            # the summary is three unindented lines plus a trailing newline.
            annotations.summary = ''
              Errors occur on {{ $labels.job }}
              Disk {{ $labels.device }}
              {{ $labels.instance }}
            '';
          }
        ]
        # Caller-provided rules are evaluated in the same group.
        ++ settings.extraRules;
      }
    ];
  })
];
};
};