mob next [ci-skip] [ci skip] [skip ci]

lastFile:modules/clan/prometheus/default.nix
This commit is contained in:
2026-06-19 15:06:27 +07:00
parent dffaf4bf0d
commit 6d5ce0a0a0
2 changed files with 57 additions and 17 deletions
+17 -17
View File
@@ -360,23 +360,23 @@
}; };
services.prometheus = { services.prometheus = {
rules = [ # rules = [
(builtins.toJSON { # (builtins.toJSON {
groups = [ # groups = [
{ # {
name = "default"; # name = "default";
rules = [ # rules = [
{ # {
alert = "NodeDown"; # alert = "NodeDown";
expr = "up == 0"; # expr = "up == 0";
for = "1m"; # for = "1m";
annotations.summary = "Node {{ $labels.job }} has been down for more than 1 minutes. {{ $labels.instance }}"; # annotations.summary = "Node {{ $labels.job }} has been down for more than 1 minutes. {{ $labels.instance }}";
} # }
]; # ];
} # }
]; # ];
}) # })
]; # ];
alertmanager = { alertmanager = {
enable = true; enable = true;
+40
View File
@@ -18,6 +18,11 @@
default = "1m"; default = "1m";
description = "How often to scrape targets. Default is 1 minutes"; description = "How often to scrape targets. Default is 1 minutes";
}; };
# Option holding extra rule entries supplied by the user.
# Accepts a list of free-form attribute sets and is empty unless set.
extraRules = lib.mkOption {
  description = "Additional rules for Prometheus";
  type = lib.types.listOf lib.types.attrs;
  default = [ ];
};
}; };
}; };
@@ -78,6 +83,41 @@
) machineVal.settings.exporters; ) machineVal.settings.exporters;
}) roles.nodes.machines; }) roles.nodes.machines;
# Rule definitions: a single-element list containing one JSON document
# (built with builtins.toJSON) that declares the "default" rule group.
rules =
  let
    # Fires when the `up == 0` expression has held for one minute.
    nodesDown = {
      alert = "NodesDown";
      expr = "up == 0";
      for = "1m";
      labels.severity = "critical";
      annotations = {
        summary = "Node {{ $labels.job }} has been down for more than 1 minutes. {{ $labels.instance }}";
      };
    };

    # Fires when the smartctl error-log counter has been above zero for five minutes.
    smartCtlErrors = {
      alert = "SmartCtlErrors";
      expr = "smartctl_device_error_log_count > 0";
      for = "5m";
      labels.severity = "medium";
      annotations = {
        # Indented string: common leading whitespace is stripped, the
        # trailing newline before the closing quotes is kept.
        summary = ''
          Errors occur on {{ $labels.job }}
          Disk {{ $labels.device }}
          {{ $labels.instance }}
        '';
      };
    };

    # Built-in alerts come first, followed by whatever settings.extraRules holds.
    defaultGroup = {
      name = "default";
      rules = [
        nodesDown
        smartCtlErrors
      ]
      ++ settings.extraRules;
    };
  in
  [
    (builtins.toJSON { groups = [ defaultGroup ]; })
  ];
}; };
}; };