mob next [ci-skip] [ci skip] [skip ci]
lastFile:modules/clan/prometheus/default.nix
This commit is contained in:
@@ -360,23 +360,6 @@
|
||||
};
|
||||
|
||||
services.prometheus = {
|
||||
# rules = [
|
||||
# (builtins.toJSON {
|
||||
# groups = [
|
||||
# {
|
||||
# name = "default";
|
||||
# rules = [
|
||||
# {
|
||||
# alert = "NodeDown";
|
||||
# expr = "up == 0";
|
||||
# for = "1m";
|
||||
# annotations.summary = "Node {{ $labels.job }} has been down for more than 1 minutes. {{ $labels.instance }}";
|
||||
# }
|
||||
# ];
|
||||
# }
|
||||
# ];
|
||||
# })
|
||||
# ];
|
||||
|
||||
alertmanager = {
|
||||
enable = true;
|
||||
|
||||
@@ -103,11 +103,24 @@
|
||||
expr = "smartctl_device_error_log_count > 0";
|
||||
for = "5m";
|
||||
labels = {
|
||||
severity = "medium";
|
||||
severity = "critical";
|
||||
};
|
||||
annotations.summary = ''
|
||||
Errors occur on {{ $labels.job }}
|
||||
Disk {{ $labels.device }}
|
||||
Disk {{ $labels.device }} {{ $value }}
|
||||
{{ $labels.instance }}
|
||||
'';
|
||||
}
|
||||
{
|
||||
alert = "ZFSPoolsHealth";
|
||||
expr = "zfs_pool_health > 0";
|
||||
for = "5m";
|
||||
labels = {
|
||||
severity = "critical";
|
||||
};
|
||||
annotations.summary = ''
|
||||
Unhealthy Pool at {{ $labels.job }}
|
||||
Pool {{ $labels.pool }} value {{ $value }}
|
||||
{{ $labels.instance }}
|
||||
'';
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user