diff --git a/inventories/default.nix b/inventories/default.nix index 204d61a..18b0a83 100644 --- a/inventories/default.nix +++ b/inventories/default.nix @@ -320,81 +320,95 @@ name = "prometheus"; input = "self"; }; - roles.server.machines."rigel".settings = { }; - roles.server.extraModules = [ - ( - { config, pkgs, ... }: - { - # clan.core.vars.generators.prometheus = { - # files.matrix-alertmanager-token.secret = true; - # files.matrix-alertmanager-secret.secret = true; - # files.matrix-alertmanager-urlfile = { - # secret = true; - # owner = "alertmanager"; - # group = "alertmanager"; - # }; - # script = '' - # echo "" > $out/matrix-alertmanager-token - # openssl rand -hex 32 > "$out"/matrix-alertmanager-secret - # - # echo "http://localhost:3000/alerts?secret=$(cat $out/matrix-alertmanager-secret)" > $out/matrix-alertmanager-urlfile - # ''; - # runtimeInputs = [ - # pkgs.openssl - # ]; - # }; - # - # services.matrix-alertmanager = { - # enable = true; - # tokenFile = config.clan.core.vars.generators.prometheus.files.matrix-alertmanager-token.path; - # secretFile = config.clan.core.vars.generators.prometheus.files.matrix-alertmanager-secret.path; - # homeserverUrl = "https://matrix-client.matrix.org"; - # matrixUser = "@kuroiris:matrix.org"; - # matrixRooms = [ - # { - # receivers = [ - # "matrix" - # ]; - # roomId = "!rqIrWqPvsXqMgYpcNZ:matrix.org"; - # } - # ]; - # }; - - # services.prometheus = { - # - # alertmanager = { - # enable = true; - # configuration = { - # global = { - # resolve_timeout = "5m"; - # }; - # route = { - # receiver = "default"; - # routes = [ - # { - # receiver = "matrix"; - # } - # ]; - # }; - # receivers = [ - # { name = "default"; } - # { - # name = "matrix"; - # webhook_configs = [ - # { - # url_file = config.clan.core.vars.generators.prometheus.files.matrix-alertmanager-urlfile.path; - # send_resolved = true; - # } - # ]; - # } - # ]; - # }; - # }; - # - # }; - } - ) - ]; + roles.server.machines."rigel".settings = { + matrix-alertmanager = { + enable = true; + homeserverUrl = "https://matrix-client.matrix.org"; + matrixUser = "@kuroiris:matrix.org"; + matrixRooms = [ + { + receivers = [ + "matrix" + ]; + roomId = "!rqIrWqPvsXqMgYpcNZ:matrix.org"; + } + ]; + }; + }; + # roles.server.extraModules = [ + # ( + # { config, pkgs, ... }: + # { + # # clan.core.vars.generators.prometheus = { + # # files.matrix-alertmanager-token.secret = true; + # # files.matrix-alertmanager-secret.secret = true; + # # files.matrix-alertmanager-urlfile = { + # # secret = true; + # # owner = "alertmanager"; + # # group = "alertmanager"; + # # }; + # # script = '' + # # echo "" > $out/matrix-alertmanager-token + # # openssl rand -hex 32 > "$out"/matrix-alertmanager-secret + # # + # # echo "http://localhost:3000/alerts?secret=$(cat $out/matrix-alertmanager-secret)" > $out/matrix-alertmanager-urlfile + # # ''; + # # runtimeInputs = [ + # # pkgs.openssl + # # ]; + # # }; + # # + # # services.matrix-alertmanager = { + # # enable = true; + # # tokenFile = config.clan.core.vars.generators.prometheus.files.matrix-alertmanager-token.path; + # # secretFile = config.clan.core.vars.generators.prometheus.files.matrix-alertmanager-secret.path; + # # homeserverUrl = "https://matrix-client.matrix.org"; + # # matrixUser = "@kuroiris:matrix.org"; + # # matrixRooms = [ + # # { + # # receivers = [ + # # "matrix" + # # ]; + # # roomId = "!rqIrWqPvsXqMgYpcNZ:matrix.org"; + # # } + # # ]; + # # }; + # + # # services.prometheus = { + # # + # # alertmanager = { + # # enable = true; + # # configuration = { + # # global = { + # # resolve_timeout = "5m"; + # # }; + # # route = { + # # receiver = "default"; + # # routes = [ + # # { + # # receiver = "matrix"; + # # } + # # ]; + # # }; + # # receivers = [ + # # { name = "default"; } + # # { + # # name = "matrix"; + # # webhook_configs = [ + # # { + # # url_file = config.clan.core.vars.generators.prometheus.files.matrix-alertmanager-urlfile.path; + # # send_resolved = true; + # # } + # # ]; + # # } + # # ]; + # # }; + # # }; + # # + # # }; + # } + # ) + # ]; roles.nodes.machines = { vega.settings = { diff --git a/modules/clan/prometheus/default.nix b/modules/clan/prometheus/default.nix index 71451c5..1c83b4d 100644 --- a/modules/clan/prometheus/default.nix +++ b/modules/clan/prometheus/default.nix @@ -122,147 +122,150 @@ lib.concatMap (entry: entry.receivers) settings.matrix-alertmanager.matrixRooms ); in - { - networking.firewall.allowedTCPPorts = [ - 9090 - ]; - services.prometheus = { - enable = true; - - globalConfig = { - scrape_interval = settings.scrape_interval; - }; - - alertmanagers = [ - { - scheme = "http"; - path_prefix = "/"; - static_configs = [ { targets = [ "localhost:9093" ]; } ]; - } + lib.mkMerge [ + { + networking.firewall.allowedTCPPorts = [ + 9090 ]; - - alertmanager = { + services.prometheus = { enable = true; - configuration = { - global = { - resolve_timeout = "5m"; + + globalConfig = { + scrape_interval = settings.scrape_interval; + }; + + alertmanagers = [ + { + scheme = "http"; + path_prefix = "/"; + static_configs = [ { targets = [ "localhost:9093" ]; } ]; + } + ]; + + alertmanager = { + enable = true; + configuration = { + global = { + resolve_timeout = "5m"; + }; + route = { + receiver = "default"; + routes = map (mReceiver: { receiver = mReceiver; }) matrixRoomReceivers; + }; + receivers = [ + { name = "default"; } + ] + ++ map (mReceiver: { + name = mReceiver; + webhook_configs = [ + { + url_file = config.clan.core.vars.generators.prometheus.files.matrix-alertmanager-urlfile.path; + send_resolved = true; + } + ]; + }) matrixRoomReceivers; }; - route = { - receiver = "default"; - routes = map (mReceiver: { receiver = mReceiver; }) matrixRoomReceivers; - }; - receivers = [ - { name = "default"; } - ] - ++ map (mReceiver: { - name = mReceiver; - webhook_config = [ + }; + + scrapeConfigs = lib.mapAttrsToList (machineName: machineVal: { + tls_config.insecure_skip_verify = true; + job_name = "${machineName}"; + static_configs = lib.mapAttrsToList ( + exporterName: exporterVal: + let + targetPort = + if exporterVal ? port then + exporterVal.port + else + config.services.prometheus.exporters."${exporterName}".port; + targetHost = getYggdrasilIP machineName; + in + { + targets = [ "[${targetHost}]:${lib.toString targetPort}" ]; + } + ) machineVal.settings.exporters; + }) roles.nodes.machines; + + rules = [ + (builtins.toJSON { + groups = [ { - url_file = config.clan.core.vars.generators.prometheus.files.matrix-alertmanager-urlfile.path; - send_resolved = true; + name = "default"; + rules = [ + { + alert = "NodesDown"; + expr = "count by (job) (up == 0) > 0"; + for = "1m"; + labels = { + severity = "critical"; + }; + annotations.summary = "Node {{ $labels.job }} has been down for more than 1 minutes."; + } + { + alert = "SmartCtlErrors"; + expr = "smartctl_device_error_log_count > 0"; + for = "5m"; + labels = { + severity = "critical"; + }; + annotations.summary = '' + Errors occur on {{ $labels.job }} + Disk {{ $labels.device }} {{ $value }} + ''; + } + { + alert = "ZFSPoolsHealth"; + expr = "zfs_pool_health > 0"; + for = "5m"; + labels = { + severity = "critical"; + }; + annotations.summary = '' + Unhealthy Pool at {{ $labels.job }} + Pool {{ $labels.pool }} value {{ $value }} + ''; + } + ] + ++ settings.extra_rules; } ]; - }) matrixRoomReceivers; + }) + ]; + + }; + + } + (lib.optionalAttrs settings.matrix-alertmanager.enable { + + clan.core.vars.generators.prometheus = { + files.matrix-alertmanager-token.secret = true; + files.matrix-alertmanager-secret.secret = true; + files.matrix-alertmanager-urlfile = { + secret = true; + owner = "alertmanager"; + group = "alertmanager"; }; + script = '' + echo "" > $out/matrix-alertmanager-token + openssl rand -hex 32 > "$out"/matrix-alertmanager-secret + + echo "http://localhost:3000/alerts?secret=$(cat $out/matrix-alertmanager-secret)" > $out/matrix-alertmanager-urlfile + ''; + runtimeInputs = [ + pkgs.openssl + ]; }; - scrapeConfigs = lib.mapAttrsToList (machineName: machineVal: { - tls_config.insecure_skip_verify = true; - job_name = "${machineName}"; - static_configs = lib.mapAttrsToList ( - exporterName: exporterVal: - let - targetPort = - if exporterVal ? port then - exporterVal.port - else - config.services.prometheus.exporters."${exporterName}".port; - targetHost = getYggdrasilIP machineName; - in - { - targets = [ "[${targetHost}]:${lib.toString targetPort}" ]; - } - ) machineVal.settings.exporters; - }) roles.nodes.machines; - - rules = [ - (builtins.toJSON { - groups = [ - { - name = "default"; - rules = [ - { - alert = "NodesDown"; - expr = "count by (job) (up == 0) > 0"; - for = "1m"; - labels = { - severity = "critical"; - }; - annotations.summary = "Node {{ $labels.job }} has been down for more than 1 minutes."; - } - { - alert = "SmartCtlErrors"; - expr = "smartctl_device_error_log_count > 0"; - for = "5m"; - labels = { - severity = "critical"; - }; - annotations.summary = '' - Errors occur on {{ $labels.job }} - Disk {{ $labels.device }} {{ $value }} - ''; - } - { - alert = "ZFSPoolsHealth"; - expr = "zfs_pool_health > 0"; - for = "5m"; - labels = { - severity = "critical"; - }; - annotations.summary = '' - Unhealthy Pool at {{ $labels.job }} - Pool {{ $labels.pool }} value {{ $value }} - ''; - } - ] - ++ settings.extra_rules; - } - ]; - }) - ]; - - }; - - } - // lib.optionalAttrs settings.matrix-alertmanager.enable { - clan.core.vars.generators.prometheus = { - files.matrix-alertmanager-token.secret = true; - files.matrix-alertmanager-secret.secret = true; - files.matrix-alertmanager-urlfile = { - secret = true; - owner = "alertmanager"; - group = "alertmanager"; + services.matrix-alertmanager = lib.mkIf settings.matrix-alertmanager.enable { + enable = true; + tokenFile = config.clan.core.vars.generators.prometheus.files.matrix-alertmanager-token.path; + secretFile = config.clan.core.vars.generators.prometheus.files.matrix-alertmanager-secret.path; + homeserverUrl = settings.matrix-alertmanager.homeserverUrl; + matrixUser = settings.matrix-alertmanager.matrixUser; + matrixRooms = settings.matrix-alertmanager.matrixRooms; }; - script = '' - echo "" > $out/matrix-alertmanager-token - openssl rand -hex 32 > "$out"/matrix-alertmanager-secret - - echo "http://localhost:3000/alerts?secret=$(cat $out/matrix-alertmanager-secret)" > $out/matrix-alertmanager-urlfile - ''; - runtimeInputs = [ - pkgs.openssl - ]; - }; - - services.matrix-alertmanager = lib.mkIf settings.matrix-alertmanager.enable { - enable = true; - tokenFile = config.clan.core.vars.generators.prometheus.files.matrix-alertmanager-token.path; - secretFile = config.clan.core.vars.generators.prometheus.files.matrix-alertmanager-secret.path; - homeserverUrl = settings.matrix-alertmanager.homeserverUrl; - matrixUser = settings.matrix-alertmanager.matrixUser; - matrixRooms = settings.matrix-alertmanager.matrixRooms; - }; - }; + }) + ]; }; };