diff --git a/modules/nixos/services/alertmanager.nix b/modules/nixos/services/alertmanager.nix index f483fae..88bb9b7 100644 --- a/modules/nixos/services/alertmanager.nix +++ b/modules/nixos/services/alertmanager.nix @@ -63,14 +63,6 @@ in name = "ntfy"; webhook_configs = lib.singleton { url = "http://localhost:${toString cfg.ntfyBridgePort}/hook"; }; }; - inhibit_rules = lib.singleton { - source_matchers = [ - ''alertname="InstanceDown"'' - ''job="node"'' - ]; - target_matchers = lib.singleton ''alertname!="InstanceDown"''; - equal = [ "instance" ]; - }; }; }; diff --git a/modules/nixos/services/nameservers/public.nix b/modules/nixos/services/nameservers/public.nix index 0ad08a6..fee45f1 100644 --- a/modules/nixos/services/nameservers/public.nix +++ b/modules/nixos/services/nameservers/public.nix @@ -46,12 +46,7 @@ let in inputs.dns.lib.toString zone { SOA = { - nameServer = - nsRecords - |> lib.map (record: record.name) - |> lib.naturalSort - |> lib.head - |> (hostName: "${hostName}.${zone}."); + nameServer = "${netCfg.hostName}.${zone}."; adminEmail = "hostmaster@sstork.dev"; serial = 1; }; diff --git a/modules/nixos/services/prometheus.nix b/modules/nixos/services/prometheus.nix index 22ee4cc..ef20489 100644 --- a/modules/nixos/services/prometheus.nix +++ b/modules/nixos/services/prometheus.nix @@ -58,25 +58,25 @@ in scrapeConfigs = [ { job_name = "prometheus"; - static_configs = - allHosts - |> lib.attrValues - |> lib.filter (host: host.config.custom.services.prometheus.enable) - |> lib.map (host: { - targets = lib.singleton host.config.custom.services.prometheus.domain; - labels.instance = host.config.networking.hostName; - }); + static_configs = lib.singleton { + targets = + allHosts + |> lib.attrValues + |> lib.map (host: host.config.custom.services.prometheus) + |> lib.filter (prometheus: prometheus.enable) + |> lib.map (prometheus: prometheus.domain); + }; } { job_name = "alertmanager"; - static_configs = - allHosts - |> lib.attrValues - |> lib.filter (host: host.config.custom.services.alertmanager.enable) - |> lib.map (host: { - targets = lib.singleton host.config.custom.services.alertmanager.domain; - labels.instance = host.config.networking.hostName; - }); + static_configs = lib.singleton { + targets = + allHosts + |> lib.attrValues + |> lib.map (host: host.config.custom.services.alertmanager) + |> lib.filter (alertmanager: alertmanager.enable) + |> lib.map (alertmanager: alertmanager.domain); + }; } ]; @@ -84,55 +84,39 @@ in { groups = lib.singleton { name = "Rules"; - rules = - ( - allHosts - |> lib.attrValues - |> lib.filter (host: host.config.custom.services.alloy.enable) - |> lib.filter (host: host.config.custom.networking.overlay.role == "server") - |> lib.map (host: host.config.networking.hostName) - |> lib.map (hostName: { - alert = "InstanceDown"; - expr = ''absent_over_time(up{instance="${hostName}", job="node"}[2m])''; - labels.severity = "critical"; - annotations = { - summary = "${hostName} is DOWN"; - description = "${hostName} has not reported any metrics for more than 2 minutes."; - }; - }) - ) - ++ [ - { - alert = "ServiceDown"; - expr = ''up{job=~"prometheus|alertmanager"} == 0''; - for = "2m"; - annotations = { - summary = "{{ $labels.job }} on {{ $labels.instance }} is DOWN"; - description = "{{ $labels.job }} on {{ $labels.instance }} has been down for more than 2 minutes."; - }; - } - { - alert = "CominDeploymentFailed"; - expr = ''comin_deployment_info{status!="done"}''; - annotations = { - summary = "{{ $labels.instance }} deployment failed"; - description = "The deployment of {{ $labels.instance }} with comin is failing."; - }; - } - { - alert = "CominDeploymentCommitMismatch"; - expr = "count(count by (commit_id) (comin_deployment_info)) > 1"; - for = "10m"; - annotations = { - summary = "Hosts are running different commits"; - description = "Not all hosts are running the same git commit, which may indicate a failed deployment and could lead to incompatible configurations."; - }; - } - ]; + rules = [ + { + alert = "InstanceDown"; + expr = "up == 0"; + for = "2m"; + labels.severity = "critical"; + annotations = { + summary = "{{ $labels.instance }} is DOWN"; + description = "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 2 minutes."; + }; + } + { + alert = "CominDeploymentFailed"; + expr = ''comin_deployment_info{status!="done"}''; + annotations = { + summary = "{{ $labels.instance }} deployment failed"; + description = "The deployment of {{ $labels.instance }} with comin is failing."; + }; + } + { + alert = "CominDeploymentCommitMismatch"; + expr = "count(count by (commit_id) (comin_deployment_info)) > 1"; + for = "10m"; + annotations = { + summary = "Hosts are running different commits"; + description = "Not all hosts are running the same git commit, which may indicate a failed deployment and could lead to incompatible configurations."; + }; + } + ]; }; } |> lib.strings.toJSON - |> pkgs.writeText "prometheus-rules" + |> pkgs.writeText "prometheus-instance-down-rule" |> toString |> lib.singleton; };