diff --git a/modules/nixos/services/alertmanager.nix b/modules/nixos/services/alertmanager.nix
new file mode 100644
index 0000000..6e4f103
--- /dev/null
+++ b/modules/nixos/services/alertmanager.nix
@@ -0,0 +1,100 @@
+# Runs Alertmanager on localhost, clusters it with every other host that enables
+# this module, and routes all alerts to ntfy through a local webhook bridge.
+{
+  config,
+  lib,
+  allHosts,
+  ...
+}:
+let
+  cfg = config.custom.services.alertmanager;
+in
+{
+  options.custom.services.alertmanager = {
+    enable = lib.mkEnableOption "";
+    # Hostname used for the external URL and the Caddy virtual host.
+    domain = lib.mkOption {
+      type = lib.types.nonEmptyStr;
+      default = "";
+    };
+    # Port of the Alertmanager web listener on localhost.
+    port = lib.mkOption {
+      type = lib.types.port;
+      default = 9093;
+    };
+    # Port of the cluster listener on this host's overlay address.
+    clusterPort = lib.mkOption {
+      type = lib.types.port;
+      default = 9094;
+    };
+    # Port of the alertmanager-ntfy bridge on localhost.
+    ntfyBridgePort = lib.mkOption {
+      type = lib.types.port;
+      default = 11512;
+    };
+  };
+
+  config = lib.mkIf cfg.enable {
+    services = {
+      prometheus.alertmanager = {
+        enable = true;
+
+        listenAddress = "localhost";
+        inherit (cfg) port;
+        webExternalUrl = "https://${cfg.domain}";
+
+        extraFlags = [
+          "--cluster.advertise-address=${config.custom.networking.overlay.address}:${toString cfg.clusterPort}"
+          "--cluster.listen-address=${config.custom.networking.overlay.address}:${toString cfg.clusterPort}"
+        ]
+        ++ (
+          allHosts
+          |> lib.attrValues
+          |> lib.filter (host: host.config.networking.hostName != config.networking.hostName)
+          |> lib.filter (host: host.config.custom.services.alertmanager.enable)
+          # Each peer is dialled on the cluster port from its own configuration,
+          # which may differ from the local `clusterPort`.
+          |> lib.map (
+            host:
+            "--cluster.peer=${host.config.custom.networking.overlay.address}:${toString host.config.custom.services.alertmanager.clusterPort}"
+          )
+        );
+
+        configuration = {
+          route = {
+            group_by = [
+              "alertname"
+              "instance"
+            ];
+            receiver = "ntfy";
+          };
+          receivers = lib.singleton {
+            name = "ntfy";
+            # Points at the alertmanager-ntfy listener configured below.
+            webhook_configs = lib.singleton { url = "http://localhost:${toString cfg.ntfyBridgePort}/hook"; };
+          };
+        };
+      };
+
+      prometheus.alertmanager-ntfy = {
+        enable = true;
+        settings = {
+          http.addr = "localhost:${toString cfg.ntfyBridgePort}";
+          ntfy = {
+            baseurl = "https://ntfy.sh";
+            notification.topic = "splitleaf";
+          };
+        };
+      };
+
+      # Nebula inbound rule for the cluster port, limited to the "server" group.
+      nebula.networks.mesh.firewall.inbound = lib.singleton {
+        port = cfg.clusterPort;
+        proto = "any";
+        group = "server";
+      };
+    };
+
+    custom.services.caddy.virtualHosts.${cfg.domain}.port = cfg.port;
+  };
+}
diff --git a/modules/nixos/services/prometheus.nix b/modules/nixos/services/prometheus.nix
new file mode 100644
index 0000000..7e2cc16
--- /dev/null
+++ b/modules/nixos/services/prometheus.nix
@@ -0,0 +1,98 @@
+# Runs Prometheus on localhost, scrapes and alerts via every host that enables
+# the prometheus/alertmanager modules, and ships a single InstanceDown rule.
+{
+  config,
+  pkgs,
+  lib,
+  allHosts,
+  ...
+}:
+let
+  cfg = config.custom.services.prometheus;
+
+  # Domains of all hosts that have `custom.services.<service>` enabled.
+  enabledDomains =
+    service:
+    allHosts
+    |> lib.attrValues
+    |> lib.map (host: host.config.custom.services.${service})
+    |> lib.filter (instance: instance.enable)
+    |> lib.map (instance: instance.domain);
+in
+{
+  options.custom.services.prometheus = {
+    enable = lib.mkEnableOption "";
+    # Hostname used for the external URL and the Caddy virtual host.
+    domain = lib.mkOption {
+      type = lib.types.nonEmptyStr;
+      default = "";
+    };
+    # Port of the Prometheus web listener on localhost.
+    port = lib.mkOption {
+      type = lib.types.port;
+      default = 9090;
+    };
+  };
+
+  config = lib.mkIf cfg.enable {
+    services.prometheus = {
+      enable = true;
+      stateDir = "prometheus";
+
+      listenAddress = "localhost";
+      inherit (cfg) port;
+      webExternalUrl = "https://${cfg.domain}";
+
+      globalConfig = {
+        scrape_interval = "30s";
+        external_labels.monitor = "global";
+      };
+
+      alertmanagers = lib.singleton {
+        scheme = "https";
+        static_configs = lib.singleton { targets = enabledDomains "alertmanager"; };
+      };
+
+      # Targets are bare domains whose external URL is https://<domain>, so the
+      # scheme is set explicitly; Prometheus would otherwise scrape over http.
+      scrapeConfigs = [
+        {
+          job_name = "prometheus";
+          scheme = "https";
+          static_configs = lib.singleton { targets = enabledDomains "prometheus"; };
+        }
+        {
+          job_name = "alertmanager";
+          scheme = "https";
+          static_configs = lib.singleton { targets = enabledDomains "alertmanager"; };
+        }
+      ];
+
+      # The rule group is rendered to a JSON file in the store and passed as the
+      # only rule file (JSON is valid YAML, which is what Prometheus parses).
+      ruleFiles =
+        {
+          groups = lib.singleton {
+            name = "InstanceDown";
+            rules = lib.singleton {
+              alert = "InstanceDown";
+              expr = "up == 0";
+              for = "2m";
+              labels.severity = "critical";
+              annotations.summary = "Instance {{ $labels.instance }} down";
+            };
+          };
+        }
+        |> lib.strings.toJSON
+        |> pkgs.writeText "prometheus-instance-down-rule"
+        |> toString
+        |> lib.singleton;
+    };
+
+    custom = {
+      services.caddy.virtualHosts.${cfg.domain}.port = cfg.port;
+
+      persistence.directories = [ "/var/lib/${config.services.prometheus.stateDir}" ];
+    };
+  };
+}
diff --git a/profiles/server.nix b/profiles/server.nix
index 08c9454..0fd337c 100644
--- a/profiles/server.nix
+++ b/profiles/server.nix
@@ -12,6 +12,14 @@
         enable = true;
         domain = "alloy.${config.custom.networking.overlay.fqdn}";
       };
+      prometheus = {
+        enable = true;
+        domain = "prometheus.${config.custom.networking.overlay.fqdn}";
+      };
+      alertmanager = {
+        enable = true;
+        domain = "alertmanager.${config.custom.networking.overlay.fqdn}";
+      };
     };
   };
 }