Replace VictoriaLogs with Prometheus and Alertmanager.

* Remove the VictoriaLogs module, its Grafana datasource, plugin and dashboard,
  and the Alloy log collection and VictoriaLogs scrape job.
* Add Prometheus and Alertmanager modules and enable both in the server profile.

Note: the blob ids on the `index` lines of the two new files are carried over
unchanged and do not describe the file contents below.

diff --git a/hosts/vps-monitor/default.nix b/hosts/vps-monitor/default.nix
index a8f8d97..1cc36c3 100644
--- a/hosts/vps-monitor/default.nix
+++ b/hosts/vps-monitor/default.nix
@@ -55,11 +55,6 @@
         enable = true;
         domain = "metrics.${privateDomain}";
       };
-
-      victorialogs = {
-        enable = true;
-        domain = "logs.${privateDomain}";
-      };
     };
   };
 }
diff --git a/modules/nixos/services/alertmanager.nix b/modules/nixos/services/alertmanager.nix
new file mode 100644
index 0000000..6e4f103
--- /dev/null
+++ b/modules/nixos/services/alertmanager.nix
@@ -0,0 +1,100 @@
+{
+  config,
+  lib,
+  allHosts,
+  ...
+}:
+let
+  cfg = config.custom.services.alertmanager;
+in
+{
+  # Alertmanager behind Caddy, clustered with every other host that enables
+  # this module; alerts are delivered to ntfy via the alertmanager-ntfy bridge.
+  options.custom.services.alertmanager = {
+    enable = lib.mkEnableOption "";
+    # Public host name; used for webExternalUrl and the Caddy virtual host.
+    domain = lib.mkOption {
+      type = lib.types.nonEmptyStr;
+      default = "";
+    };
+    # Local web/API port (Alertmanager listens on localhost only).
+    port = lib.mkOption {
+      type = lib.types.port;
+      default = 9093;
+    };
+    # Port for cluster traffic on the overlay address.
+    clusterPort = lib.mkOption {
+      type = lib.types.port;
+      default = 9094;
+    };
+    # Local port of the alertmanager-ntfy webhook bridge.
+    ntfyBridgePort = lib.mkOption {
+      type = lib.types.port;
+      default = 11512;
+    };
+  };
+
+  config = lib.mkIf cfg.enable {
+    services = {
+      prometheus.alertmanager = {
+        enable = true;
+
+        listenAddress = "localhost";
+        inherit (cfg) port;
+        webExternalUrl = "https://${cfg.domain}";
+
+        extraFlags = [
+          "--cluster.advertise-address=${config.custom.networking.overlay.address}:${toString cfg.clusterPort}"
+          "--cluster.listen-address=${config.custom.networking.overlay.address}:${toString cfg.clusterPort}"
+        ]
+        # One --cluster.peer flag per other host that enables this module,
+        # using that peer's own clusterPort so per-host overrides are honoured.
+        ++ (
+          allHosts
+          |> lib.attrValues
+          |> lib.filter (host: host.config.networking.hostName != config.networking.hostName)
+          |> lib.filter (host: host.config.custom.services.alertmanager.enable)
+          |> lib.map (
+            host:
+            "--cluster.peer=${host.config.custom.networking.overlay.address}:${toString host.config.custom.services.alertmanager.clusterPort}"
+          )
+        );
+
+        configuration = {
+          route = {
+            group_by = [
+              "alertname"
+              "instance"
+            ];
+            receiver = "ntfy";
+          };
+          # Single receiver: the webhook exposed by the local ntfy bridge.
+          receivers = lib.singleton {
+            name = "ntfy";
+            webhook_configs = lib.singleton { url = "http://localhost:${toString cfg.ntfyBridgePort}/hook"; };
+          };
+        };
+      };
+
+      prometheus.alertmanager-ntfy = {
+        enable = true;
+        settings = {
+          http.addr = "localhost:${toString cfg.ntfyBridgePort}";
+          ntfy = {
+            baseurl = "https://ntfy.sh";
+            notification.topic = "splitleaf";
+          };
+        };
+      };
+
+      # Let members of the "server" group reach the cluster port on the mesh.
+      nebula.networks.mesh.firewall.inbound = lib.singleton {
+        port = cfg.clusterPort;
+        proto = "any";
+        group = "server";
+      };
+    };
+
+    custom.services.caddy.virtualHosts.${cfg.domain}.port = cfg.port;
+  };
+}
diff --git a/modules/nixos/services/alloy.nix b/modules/nixos/services/alloy.nix
index 6ad99c5..3bed041 100644
--- a/modules/nixos/services/alloy.nix
+++ b/modules/nixos/services/alloy.nix
@@ -17,48 +17,25 @@ in
       type = lib.types.nonEmptyStr;
       default = "https://metrics.${config.custom.networking.overlay.domain}/prometheus/api/v1/write";
     };
-    logsEndpoint = lib.mkOption {
-      type = lib.types.nonEmptyStr;
-      default = "https://logs.${config.custom.networking.overlay.domain}/insert/loki/api/v1/push";
-    };
-    collect = {
-      metrics = {
-        system = lib.mkEnableOption "" // {
-          default = true;
-        };
-        victorialogs = lib.mkEnableOption "" // {
-          default = config.services.victorialogs.enable;
-        };
-        caddy = lib.mkEnableOption "" // {
-          default = config.services.caddy.enable;
-        };
+    collect.metrics = {
+      system = lib.mkEnableOption "" // {
+        default = true;
       };
-      logs.openssh = lib.mkEnableOption "" // {
-        default = config.services.openssh.enable;
+      caddy = lib.mkEnableOption "" // {
+        default = config.services.caddy.enable;
       };
     };
   };
 
   config = lib.mkIf cfg.enable {
     assertions =
-      let
-        metricsAssertions =
-          cfg.collect.metrics
-          |> lib.attrNames
-          |> lib.filter (name: name != "system")
-          |> lib.map (name: {
-            assertion = cfg.collect.metrics.${name} -> config.services.${name}.enable;
-            message = "Alloy cannot collect `${name}` metrics without the `${name}` service";
-          });
-        logsAssertions =
-          cfg.collect.logs
-          |> lib.attrNames
-          |> lib.map (name: {
-            assertion = cfg.collect.logs.${name} -> config.services.${name}.enable;
-            message = "Alloy cannot collect '${name}' logs without the '${name}' service";
-          });
-      in
-      metricsAssertions ++ logsAssertions;
+      cfg.collect.metrics
+      |> lib.attrNames
+      |> lib.filter (name: name != "system")
+      |> lib.map (name: {
+        assertion = cfg.collect.metrics.${name} -> config.services.${name}.enable;
+        message = "Alloy cannot collect `${name}` metrics without the `${name}` service";
+      });
 
     services.alloy = {
       enable = true;
@@ -84,16 +61,6 @@ in
           }
         '';
       };
-      "alloy/logs-endpoint.alloy" = {
-        enable = cfg.collect.logs |> anyIsTrue;
-        text = ''
-          loki.write "default" {
-            endpoint {
-              url = "${cfg.logsEndpoint}"
-            }
-          }
-        '';
-      };
       "alloy/system-metrics.alloy" = {
         enable = cfg.collect.metrics.system;
         text = ''
@@ -108,20 +75,6 @@ in
           }
         '';
       };
-      "alloy/victorialogs-metrics.alloy" = {
-        enable = cfg.collect.metrics.victorialogs;
-        text = ''
-          prometheus.scrape "victorialogs" {
-            targets = [{
-              __address__ = "localhost:${toString config.custom.web-services.victorialogs.port}",
-              job = "victorialogs",
-              instance = constants.hostname,
-            }]
-            forward_to = [prometheus.remote_write.default.receiver]
-            scrape_interval = "15s"
-          }
-        '';
-      };
       "alloy/caddy-metrics.alloy" = {
         enable = cfg.collect.metrics.caddy;
         text = ''
@@ -136,15 +89,6 @@ in
           }
         '';
       };
-      "alloy/sshd-logs.alloy" = {
-        enable = cfg.collect.logs.openssh;
-        text = ''
-          loki.source.journal "sshd" {
-            matches = "_SYSTEMD_UNIT=sshd.service"
-            forward_to = [loki.write.default.receiver]
-          }
-        '';
-      };
     };
 
     custom.services.caddy.virtualHosts.${cfg.domain}.port = cfg.port;
diff --git a/modules/nixos/services/prometheus.nix b/modules/nixos/services/prometheus.nix
new file mode 100644
index 0000000..7e2cc16
--- /dev/null
+++ b/modules/nixos/services/prometheus.nix
@@ -0,0 +1,100 @@
+{
+  config,
+  pkgs,
+  lib,
+  allHosts,
+  ...
+}:
+let
+  cfg = config.custom.services.prometheus;
+
+  # Domains of every host in `allHosts` that enables `custom.services.<name>`.
+  enabledDomains =
+    name:
+    allHosts
+    |> lib.attrValues
+    |> lib.map (host: host.config.custom.services.${name})
+    |> lib.filter (service: service.enable)
+    |> lib.map (service: service.domain);
+in
+{
+  # Prometheus behind Caddy: scrapes every enabled Prometheus and Alertmanager
+  # and raises a single InstanceDown alert.
+  options.custom.services.prometheus = {
+    enable = lib.mkEnableOption "";
+    # Public host name; used for webExternalUrl and the Caddy virtual host.
+    domain = lib.mkOption {
+      type = lib.types.nonEmptyStr;
+      default = "";
+    };
+    # Local web/API port (Prometheus listens on localhost only).
+    port = lib.mkOption {
+      type = lib.types.port;
+      default = 9090;
+    };
+  };
+
+  config = lib.mkIf cfg.enable {
+    services.prometheus = {
+      enable = true;
+      stateDir = "prometheus";
+
+      listenAddress = "localhost";
+      inherit (cfg) port;
+      webExternalUrl = "https://${cfg.domain}";
+
+      globalConfig = {
+        scrape_interval = "30s";
+        external_labels.monitor = "global";
+      };
+
+      # Alerts are sent to every enabled Alertmanager via its HTTPS domain.
+      alertmanagers = lib.singleton {
+        scheme = "https";
+        static_configs = lib.singleton { targets = enabledDomains "alertmanager"; };
+      };
+
+      # Targets are the same HTTPS domains, so set the scheme explicitly;
+      # Prometheus would otherwise scrape them over plain http.
+      scrapeConfigs = [
+        {
+          job_name = "prometheus";
+          scheme = "https";
+          static_configs = lib.singleton { targets = enabledDomains "prometheus"; };
+        }
+        {
+          job_name = "alertmanager";
+          scheme = "https";
+          static_configs = lib.singleton { targets = enabledDomains "alertmanager"; };
+        }
+      ];
+
+      # The rule set is serialised to JSON and written to a store file whose
+      # path becomes the only entry of ruleFiles.
+      ruleFiles =
+        {
+          groups = lib.singleton {
+            name = "InstanceDown";
+            rules = lib.singleton {
+              alert = "InstanceDown";
+              expr = "up == 0";
+              for = "2m";
+              labels.severity = "critical";
+              annotations.summary = "Instance {{ $labels.instance }} down";
+            };
+          };
+        }
+        |> lib.strings.toJSON
+        |> pkgs.writeText "prometheus-instance-down-rule"
+        |> toString
+        |> lib.singleton;
+    };
+
+    custom = {
+      services.caddy.virtualHosts.${cfg.domain}.port = cfg.port;
+
+      # Register the Prometheus state directory with the persistence module.
+      persistence.directories = [ "/var/lib/${config.services.prometheus.stateDir}" ];
+    };
+  };
+}
diff --git a/modules/nixos/web-services/grafana.nix b/modules/nixos/web-services/grafana.nix
index 3bf9049..60f4134 100644
--- a/modules/nixos/web-services/grafana.nix
+++ b/modules/nixos/web-services/grafana.nix
@@ -37,15 +37,6 @@ in
           default = "https://${config.custom.web-services.victoriametrics.domain}";
         };
       };
-      victorialogs = {
-        enable = lib.mkEnableOption "" // {
-          default = config.custom.web-services.victorialogs.enable;
-        };
-        url = lib.mkOption {
-          type = lib.types.nonEmptyStr;
-          default = "https://${config.custom.web-services.victorialogs.domain}";
-        };
-      };
     };
     dashboards = {
       nodeExporter.enable = lib.mkEnableOption "" // {
@@ -54,9 +45,6 @@ in
       victoriametrics.enable = lib.mkEnableOption "" // {
         default = config.custom.web-services.victoriametrics.enable;
       };
-      victorialogs.enable = lib.mkEnableOption "" // {
-        default = config.custom.web-services.victorialogs.enable;
-      };
     };
   };
 
@@ -110,12 +98,6 @@ in
             inherit (cfg.datasources.victoriametrics) url;
             isDefault = false;
           })
-          (lib.mkIf cfg.datasources.victorialogs.enable {
-            name = "VictoriaLogs";
-            type = "victoriametrics-logs-datasource";
-            inherit (cfg.datasources.victorialogs) url;
-            isDefault = false;
-          })
         ];
       };
     };
@@ -126,7 +108,6 @@ in
         in
         [
           (lib.optional cfg.datasources.victoriametrics.enable plugins.victoriametrics-metrics-datasource)
-          (lib.optional cfg.datasources.victorialogs.enable plugins.victoriametrics-logs-datasource)
         ]
         |> lib.concatLists;
     };
@@ -157,22 +138,6 @@ in
             ''
           );
       };
-      # https://grafana.com/grafana/dashboards/22084-victorialogs-single-node/
-      "grafana-dashboards/victorialogs-single-node-patched.json" = {
-        enable = cfg.dashboards.victorialogs.enable;
-        source =
-          pkgs.fetchurl {
-            name = "victorialogs-single-node.json";
-            url = "https://grafana.com/api/dashboards/22084/revisions/8/download";
-            hash = "sha256-/a3Rbp/6oyiLBnQtGupyFZW+fIHQfkyKRRTyfofxVTM=";
-          }
-          |> (
-            src:
-            pkgs.runCommand "victorialogs-single-node-patched.json" { buildInputs = [ pkgs.gnused ]; } ''
-              sed 's/victoria-logs-//g' ${src} > $out
-            ''
-          );
-      };
     };
 
     custom.services.caddy.virtualHosts.${cfg.domain}.port = cfg.port;
diff --git a/modules/nixos/web-services/victorialogs.nix b/modules/nixos/web-services/victorialogs.nix
deleted file mode 100644
index 80c9dc9..0000000
--- a/modules/nixos/web-services/victorialogs.nix
+++ /dev/null
@@ -1,44 +0,0 @@
-{ config, lib, ... }:
-let
-  cfg = config.custom.web-services.victorialogs;
-in
-{
-  options.custom.web-services.victorialogs = {
-    enable = lib.mkEnableOption "";
-    domain = lib.mkOption {
-      type = lib.types.nonEmptyStr;
-      default = "";
-    };
-    port = lib.mkOption {
-      type = lib.types.port;
-      default = 9428;
-    };
-  };
-
-  config = lib.mkIf cfg.enable {
-    users = {
-      users.victorialogs = {
-        isSystemUser = true;
-        group = config.users.groups.victoriametrics.name;
-      };
-      groups.victorialogs = { };
-    };
-
-    systemd.services.victorialogs.serviceConfig = {
-      DynamicUser = lib.mkForce false;
-      User = config.users.users.victorialogs.name;
-      Group = config.users.groups.victorialogs.name;
-    };
-
-    services.victorialogs = {
-      enable = true;
-      listenAddress = "localhost:${toString cfg.port}";
-    };
-
-    custom = {
-      services.caddy.virtualHosts.${cfg.domain}.port = cfg.port;
-
-      persistence.directories = [ "/var/lib/${config.services.victorialogs.stateDir}" ];
-    };
-  };
-}
diff --git a/profiles/server.nix b/profiles/server.nix
index 08c9454..0fd337c 100644
--- a/profiles/server.nix
+++ b/profiles/server.nix
@@ -12,6 +12,14 @@
         enable = true;
         domain = "alloy.${config.custom.networking.overlay.fqdn}";
       };
+      prometheus = {
+        enable = true;
+        domain = "prometheus.${config.custom.networking.overlay.fqdn}";
+      };
+      alertmanager = {
+        enable = true;
+        domain = "alertmanager.${config.custom.networking.overlay.fqdn}";
+      };
     };
   };
 }