From 89b4cb7299c6e12b83e2bf807e85eb93a9e27984 Mon Sep 17 00:00:00 2001
From: Ryan Yin
Date: Tue, 29 Oct 2024 10:45:10 +0800
Subject: [PATCH] nixos/victoriametrics: harden systemd unit, add more options.

---
Review notes (everything between "---" and the first "diff --git" is ignored by `git am`):
  * The line structure of this patch was rebuilt from a whitespace-collapsed copy.
    Indentation of context/removed lines is inferred; check it against the target
    tree (or apply with `git apply --ignore-whitespace`) before relying on it.
  * The author e-mail address is missing from this copy of the patch.
  * Documentation links inside the option descriptions were lost in the collapsed
    copy and have been filled back in; confirm them against upstream.
  * `-promscrape.config` is only passed when `prometheusConfig` is non-empty: the
    option defaults to `{ }` and can never be null, so a null check is always true.
  * `mkEnableOption` prepends "Whether to enable " and appends ".", so its argument
    is phrased as a noun phrase without a trailing period.

 .../services/databases/victoriametrics.nix    | 189 +++++++++++++++---
 nixos/tests/victoriametrics.nix               |  62 +++---
 2 files changed, 192 insertions(+), 59 deletions(-)

diff --git a/nixos/modules/services/databases/victoriametrics.nix b/nixos/modules/services/databases/victoriametrics.nix
index 923163a8049e6..b26ad47c9a136 100644
--- a/nixos/modules/services/databases/victoriametrics.nix
+++ b/nixos/modules/services/databases/victoriametrics.nix
@@ -1,65 +1,190 @@
-{ config, pkgs, lib, ... }:
-let cfg = config.services.victoriametrics; in
 {
-  options.services.victoriametrics = with lib; {
-    enable = mkEnableOption "VictoriaMetrics, a time series database, long-term remote storage for Prometheus";
+  config,
+  pkgs,
+  lib,
+  ...
+}:
+with lib;
+let
+  cfg = config.services.victoriametrics;
+  settingsFormat = pkgs.formats.yaml { };
+
+  startCLIList =
+    [
+      "${cfg.package}/bin/victoria-metrics"
+      "-storageDataPath=/var/lib/${cfg.stateDir}"
+      "-httpListenAddr=${cfg.listenAddress}"
+
+    ]
+    ++ lib.optionals (cfg.retentionPeriod != null) [ "-retentionPeriod=${cfg.retentionPeriod}" ]
+    ++ cfg.extraOptions;
+  prometheusConfigYml = checkedConfig (
+    settingsFormat.generate "prometheusConfig.yaml" cfg.prometheusConfig
+  );
+
+  # Check the generated scrape config at build time by running the binary with -dryRun.
+  checkedConfig =
+    file:
+    pkgs.runCommand "checked-config" { nativeBuildInputs = [ cfg.package ]; } ''
+      ln -s ${file} $out
+      ${lib.escapeShellArgs startCLIList} -promscrape.config=${file} -dryRun
+    '';
+in
+{
+  options.services.victoriametrics = {
+    enable = mkEnableOption "VictoriaMetrics, a fast, cost-effective and scalable monitoring solution and time series database";
     package = mkPackageOption pkgs "victoriametrics" { };
+
     listenAddress = mkOption {
       default = ":8428";
       type = types.str;
       description = ''
-        The listen address for the http interface.
+        TCP address to listen for incoming http requests.
       '';
     };
+
+    stateDir = mkOption {
+      type = types.str;
+      default = "victoriametrics";
+      description = ''
+        Directory below `/var/lib` to store VictoriaMetrics metrics data.
+        This directory will be created automatically using systemd's StateDirectory mechanism.
+      '';
+    };
+
     retentionPeriod = mkOption {
-      type = types.int;
-      default = 1;
+      type = types.nullOr types.str;
+      default = null;
+      example = "15d";
+      description = ''
+        How long to retain samples in storage.
+        The minimum retentionPeriod is 24h or 1d. See also -retentionFilter
+        The following optional suffixes are supported: s (second), h (hour), d (day), w (week), y (year).
+        If suffix isn't set, then the duration is counted in months (default 1)
+      '';
+    };
+
+    prometheusConfig = lib.mkOption {
+      type = lib.types.submodule { freeformType = settingsFormat.type; };
+      default = { };
+      example = literalExpression ''
+        {
+          scrape_configs = [
+            {
+              job_name = "postgres-exporter";
+              metrics_path = "/metrics";
+              static_configs = [
+                {
+                  targets = ["1.2.3.4:9187"];
+                  labels.type = "database";
+                }
+              ];
+            }
+            {
+              job_name = "node-exporter";
+              metrics_path = "/metrics";
+              static_configs = [
+                {
+                  targets = ["1.2.3.4:9100"];
+                  labels.type = "node";
+                }
+                {
+                  targets = ["5.6.7.8:9100"];
+                  labels.type = "node";
+                }
+              ];
+            }
+          ];
+        }
+      '';
       description = ''
-        Retention period in months.
+        Config for prometheus style metrics.
+        See the docs: <https://docs.victoriametrics.com/vmagent/#how-to-collect-metrics-in-prometheus-format>
+        for more information.
       '';
     };
+
     extraOptions = mkOption {
       type = types.listOf types.str;
-      default = [];
+      default = [ ];
+      example = literalExpression ''
+        [
+          "-httpAuth.username=username"
+          "-httpAuth.password=file:///abs/path/to/file"
+          "-loggerLevel=WARN"
+        ]
+      '';
       description = ''
-        Extra options to pass to VictoriaMetrics. See the README:
-        <https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md>
-        or {command}`victoriametrics -help` for more
-        information.
+        Extra options to pass to VictoriaMetrics. See the docs:
+        <https://docs.victoriametrics.com/single-server-victoriametrics/#list-of-command-line-flags>
+        or {command}`victoriametrics -help` for more information.
       '';
     };
   };
+
   config = lib.mkIf cfg.enable {
     systemd.services.victoriametrics = {
       description = "VictoriaMetrics time series database";
+      wantedBy = [ "multi-user.target" ];
       after = [ "network.target" ];
       startLimitBurst = 5;
+
       serviceConfig = {
-        Restart = "on-failure";
-        RestartSec = 1;
-        StateDirectory = "victoriametrics";
+        ExecStart = lib.escapeShellArgs (
+          startCLIList
+          # prometheusConfig defaults to { } and is never null, so test for the empty set instead.
+          ++ lib.optionals (cfg.prometheusConfig != { }) [ "-promscrape.config=${prometheusConfigYml}" ]
+        );
+
         DynamicUser = true;
-        ExecStart = ''
-          ${cfg.package}/bin/victoria-metrics \
-              -storageDataPath=/var/lib/victoriametrics \
-              -httpListenAddr ${cfg.listenAddress} \
-              -retentionPeriod ${toString cfg.retentionPeriod} \
-              ${lib.escapeShellArgs cfg.extraOptions}
-        '';
-        # victoriametrics 1.59 with ~7GB of data seems to eventually panic when merging files and then
-        # begins restart-looping forever. Set LimitNOFILE= to a large number to work around this issue.
-        #
-        # panic: FATAL: unrecoverable error when merging small parts in the partition "/var/lib/victoriametrics/data/small/2021_08":
-        # cannot open source part for merging: cannot open values file in stream mode:
-        # cannot open file "/var/lib/victoriametrics/data/small/2021_08/[...]/values.bin":
-        # open /var/lib/victoriametrics/data/small/2021_08/[...]/values.bin: too many open files
+        RestartSec = 1;
+        Restart = "on-failure";
+        RuntimeDirectory = "victoriametrics";
+        RuntimeDirectoryMode = "0700";
+        StateDirectory = cfg.stateDir;
+        StateDirectoryMode = "0700";
+
+        # Increase the limit to avoid errors like 'too many open files' when merging small parts
         LimitNOFILE = 1048576;
+
+        # Hardening
+        DeviceAllow = [ "/dev/null rw" ];
+        DevicePolicy = "strict";
+        LockPersonality = true;
+        MemoryDenyWriteExecute = true;
+        NoNewPrivileges = true;
+        PrivateDevices = true;
+        PrivateTmp = true;
+        PrivateUsers = true;
+        ProtectClock = true;
+        ProtectControlGroups = true;
+        ProtectHome = true;
+        ProtectHostname = true;
+        ProtectKernelLogs = true;
+        ProtectKernelModules = true;
+        ProtectKernelTunables = true;
+        ProtectProc = "invisible";
+        ProtectSystem = "full";
+        RemoveIPC = true;
+        RestrictAddressFamilies = [
+          "AF_INET"
+          "AF_INET6"
+          "AF_UNIX"
+        ];
+        RestrictNamespaces = true;
+        RestrictRealtime = true;
+        RestrictSUIDSGID = true;
+        SystemCallArchitectures = "native";
+        SystemCallFilter = [
+          "@system-service"
+          "~@privileged"
+        ];
       };
-      wantedBy = [ "multi-user.target" ];
 
       postStart =
         let
-          bindAddr = (lib.optionalString (lib.hasPrefix ":" cfg.listenAddress) "127.0.0.1") + cfg.listenAddress;
+          bindAddr =
+            (lib.optionalString (lib.hasPrefix ":" cfg.listenAddress) "127.0.0.1") + cfg.listenAddress;
         in
         lib.mkBefore ''
           until ${lib.getBin pkgs.curl}/bin/curl -s -o /dev/null http://${bindAddr}/ping; do
diff --git a/nixos/tests/victoriametrics.nix b/nixos/tests/victoriametrics.nix
index 5e364b67bf87e..e45d0a30f3a6f 100644
--- a/nixos/tests/victoriametrics.nix
+++ b/nixos/tests/victoriametrics.nix
@@ -1,33 +1,41 @@
-# This test runs influxdb and checks if influxdb is up and running
+# This test runs victoriametrics and checks if victoriametrics is able to write points and run simple query
 
-import ./make-test-python.nix ({ pkgs, ...} : {
-  name = "victoriametrics";
-  meta = with pkgs.lib.maintainers; {
-    maintainers = [ yorickvp ];
-  };
+import ./make-test-python.nix (
+  { pkgs, ... }:
+  {
+    name = "victoriametrics";
+    meta = with pkgs.lib.maintainers; {
+      maintainers = [
+        yorickvp
+        ryan4yin
+      ];
+    };
 
-  nodes = {
-    one = { ... }: {
-      services.victoriametrics.enable = true;
+    nodes = {
+      one =
+        { ... }:
+        {
+          services.victoriametrics.enable = true;
+        };
     };
-  };
 
-  testScript = ''
-    start_all()
+    testScript = ''
+      start_all()
 
-    one.wait_for_unit("victoriametrics.service")
+      one.wait_for_unit("victoriametrics.service")
 
-    # write some points and run simple query
-    out = one.succeed(
-        "curl -f -d 'measurement,tag1=value1,tag2=value2 field1=123,field2=1.23' -X POST 'http://localhost:8428/write'"
-    )
-    cmd = (
-        """curl -f -s -G 'http://localhost:8428/api/v1/export' -d 'match={__name__!=""}'"""
-    )
-    # data takes a while to appear
-    one.wait_until_succeeds(f"[[ $({cmd} | wc -l) -ne 0 ]]")
-    out = one.succeed(cmd)
-    assert '"values":[123]' in out
-    assert '"values":[1.23]' in out
-  '';
-})
+      # write some points and run simple query
+      out = one.succeed(
+          "curl -f -d 'measurement,tag1=value1,tag2=value2 field1=123,field2=1.23' -X POST 'http://localhost:8428/write'"
+      )
+      cmd = (
+          """curl -f -s -G 'http://localhost:8428/api/v1/export' -d 'match={__name__!=""}'"""
+      )
+      # data takes a while to appear
+      one.wait_until_succeeds(f"[[ $({cmd} | wc -l) -ne 0 ]]")
+      out = one.succeed(cmd)
+      assert '"values":[123]' in out
+      assert '"values":[1.23]' in out
+    '';
+  }
+)