{
  config,
  lib,
  pkgs,
  utils,
  ...
}:

let
  cfg = config.services.llama-cpp;
  # Render the preset attribute set to an INI file consumed by llama-server.
  modelsPresetFile =
    if cfg.modelsPreset != null then
      pkgs.writeText "llama-models.ini" (lib.generators.toINI { } cfg.modelsPreset)
    else
      null;
in
{
  options = {
    services.llama-cpp = {
      enable = lib.mkEnableOption "LLaMA C++ server";

      package = lib.mkPackageOption pkgs "llama-cpp" { };

      model = lib.mkOption {
        type = lib.types.nullOr lib.types.path;
        default = null;
        example = "/models/mistral-instruct-7b/ggml-model-q4_0.gguf";
        description = "Path to the model file to load.";
      };

      modelsDir = lib.mkOption {
        type = lib.types.nullOr lib.types.path;
        default = null;
        example = "/models/";
        description = "Directory containing model files.";
      };

      modelsPreset = lib.mkOption {
        type = lib.types.nullOr (lib.types.attrsOf lib.types.attrs);
        default = null;
        description = ''
          Models preset configuration as a Nix attribute set.
          This is converted to an INI file and passed to llama-server
          via `--models-preset`. See the llama-server documentation
          for available options.
        '';
        example = lib.literalExpression ''
          {
            "Qwen3-Coder-Next" = {
              hf-repo = "unsloth/Qwen3-Coder-Next-GGUF";
              hf-file = "Qwen3-Coder-Next-UD-Q4_K_XL.gguf";
              alias = "unsloth/Qwen3-Coder-Next";
              fit = "on";
              seed = "3407";
              temp = "1.0";
              top-p = "0.95";
              min-p = "0.01";
              top-k = "40";
              jinja = "on";
            };
          }
        '';
      };

      extraFlags = lib.mkOption {
        type = lib.types.listOf lib.types.str;
        default = [ ];
        example = [
          "-c"
          "4096"
          "-ngl"
          "32"
          "--numa"
          "numactl"
        ];
        description = "Extra flags passed to llama-server.";
      };

      host = lib.mkOption {
        type = lib.types.str;
        default = "127.0.0.1";
        example = "0.0.0.0";
        description = "IP address the LLaMA C++ server listens on.";
      };

      port = lib.mkOption {
        type = lib.types.port;
        default = 8080;
        description = "Listen port for the LLaMA C++ server.";
      };

      openFirewall = lib.mkOption {
        type = lib.types.bool;
        default = false;
        description = "Whether to open the listen port in the firewall for the LLaMA C++ server.";
      };
    };
  };

  config = lib.mkIf cfg.enable {
    systemd.services.llama-cpp = {
      description = "LLaMA C++ server";
      after = [ "network.target" ];
      wantedBy = [ "multi-user.target" ];

      serviceConfig = {
        Type = "idle";
        KillSignal = "SIGINT";
        StateDirectory = "llama-cpp";
        CacheDirectory = "llama-cpp";
        WorkingDirectory = "/var/lib/llama-cpp";
        Environment = [ "LLAMA_CACHE=/var/cache/llama-cpp" ];
        ExecStart =
          let
            args =
              [
                "--host"
                cfg.host
                "--port"
                (toString cfg.port)
              ]
              ++ lib.optionals (cfg.model != null) [
                "-m"
                cfg.model
              ]
              ++ lib.optionals (cfg.modelsDir != null) [
                "--models-dir"
                cfg.modelsDir
              ]
              ++ lib.optionals (cfg.modelsPreset != null) [
                "--models-preset"
                modelsPresetFile
              ]
              ++ cfg.extraFlags;
          in
          "${cfg.package}/bin/llama-server ${utils.escapeSystemdExecArgs args}";
        Restart = "on-failure";
        RestartSec = 300;

        # for GPU acceleration
        PrivateDevices = false;

        # hardening
        DynamicUser = true;
        CapabilityBoundingSet = "";
        RestrictAddressFamilies = [
          "AF_INET"
          "AF_INET6"
          "AF_UNIX"
        ];
        NoNewPrivileges = true;
        PrivateMounts = true;
        PrivateTmp = true;
        PrivateUsers = true;
        ProtectClock = true;
        ProtectControlGroups = true;
        ProtectHome = true;
        ProtectKernelLogs = true;
        ProtectKernelModules = true;
        ProtectKernelTunables = true;
        ProtectSystem = "strict";
        MemoryDenyWriteExecute = true;
        LockPersonality = true;
        RemoveIPC = true;
        RestrictNamespaces = true;
        RestrictRealtime = true;
        RestrictSUIDSGID = true;
        SystemCallArchitectures = "native";
        SystemCallFilter = [
          "@system-service"
          "~@privileged"
        ];
        SystemCallErrorNumber = "EPERM";
        ProtectProc = "invisible";
        ProtectHostname = true;
        ProcSubset = "pid";
      };
    };

    networking.firewall = lib.mkIf cfg.openFirewall {
      allowedTCPPorts = [ cfg.port ];
    };
  };

  meta.maintainers = with lib.maintainers; [ newam ];
}
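
# A minimal usage sketch of this module from a system configuration. The
# model path below is illustrative (taken from the option's example), not a
# shipped default; openFirewall is only useful together with a non-loopback
# host, since the default 127.0.0.1 never accepts remote connections anyway.
#
#   services.llama-cpp = {
#     enable = true;
#     model = "/models/mistral-instruct-7b/ggml-model-q4_0.gguf";
#     host = "0.0.0.0";
#     openFirewall = true;
#     extraFlags = [ "-c" "4096" "-ngl" "32" ];
#   };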