Merge pull request #278536 from newAM/llama-cpp-server

nixos/llama-cpp: init

authored by Lin Jian and committed by GitHub 3179a4e2 c7482616

+111
+111
nixos/modules/services/misc/llama-cpp.nix
# NixOS module that runs the llama.cpp HTTP server (`llama-cpp-server`)
# as a hardened systemd service.
{ config, lib, pkgs, utils, ... }:

let
  cfg = config.services.llama-cpp;
in {

  options = {

    services.llama-cpp = {
      enable = lib.mkEnableOption "LLaMA C++ server";

      package = lib.mkPackageOption pkgs "llama-cpp" { };

      # Passed to the server via `-m`.
      model = lib.mkOption {
        type = lib.types.path;
        example = "/models/mistral-instruct-7b/ggml-model-q4_0.gguf";
        description = "Model path.";
      };

      # Appended verbatim (one list element per argv entry) after the
      # arguments this module generates.
      extraFlags = lib.mkOption {
        type = lib.types.listOf lib.types.str;
        description = "Extra flags passed to llama-cpp-server.";
        example = ["-c" "4096" "-ngl" "32" "--numa"];
        default = [];
      };

      # Passed to the server via `--host`.
      host = lib.mkOption {
        type = lib.types.str;
        default = "127.0.0.1";
        example = "0.0.0.0";
        description = "IP address the LLaMA C++ server listens on.";
      };

      # Passed to the server via `--port`; also the port opened by
      # `openFirewall`.
      port = lib.mkOption {
        type = lib.types.port;
        default = 8080;
        description = "Listen port for LLaMA C++ server.";
      };

      openFirewall = lib.mkOption {
        type = lib.types.bool;
        default = false;
        description = "Open ports in the firewall for LLaMA C++ server.";
      };
    };

  };

  config = lib.mkIf cfg.enable {

    systemd.services.llama-cpp = {
      description = "LLaMA C++ server";
      after = ["network.target"];
      wantedBy = ["multi-user.target"];

      serviceConfig = {
        Type = "idle";
        KillSignal = "SIGINT";
        # Build the whole command line as an argv list and escape every
        # element, not just `extraFlags`. Interpolating `cfg.model` or
        # `cfg.host` unescaped lets systemd word-split values containing
        # whitespace and expand `%` / `$` sequences inside them.
        ExecStart = utils.escapeSystemdExecArgs ([
          "${cfg.package}/bin/llama-cpp-server"
          "--log-disable"
          "--host" cfg.host
          "--port" (toString cfg.port)
          # String interpolation keeps the original coercion of the
          # `types.path` value (string, path literal or package) to a string.
          "-m" "${cfg.model}"
        ] ++ cfg.extraFlags);
        Restart = "on-failure";
        RestartSec = 300;

        # for GPU acceleration
        PrivateDevices = false;

        # hardening
        DynamicUser = true;
        CapabilityBoundingSet = "";
        RestrictAddressFamilies = [
          "AF_INET"
          "AF_INET6"
          "AF_UNIX"
        ];
        NoNewPrivileges = true;
        PrivateMounts = true;
        PrivateTmp = true;
        PrivateUsers = true;
        ProtectClock = true;
        ProtectControlGroups = true;
        ProtectHome = true;
        ProtectKernelLogs = true;
        ProtectKernelModules = true;
        ProtectKernelTunables = true;
        ProtectSystem = "strict";
        MemoryDenyWriteExecute = true;
        LockPersonality = true;
        RemoveIPC = true;
        RestrictNamespaces = true;
        RestrictRealtime = true;
        RestrictSUIDSGID = true;
        SystemCallArchitectures = "native";
        SystemCallFilter = [
          "@system-service"
          "~@privileged"
          "~@resources"
        ];
        SystemCallErrorNumber = "EPERM";
        ProtectProc = "invisible";
        ProtectHostname = true;
        ProcSubset = "pid";
      };
    };

    # Only touches the firewall when the user opted in.
    networking.firewall = lib.mkIf cfg.openFirewall {
      allowedTCPPorts = [ cfg.port ];
    };

  };

  meta.maintainers = with lib.maintainers; [ newam ];
}