Merge pull request #309469 from ck3d/localai-2140

local-ai: 2.13.0 -> 2.14.0

Authored by Pol Dellaiera, committed by GitHub · commits ba454fe3, 58a5e4fc

+48 -19 total
pkgs/by-name/lo/local-ai/module.nix · +17 -9
···
       type = types.either types.package types.str;
       default = "models";
     };
+
+    parallelRequests = mkOption {
+      type = types.int;
+      default = 1;
+    };
+
+    logLevel = mkOption {
+      type = types.enum [ "error" "warn" "info" "debug" "trace" ];
+      default = "warn";
+    };
   };

   config = lib.mkIf cfg.enable {
     systemd.services.local-ai = {
       wantedBy = [ "multi-user.target" ];
+      environment.LLAMACPP_PARALLEL = toString cfg.parallelRequests;
       serviceConfig = {
         DynamicUser = true;
         ExecStart = lib.escapeShellArgs ([
           "${cfg.package}/bin/local-ai"
-          "--debug"
-          "--address"
-          ":${toString cfg.port}"
-          "--threads"
-          (toString cfg.threads)
-          "--localai-config-dir"
-          "."
-          "--models-path"
-          (toString cfg.models)
+          "--address=:${toString cfg.port}"
+          "--threads=${toString cfg.threads}"
+          "--localai-config-dir=."
+          "--models-path=${cfg.models}"
+          "--log-level=${cfg.logLevel}"
         ]
+        ++ lib.optional (cfg.parallelRequests > 1) "--parallel-requests"
         ++ cfg.extraArgs);
         RuntimeDirectory = "local-ai";
         WorkingDirectory = "%t/local-ai";
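For context, a minimal sketch of a NixOS configuration exercising the two options this diff adds (`enable` and the other options already exist in this module; the values here are illustrative only, not part of the PR):

{
  services.local-ai = {
    enable = true;
    # New in this PR: number of parallel backend requests,
    # exported to llama.cpp as LLAMACPP_PARALLEL.
    parallelRequests = 2;
    # New in this PR: passed through as --log-level=debug.
    logLevel = "debug";
  };
}

Note the flag rewrite: the hardcoded "--debug" is dropped in favor of the configurable --log-level, and the argument/value pairs are collapsed into single "--flag=value" strings.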
pkgs/by-name/lo/local-ai/package.nix · +10 -10
···
     src = fetchFromGitHub {
       owner = "ggerganov";
       repo = "llama.cpp";
-      rev = "784e11dea1f5ce9638851b2b0dddb107e2a609c8";
-      hash = "sha256-yAQAUo5J+a6O2kTqhFL1UH0tANxpQn3JhAd3MByaC6I=";
+      rev = "6ecf3189e00a1e8e737a78b6d10e1d7006e050a2";
+      hash = "sha256-JS287UdCzj6Es134cbhr8y/AoejMEux0w++/pZ5NejY=";
       fetchSubmodules = true;
     };
     postPatch = prev.postPatch + ''
···
     src = fetchFromGitHub {
       owner = "ggerganov";
       repo = "whisper.cpp";
-      rev = "858452d58dba3acdc3431c9bced2bb8cfd9bf418";
-      hash = "sha256-2fT3RgGpBex1mF6GJsVDo4rb0F31YqxTymsXcrpQAZk=";
+      rev = "8fac6455ffeb0a0950a84e790ddb74f7290d33c4";
+      hash = "sha256-Dez/Q2vMvSmscS+BJwkgZ4QG+ebM/N8s1Okd5my0CWI=";
     };

     nativeBuildInputs = [ cmake pkg-config ]
···
     src = fetchFromGitHub {
       owner = "mudler";
       repo = "go-stable-diffusion";
-      rev = "362df9da29f882dbf09ade61972d16a1f53c3485";
-      hash = "sha256-A5KvMZOviPsIpPHxM8cacT+qE2x1iFJAbPsRs4sLijY=";
+      rev = "4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f";
+      hash = "sha256-KXUvMP6cDyWib4rG0RmVRm3pgrdsfKXaH3k0v5/mTe8=";
       fetchSubmodules = true;
     };
     buildFlags = [ "libstablediffusion.a" ];
···
     src = fetchFromGitHub {
       owner = "M0Rf30";
       repo = "go-tiny-dream";
-      rev = "22a12a4bc0ac5455856f28f3b771331a551a4293";
-      hash = "sha256-DAVHD6E0OKHf4C2ldoI0Mm7813DIrmWFONUhSCQPCfc=";
+      rev = "c04fa463ace9d9a6464313aa5f9cd0f953b6c057";
+      hash = "sha256-uow3vbAI4F/fTGjYOKOLqTpKq7NgGYSZhGlEhn7h6s0=";
       fetchSubmodules = true;
     };
     postUnpack = ''
···
     stdenv;

   pname = "local-ai";
-  version = "2.13.0";
+  version = "2.14.0";
   src = fetchFromGitHub {
     owner = "go-skynet";
     repo = "LocalAI";
     rev = "v${version}";
-    hash = "sha256-jZE8Ow9FFhnx/jvsURLYlYtSuKpE4UWBezxg/mpHs9g=";
+    hash = "sha256-wr7sTMjGofGiZZbRJ+RfgXx9TM9Adu2NBAXeB3P5Ep0=";
   };

   self = buildGoModule.override { stdenv = effectiveStdenv; } {
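All five bumps above are fixed-output fetchFromGitHub pins, so each new rev needs a matching hash. A common way to refresh one is trust-on-first-use: build with a placeholder hash and copy the real value from the resulting mismatch error. A sketch using the new llama.cpp rev from this diff (`lib.fakeHash` is the standard nixpkgs placeholder; this snippet is illustrative, not part of the PR):

src = fetchFromGitHub {
  owner = "ggerganov";
  repo = "llama.cpp";
  rev = "6ecf3189e00a1e8e737a78b6d10e1d7006e050a2";
  # Placeholder: the build fails with a "hash mismatch" error
  # that prints the correct sha256-... value to paste here.
  hash = lib.fakeHash;
  fetchSubmodules = true;
};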
pkgs/by-name/lo/local-ai/tests.nix · +21 -0
···
 , writers
 , symlinkJoin
 , jq
+, prom2json
 }:
 let
   common-config = { config, ... }: {
···
       enable = true;
       package = self;
       threads = config.virtualisation.cores;
+      logLevel = "debug";
     };
   };

···
     ''
       machine.wait_for_open_port(${port})
       machine.succeed("curl -f http://localhost:${port}/readyz")
+
+      machine.succeed("${prom2json}/bin/prom2json http://localhost:${port}/metrics > metrics.json")
+
+      machine.copy_from_vm("metrics.json")
     '';
   });

···
       machine.succeed("${jq}/bin/jq --exit-status 'debug | .data[].id == \"${model}\"' models.json")
       machine.succeed("curl -f http://localhost:${port}/embeddings --json @${writers.writeJSON "request.json" requests.request} --output embeddings.json")
       machine.succeed("${jq}/bin/jq --exit-status 'debug | .model == \"${model}\"' embeddings.json")
+
+      machine.succeed("${prom2json}/bin/prom2json http://localhost:${port}/metrics > metrics.json")
+
+      machine.copy_from_vm("metrics.json")
     '';
   };

···
     # https://localai.io/advanced/#full-config-model-file-reference
     model-configs.${model} = rec {
       context_size = 8192;
+      backend = "llama-cpp";
       parameters = {
         # https://huggingface.co/lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF
         # https://ai.meta.com/blog/meta-llama-3/
···
       virtualisation.cores = 4;
       virtualisation.memorySize = 8192;
       services.local-ai.models = models;
+      # TODO: Add test case parallel requests
+      services.local-ai.parallelRequests = 2;
     };
     passthru = { inherit models requests; };
     testScript =
···
       machine.succeed("curl -f http://localhost:${port}/v1/completions --json @${writers.writeJSON "request-completions.json" requests.completions} --output completions.json")
       machine.succeed("${jq}/bin/jq --exit-status 'debug | .object ==\"text_completion\"' completions.json")
       machine.succeed("${jq}/bin/jq --exit-status '.usage.completion_tokens | debug == ${toString model-configs.${model}.parameters.max_tokens}' completions.json")
+
+      machine.succeed("${prom2json}/bin/prom2json http://localhost:${port}/metrics > metrics.json")
+
+      machine.copy_from_vm("metrics.json")
     '';
   };

···
       machine.succeed("curl -f http://localhost:${port}/tts --json @${writers.writeJSON "request.json" requests.request} --output out.wav")
       machine.succeed("curl -f http://localhost:${port}/v1/audio/transcriptions --header 'Content-Type: multipart/form-data' --form file=@out.wav --form model=${model-stt} --output transcription.json")
       machine.succeed("${jq}/bin/jq --exit-status 'debug | .segments | first.text == \"${requests.request.input}\"' transcription.json")
+
+      machine.succeed("${prom2json}/bin/prom2json http://localhost:${port}/metrics > metrics.json")
+
+      machine.copy_from_vm("metrics.json")
     '';
   };
 }
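Each test only exports metrics.json for inspection outside the VM; nothing asserts on its contents yet. If a follow-up wanted to check the scrape itself, a hedged sketch of one extra testScript line (prom2json emits a JSON array of metric families, so the check below merely asserts the array is non-empty; concrete LocalAI metric names are not shown in this PR, so none are assumed):

machine.succeed("${prom2json}/bin/prom2json http://localhost:${port}/metrics > metrics.json")
# Hypothetical follow-up assertion, not part of this PR:
machine.succeed("${jq}/bin/jq --exit-status 'length > 0' metrics.json")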