at master 133 lines 3.3 kB view raw
{
  lib,
  fetchFromGitHub,
  stdenv,
  makeWrapper,
  python3Packages,
  tk,
  addDriverRunpath,

  apple-sdk_12,

  koboldLiteSupport ? true,

  config,
  cudaPackages ? { },

  cublasSupport ? config.cudaSupport,
  # You can find a full list here: https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/
  # For example if you're on an RTX 3060 that means you're using "Ampere" and you need to pass "sm_86"
  cudaArches ? cudaPackages.flags.realArches or [ ],

  clblastSupport ? stdenv.hostPlatform.isLinux,
  clblast,
  ocl-icd,

  vulkanSupport ? true,
  vulkan-loader,
  shaderc,
  metalSupport ? stdenv.hostPlatform.isDarwin,
  nix-update-script,
}:

let
  # Render a Makefile-style boolean flag: "OPTION=1" when enabled, "" otherwise.
  # (Empty entries are dropped when the shell word-splits $makeFlags, so a
  # disabled option contributes nothing to the make invocation.)
  makeBool = option: bool: (if bool then "${option}=1" else "");

  # With CUDA enabled, the launcher must find the NVIDIA userspace driver
  # libraries at runtime; driverLink is the impure /run/opengl-driver path.
  libraryPathWrapperArgs = lib.optionalString config.cudaSupport ''
    --prefix LD_LIBRARY_PATH : "${lib.makeLibraryPath [ addDriverRunpath.driverLink ]}"
  '';

  # nvcc requires the CUDA-compatible compiler stdenv; use the plain one otherwise.
  effectiveStdenv = if cublasSupport then cudaPackages.backendStdenv else stdenv;
in
effectiveStdenv.mkDerivation (finalAttrs: {
  pname = "koboldcpp";
  version = "1.98.1";

  src = fetchFromGitHub {
    owner = "LostRuins";
    repo = "koboldcpp";
    tag = "v${finalAttrs.version}";
    hash = "sha256-CJM97DRSIq2d3X6aR096+9QwBeI4kQNzxufdSoEydco=";
  };

  enableParallelBuilding = true;

  nativeBuildInputs = [
    makeWrapper
    python3Packages.wrapPython
  ];

  # Python runtime dependencies of koboldcpp.py; reused as pythonPath below so
  # wrapPythonProgramsIn picks them up.
  pythonInputs = builtins.attrValues { inherit (python3Packages) tkinter customtkinter packaging; };

  buildInputs = [
    tk
  ]
  ++ finalAttrs.pythonInputs
  ++ lib.optionals stdenv.hostPlatform.isDarwin [ apple-sdk_12 ]
  ++ lib.optionals cublasSupport [
    cudaPackages.libcublas
    cudaPackages.cuda_nvcc
    cudaPackages.cuda_cudart
    cudaPackages.cuda_cccl
  ]
  ++ lib.optionals clblastSupport [
    clblast
    ocl-icd
  ]
  ++ lib.optionals vulkanSupport [
    vulkan-loader
    shaderc
  ];

  pythonPath = finalAttrs.pythonInputs;

  makeFlags = [
    (makeBool "LLAMA_CUBLAS" cublasSupport)
    (makeBool "LLAMA_CLBLAST" clblastSupport)
    (makeBool "LLAMA_VULKAN" vulkanSupport)
    (makeBool "LLAMA_METAL" metalSupport)
  ]
  # FIX: previously this was `(lib.optionals cublasSupport "CUDA_DOCKER_ARCH=…")`
  # *inside* the list — lib.optionals expects a list as its second argument, and
  # the misuse inserted an empty-list element into makeFlags whenever
  # cublasSupport was disabled. Append a proper singleton list instead.
  ++ lib.optionals cublasSupport [ "CUDA_DOCKER_ARCH=${builtins.head cudaArches}" ];

  installPhase = ''
    runHook preInstall

    mkdir -p "$out/bin"

    install -Dm755 koboldcpp.py "$out/bin/koboldcpp.unwrapped"
    cp *.so "$out/bin"
    cp *.embd "$out/bin"

    ${lib.optionalString metalSupport ''
      cp *.metal "$out/bin"
    ''}

    ${lib.optionalString (!koboldLiteSupport) ''
      rm "$out/bin/kcpp_docs.embd"
      rm "$out/bin/klite.embd"
    ''}

    runHook postInstall
  '';

  postFixup = ''
    wrapPythonProgramsIn "$out/bin" "$pythonPath"
    makeWrapper "$out/bin/koboldcpp.unwrapped" "$out/bin/koboldcpp" \
      --prefix PATH : ${lib.makeBinPath [ tk ]} ${libraryPathWrapperArgs}
  '';

  passthru.updateScript = nix-update-script { };

  meta = {
    changelog = "https://github.com/LostRuins/koboldcpp/releases/tag/v${finalAttrs.version}";
    description = "Way to run various GGML and GGUF models";
    homepage = "https://github.com/LostRuins/koboldcpp";
    license = lib.licenses.agpl3Only;
    mainProgram = "koboldcpp";
    maintainers = with lib.maintainers; [
      maxstrid
      FlameFlag
    ];
    platforms = lib.platforms.unix;
  };
})