{
  lib,
  fetchFromGitHub,
  stdenv,
  makeWrapper,
  python3Packages,
  tk,
  addDriverRunpath,

  apple-sdk_12,

  # Whether to keep the bundled KoboldAI Lite web UI; when false the
  # klite.embd / kcpp_docs.embd files are removed in installPhase.
  koboldLiteSupport ? true,

  config,
  cudaPackages ? { },

  # CUDA (cuBLAS) acceleration; defaults to the global nixpkgs CUDA setting.
  cublasSupport ? config.cudaSupport,
  # You can find a full list here: https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/
  # For example if you're on an RTX 3060 that means you're using "Ampere" and you need to pass "sm_86"
  cudaArches ? cudaPackages.flags.realArches or [ ],

  clblastSupport ? stdenv.hostPlatform.isLinux,
  clblast,
  ocl-icd,

  vulkanSupport ? true,
  vulkan-loader,
  shaderc,
  metalSupport ? stdenv.hostPlatform.isDarwin,
  nix-update-script,
}:
32
let
  # Render a Makefile-style boolean flag: "OPT=1" when enabled, "" when not.
  makeBool = option: bool: (if bool then "${option}=1" else "");

  # Wrapper argument exposing the NVIDIA driver libraries at runtime.
  # Gated on cublasSupport (not config.cudaSupport) for consistency with
  # effectiveStdenv, buildInputs and makeFlags: overriding cublasSupport
  # alone must be sufficient to produce a working CUDA build.
  libraryPathWrapperArgs = lib.optionalString cublasSupport ''
    --prefix LD_LIBRARY_PATH : "${lib.makeLibraryPath [ addDriverRunpath.driverLink ]}"
  '';

  # CUDA builds need the CUDA toolchain's stdenv for a compatible compiler.
  effectiveStdenv = if cublasSupport then cudaPackages.backendStdenv else stdenv;
in
effectiveStdenv.mkDerivation (finalAttrs: {
  pname = "koboldcpp";
  version = "1.98.1";

  src = fetchFromGitHub {
    owner = "LostRuins";
    repo = "koboldcpp";
    tag = "v${finalAttrs.version}";
    hash = "sha256-CJM97DRSIq2d3X6aR096+9QwBeI4kQNzxufdSoEydco=";
  };

  enableParallelBuilding = true;

  nativeBuildInputs = [
    makeWrapper
    python3Packages.wrapPython
  ];

  # Python modules koboldcpp.py needs at runtime; also fed to
  # wrapPythonProgramsIn through pythonPath below.
  pythonInputs = builtins.attrValues { inherit (python3Packages) tkinter customtkinter packaging; };

  buildInputs = [
    tk
  ]
  ++ finalAttrs.pythonInputs
  ++ lib.optionals stdenv.hostPlatform.isDarwin [ apple-sdk_12 ]
  ++ lib.optionals cublasSupport [
    cudaPackages.libcublas
    cudaPackages.cuda_nvcc
    cudaPackages.cuda_cudart
    cudaPackages.cuda_cccl
  ]
  ++ lib.optionals clblastSupport [
    clblast
    ocl-icd
  ]
  ++ lib.optionals vulkanSupport [
    vulkan-loader
    shaderc
  ];

  pythonPath = finalAttrs.pythonInputs;

  makeFlags = [
    (makeBool "LLAMA_CUBLAS" cublasSupport)
    (makeBool "LLAMA_CLBLAST" clblastSupport)
    (makeBool "LLAMA_VULKAN" vulkanSupport)
    (makeBool "LLAMA_METAL" metalSupport)
  ]
  # lib.optionals expects a list as its second argument; the previous
  # `(lib.optionals cublasSupport "CUDA_DOCKER_ARCH=…")` element produced an
  # empty *list* inside makeFlags when cublasSupport was off. Append a proper
  # singleton list instead. builtins.head is only forced when cublasSupport
  # is enabled, so an empty cudaArches only fails for CUDA builds (as before).
  ++ lib.optionals cublasSupport [ "CUDA_DOCKER_ARCH=${builtins.head cudaArches}" ];

  installPhase = ''
    runHook preInstall

    mkdir -p "$out/bin"

    # The Python entry point plus the compiled backends and embedded assets
    # it loads from its own directory.
    install -Dm755 koboldcpp.py "$out/bin/koboldcpp.unwrapped"
    cp *.so "$out/bin"
    cp *.embd "$out/bin"

    ${lib.optionalString metalSupport ''
      cp *.metal "$out/bin"
    ''}

    ${lib.optionalString (!koboldLiteSupport) ''
      rm "$out/bin/kcpp_docs.embd"
      rm "$out/bin/klite.embd"
    ''}

    runHook postInstall
  '';

  postFixup = ''
    wrapPythonProgramsIn "$out/bin" "$pythonPath"
    makeWrapper "$out/bin/koboldcpp.unwrapped" "$out/bin/koboldcpp" \
      --prefix PATH : ${lib.makeBinPath [ tk ]} ${libraryPathWrapperArgs}
  '';

  passthru.updateScript = nix-update-script { };

  meta = {
    changelog = "https://github.com/LostRuins/koboldcpp/releases/tag/v${finalAttrs.version}";
    description = "Way to run various GGML and GGUF models";
    homepage = "https://github.com/LostRuins/koboldcpp";
    license = lib.licenses.agpl3Only;
    mainProgram = "koboldcpp";
    maintainers = with lib.maintainers; [
      maxstrid
      FlameFlag
    ];
    platforms = lib.platforms.unix;
  };
})