{
  lib,
  autoAddDriverRunpath,
  cmake,
  fetchFromGitHub,
  nix-update-script,
  stdenv,

  config,
  cudaSupport ? config.cudaSupport,
  cudaPackages ? { },

  rocmSupport ? config.rocmSupport,
  rocmPackages ? { },
  rocmGpuTargets ? builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets,

  openclSupport ? false,
  clblast,

  blasSupport ? builtins.all (x: !x) [
    cudaSupport
    metalSupport
    openclSupport
    rocmSupport
    vulkanSupport
  ],
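  # blasSupport above defaults to true only when no accelerated backend
  # (CUDA, Metal, OpenCL, ROCm, Vulkan) is enabled; BLAS then provides the
  # CPU-only fallback.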
  blas,

  pkg-config,
  metalSupport ? stdenv.hostPlatform.isDarwin && stdenv.hostPlatform.isAarch64 && !openclSupport,
  vulkanSupport ? false,
  rpcSupport ? false,
  apple-sdk_14,
  curl,
  llama-cpp,
  shaderc,
  vulkan-headers,
  vulkan-loader,
  ninja,
  git,
}:
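# For reference, a downstream consumer can enable any of these backends with
# an override (mirroring the Metal passthru test below), e.g.:
#   llama-cpp.override { cudaSupport = true; }
#   llama-cpp.override { vulkanSupport = true; }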

let
  # It's necessary to consistently use backendStdenv when building with CUDA support,
  # otherwise we get libstdc++ errors downstream.
  # CUDA imposes an upper bound on the GCC version.
  effectiveStdenv = if cudaSupport then cudaPackages.backendStdenv else stdenv;
  inherit (lib)
    cmakeBool
    cmakeFeature
    optionals
    optionalString
    ;

  cudaBuildInputs = with cudaPackages; [
    cuda_cccl # <nv/target>

    # A temporary hack for reducing the closure size; remove once cudaPackages
    # stops using lndir: https://github.com/NixOS/nixpkgs/issues/271792
    cuda_cudart
    libcublas
  ];

  rocmBuildInputs = with rocmPackages; [
    clr
    hipblas
    rocblas
  ];

  vulkanBuildInputs = [
    shaderc
    vulkan-headers
    vulkan-loader
  ];
in
effectiveStdenv.mkDerivation (finalAttrs: {
  pname = "llama-cpp";
  version = "6479";

  src = fetchFromGitHub {
    owner = "ggml-org";
    repo = "llama.cpp";
    tag = "b${finalAttrs.version}";
    hash = "sha256-wgfYjG9m/ainCI85FlCb12Dz01R+pZfFeDX613M4xpQ=";
    leaveDotGit = true;
    postFetch = ''
      git -C "$out" rev-parse --short HEAD > $out/COMMIT
      find "$out" -name .git -print0 | xargs -0 rm -rf
    '';
  };
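  # leaveDotGit keeps the .git directory just long enough for postFetch to
  # record the short commit hash into $out/COMMIT; .git is then deleted so
  # the fixed-output hash stays reproducible. COMMIT is consumed in
  # preConfigure below.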

  patches = lib.optionals vulkanSupport [ ./disable_bfloat16.patch ];

  postPatch = ''
    # Workaround for the local-ai package, which overrides this package to an
    # older llama-cpp; older releases keep ggml-metal.m at a different path,
    # so patch whichever location exists.
    if [ -f ./ggml/src/ggml-metal.m ]; then
      substituteInPlace ./ggml/src/ggml-metal.m \
        --replace-fail '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
    fi

    if [ -f ./ggml/src/ggml-metal/ggml-metal.m ]; then
      substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
        --replace-fail '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
    fi
  '';

  nativeBuildInputs = [
    cmake
    ninja
    pkg-config
    git
  ]
  ++ optionals cudaSupport [
    cudaPackages.cuda_nvcc
    autoAddDriverRunpath
  ];
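  # autoAddDriverRunpath is a setup hook that patches the built binaries'
  # RUNPATH so they can locate the impure NVIDIA driver libraries
  # (e.g. libcuda.so) at runtime.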

  buildInputs =
    optionals cudaSupport cudaBuildInputs
    ++ optionals openclSupport [ clblast ]
    ++ optionals rocmSupport rocmBuildInputs
    ++ optionals blasSupport [ blas ]
    ++ optionals vulkanSupport vulkanBuildInputs
    ++ optionals metalSupport [ apple-sdk_14 ]
    ++ [ curl ];

  preConfigure = ''
    prependToVar cmakeFlags "-DLLAMA_BUILD_COMMIT:STRING=$(cat COMMIT)"
  '';
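  # The commit flag is added in preConfigure (via the prependToVar setup-hook
  # helper) rather than in cmakeFlags because its value is read from the
  # COMMIT file at build time; it expands to e.g.
  # -DLLAMA_BUILD_COMMIT:STRING=<short-hash>.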

  cmakeFlags = [
    # -march=native is non-deterministic; override with platform-specific flags if needed
    (cmakeBool "GGML_NATIVE" false)
    (cmakeBool "LLAMA_BUILD_EXAMPLES" false)
    (cmakeBool "LLAMA_BUILD_SERVER" true)
    (cmakeBool "LLAMA_BUILD_TESTS" (finalAttrs.finalPackage.doCheck or false))
    (cmakeBool "LLAMA_CURL" true)
    (cmakeBool "BUILD_SHARED_LIBS" true)
    (cmakeBool "GGML_BLAS" blasSupport)
    (cmakeBool "GGML_CLBLAST" openclSupport)
    (cmakeBool "GGML_CUDA" cudaSupport)
    (cmakeBool "GGML_HIP" rocmSupport)
    (cmakeBool "GGML_METAL" metalSupport)
    (cmakeBool "GGML_RPC" rpcSupport)
    (cmakeBool "GGML_VULKAN" vulkanSupport)
    (cmakeFeature "LLAMA_BUILD_NUMBER" finalAttrs.version)
  ]
  ++ optionals cudaSupport [
    (cmakeFeature "CMAKE_CUDA_ARCHITECTURES" cudaPackages.flags.cmakeCudaArchitecturesString)
  ]
  ++ optionals rocmSupport [
    (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.clr.hipClangPath}/clang++")
    (cmakeFeature "CMAKE_HIP_ARCHITECTURES" rocmGpuTargets)
  ]
  ++ optionals metalSupport [
    (cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
    (cmakeBool "LLAMA_METAL_EMBED_LIBRARY" true)
  ]
  ++ optionals rpcSupport [
    # Skip the build RPATH so rpc-server can be copied out of the build tree
    # in postInstall; llama.cpp doesn't include rpc-server in its install
    # target.
    (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
  ];
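  # For reference: lib.cmakeBool renders to -D<name>:BOOL=ON/OFF and
  # lib.cmakeFeature to -D<name>:STRING=<value>, so the ROCm entries above
  # become -DGGML_HIP:BOOL=ON and e.g.
  # -DCMAKE_HIP_ARCHITECTURES:STRING=gfx1030;gfx1100 (the target list varies
  # with rocmPackages.clr.gpuTargets).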

  # Upstream plans to add install targets at the CMake level; remove these
  # extra steps once that lands.
  postInstall = ''
    # Match the previous binary name for this package
    ln -sf $out/bin/llama-cli $out/bin/llama

    mkdir -p $out/include
    cp $src/include/llama.h $out/include/
  ''
  + optionalString rpcSupport "cp bin/rpc-server $out/bin/llama-rpc-server";

  # The tests are failing as of 2025-08.
  doCheck = false;

  passthru = {
    tests = lib.optionalAttrs stdenv.hostPlatform.isDarwin {
      metal = llama-cpp.override { metalSupport = true; };
    };
    updateScript = nix-update-script {
      attrPath = "llama-cpp";
      extraArgs = [
        "--version-regex"
        "b(.*)"
      ];
    };
  };
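  # The version regex strips the "b" prefix from upstream tags (b6479 ->
  # 6479), matching the tag = "b${finalAttrs.version}" construction in src.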

  meta = {
    description = "Inference of Meta's LLaMA model (and others) in pure C/C++";
    homepage = "https://github.com/ggml-org/llama.cpp";
    license = lib.licenses.mit;
    mainProgram = "llama";
    maintainers = with lib.maintainers; [
      booxter
      dit7ya
      philiptaron
      xddxdd
    ];
    platforms = lib.platforms.unix;
    badPlatforms = optionals (cudaSupport || openclSupport) lib.platforms.darwin;
    broken = metalSupport && !effectiveStdenv.hostPlatform.isDarwin;
  };
})