{
  lib,
  autoAddDriverRunpath,
  cmake,
  fetchFromGitHub,
  nix-update-script,
  stdenv,

  config,
  cudaSupport ? config.cudaSupport,
  cudaPackages ? { },

  rocmSupport ? config.rocmSupport,
  rocmPackages ? { },

  openclSupport ? false,
  clblast,

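  # Fall back to plain BLAS only when no accelerated backend is enabled.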
  blasSupport ? builtins.all (x: !x) [
    cudaSupport
    metalSupport
    openclSupport
    rocmSupport
    vulkanSupport
  ],
  blas,

  pkg-config,
  metalSupport ? stdenv.hostPlatform.isDarwin && stdenv.hostPlatform.isAarch64 && !openclSupport,
  vulkanSupport ? false,
  rpcSupport ? false,
  curl,
  shaderc,
  vulkan-headers,
  vulkan-loader,
  ninja,
  git,
}:

let
  # It's necessary to consistently use backendStdenv when building with CUDA support;
  # otherwise we get libstdc++ errors downstream.
  # CUDA imposes an upper bound on the GCC version, e.g. the latest GCC compatible with cudaPackages_11 is gcc11.
  effectiveStdenv = if cudaSupport then cudaPackages.backendStdenv else stdenv;
  inherit (lib)
    cmakeBool
    cmakeFeature
    optionals
    optionalString
    ;

  cudaBuildInputs = with cudaPackages; [
    cuda_cccl # <nv/target>

    # A temporary hack for reducing the closure size, remove once cudaPackages
    # have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
    cuda_cudart
    libcublas
  ];

  rocmBuildInputs = with rocmPackages; [
    clr
    hipblas
    rocblas
  ];

  vulkanBuildInputs = [
    shaderc
    vulkan-headers
    vulkan-loader
  ];
in
effectiveStdenv.mkDerivation (finalAttrs: {
  pname = "llama-cpp";
  version = "5985";

  src = fetchFromGitHub {
    owner = "ggml-org";
    repo = "llama.cpp";
    tag = "b${finalAttrs.version}";
    hash = "sha256-OoV/p4Es/X/xQW7PpDLq5YLVYjieIE5+1itvtJECH54=";
    leaveDotGit = true;
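    # Record the short commit hash for build-info.sh (used in postPatch), then strip
    # .git so the fetched output stays reproducible.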
    postFetch = ''
      git -C "$out" rev-parse --short HEAD > $out/COMMIT
      find "$out" -name .git -print0 | xargs -0 rm -rf
    '';
  };

  postPatch = ''
    # Workaround for the local-ai package, which overrides this package to an older llama-cpp
    if [ -f ./ggml/src/ggml-metal.m ]; then
      substituteInPlace ./ggml/src/ggml-metal.m \
        --replace-fail '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
    fi

    if [ -f ./ggml/src/ggml-metal/ggml-metal.m ]; then
      substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
        --replace-fail '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
    fi

    substituteInPlace ./scripts/build-info.sh \
      --replace-fail 'build_number="0"' 'build_number="${finalAttrs.version}"' \
      --replace-fail 'build_commit="unknown"' "build_commit=\"$(cat COMMIT)\""
  '';

  nativeBuildInputs = [
    cmake
    ninja
    pkg-config
    git
  ]
  ++ optionals cudaSupport [
    cudaPackages.cuda_nvcc
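    # Patches the built binaries so the CUDA driver libraries can be found at runtime.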
    autoAddDriverRunpath
  ];

  buildInputs =
    optionals cudaSupport cudaBuildInputs
    ++ optionals openclSupport [ clblast ]
    ++ optionals rocmSupport rocmBuildInputs
    ++ optionals blasSupport [ blas ]
    ++ optionals vulkanSupport vulkanBuildInputs
    ++ [ curl ];

  cmakeFlags = [
    # -march=native is non-deterministic; override with platform-specific flags if needed
    (cmakeBool "GGML_NATIVE" false)
    (cmakeBool "LLAMA_BUILD_SERVER" true)
    (cmakeBool "LLAMA_CURL" true)
    (cmakeBool "BUILD_SHARED_LIBS" true)
    (cmakeBool "GGML_BLAS" blasSupport)
    (cmakeBool "GGML_CLBLAST" openclSupport)
    (cmakeBool "GGML_CUDA" cudaSupport)
    (cmakeBool "GGML_HIP" rocmSupport)
    (cmakeBool "GGML_METAL" metalSupport)
    (cmakeBool "GGML_RPC" rpcSupport)
    (cmakeBool "GGML_VULKAN" vulkanSupport)
  ]
  ++ optionals cudaSupport [
    (cmakeFeature "CMAKE_CUDA_ARCHITECTURES" cudaPackages.flags.cmakeCudaArchitecturesString)
  ]
  ++ optionals rocmSupport ([
    (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.clr.hipClangPath}/clang++")
    # TODO: this should become `clr.gpuTargets` in the future.
    (cmakeFeature "CMAKE_HIP_ARCHITECTURES" rocmPackages.rocblas.amdgpu_targets)
  ])
  ++ optionals metalSupport [
    (cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
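    # Embed the Metal shader library into the binary so ggml-metal.metal does not
    # have to be looked up on disk at runtime.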
    (cmakeBool "LLAMA_METAL_EMBED_LIBRARY" true)
  ]
  ++ optionals rpcSupport [
    # This is done so we can move rpc-server out of bin, because llama.cpp doesn't
    # install rpc-server in its install target.
    "-DCMAKE_SKIP_BUILD_RPATH=ON"
  ];

  # Upstream plans to add install targets at the CMake level; remove these
  # additional steps once that lands.
  postInstall = ''
    # Match the previous binary name for this package
    ln -sf $out/bin/llama-cli $out/bin/llama

    mkdir -p $out/include
    cp $src/include/llama.h $out/include/
  ''
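  # rpc-server is not covered by upstream's install target (see cmakeFlags above),
  # so copy it out of the build tree manually.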
  + optionalString rpcSupport "cp bin/rpc-server $out/bin/llama-rpc-server";

  passthru.updateScript = nix-update-script {
    attrPath = "llama-cpp";
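    # Upstream tags releases as bNNNN; strip the leading "b" so only the numeric
    # part becomes the package version.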
    extraArgs = [
      "--version-regex"
      "b(.*)"
    ];
  };

  meta = with lib; {
    description = "Inference of Meta's LLaMA model (and others) in pure C/C++";
    homepage = "https://github.com/ggml-org/llama.cpp";
    license = licenses.mit;
    mainProgram = "llama";
    maintainers = with maintainers; [
      dit7ya
      elohmeier
      philiptaron
      xddxdd
    ];
    platforms = platforms.unix;
    badPlatforms = optionals (cudaSupport || openclSupport) lib.platforms.darwin;
    broken = (metalSupport && !effectiveStdenv.hostPlatform.isDarwin);
  };
})