{
  lib,
  autoAddDriverRunpath,
  cmake,
  fetchFromGitHub,
  nix-update-script,
  stdenv,

  config,
  cudaSupport ? config.cudaSupport,
  cudaPackages ? { },

  rocmSupport ? config.rocmSupport,
  rocmPackages ? { },
  rocmGpuTargets ? builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets,

  openclSupport ? false,
  clblast,

  blasSupport ? builtins.all (x: !x) [
    cudaSupport
    metalSupport
    openclSupport
    rocmSupport
    vulkanSupport
  ],
  blas,

  pkg-config,
  metalSupport ? stdenv.hostPlatform.isDarwin && stdenv.hostPlatform.isAarch64 && !openclSupport,
  vulkanSupport ? false,
  rpcSupport ? false,
  apple-sdk_14,
  curl,
  llama-cpp,
  shaderc,
  vulkan-headers,
  vulkan-loader,
  ninja,
  git,
}:

let
  # It's necessary to consistently use backendStdenv when building with CUDA support,
  # otherwise we get libstdc++ errors downstream.
  # CUDA imposes an upper bound on the gcc version.
  effectiveStdenv = if cudaSupport then cudaPackages.backendStdenv else stdenv;
  inherit (lib)
    cmakeBool
    cmakeFeature
    optionals
    optionalString
    ;

  cudaBuildInputs = with cudaPackages; [
    cuda_cccl # <nv/target>

    # A temporary hack for reducing the closure size, remove once cudaPackages
    # have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
    cuda_cudart
    libcublas
  ];

  rocmBuildInputs = with rocmPackages; [
    clr
    hipblas
    rocblas
  ];

  vulkanBuildInputs = [
    shaderc
    vulkan-headers
    vulkan-loader
  ];
in
effectiveStdenv.mkDerivation (finalAttrs: {
  pname = "llama-cpp";
  version = "6479";

  src = fetchFromGitHub {
    owner = "ggml-org";
    repo = "llama.cpp";
    tag = "b${finalAttrs.version}";
    hash = "sha256-wgfYjG9m/ainCI85FlCb12Dz01R+pZfFeDX613M4xpQ=";
    leaveDotGit = true;
    postFetch = ''
      git -C "$out" rev-parse --short HEAD > $out/COMMIT
      find "$out" -name .git -print0 | xargs -0 rm -rf
    '';
  };

  patches = lib.optionals vulkanSupport [ ./disable_bfloat16.patch ];

  postPatch = ''
    # Workaround for local-ai package which overrides this package to an older llama-cpp
    if [ -f ./ggml/src/ggml-metal.m ]; then
      substituteInPlace ./ggml/src/ggml-metal.m \
        --replace-fail '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
    fi

    if [ -f ./ggml/src/ggml-metal/ggml-metal.m ]; then
      substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
        --replace-fail '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
    fi
  '';

  nativeBuildInputs = [
    cmake
    ninja
    pkg-config
    git
  ]
  ++ optionals cudaSupport [
    cudaPackages.cuda_nvcc
    autoAddDriverRunpath
  ];

  buildInputs =
    optionals cudaSupport cudaBuildInputs
    ++ optionals openclSupport [ clblast ]
    ++ optionals rocmSupport rocmBuildInputs
    ++ optionals blasSupport [ blas ]
    ++ optionals vulkanSupport vulkanBuildInputs
    ++ optionals metalSupport [ apple-sdk_14 ]
    ++ [ curl ];

  preConfigure = ''
    prependToVar cmakeFlags "-DLLAMA_BUILD_COMMIT:STRING=$(cat COMMIT)"
  '';

  cmakeFlags = [
    # -march=native is non-deterministic; override with platform-specific flags if needed
    (cmakeBool "GGML_NATIVE" false)
    (cmakeBool "LLAMA_BUILD_EXAMPLES" false)
    (cmakeBool "LLAMA_BUILD_SERVER" true)
    (cmakeBool "LLAMA_BUILD_TESTS" (finalAttrs.finalPackage.doCheck or false))
    (cmakeBool "LLAMA_CURL" true)
    (cmakeBool "BUILD_SHARED_LIBS" true)
    (cmakeBool "GGML_BLAS" blasSupport)
    (cmakeBool "GGML_CLBLAST" openclSupport)
    (cmakeBool "GGML_CUDA" cudaSupport)
    (cmakeBool "GGML_HIP" rocmSupport)
    (cmakeBool "GGML_METAL" metalSupport)
    (cmakeBool "GGML_RPC" rpcSupport)
    (cmakeBool "GGML_VULKAN" vulkanSupport)
    (cmakeFeature "LLAMA_BUILD_NUMBER" finalAttrs.version)
  ]
  ++ optionals cudaSupport [
    (cmakeFeature "CMAKE_CUDA_ARCHITECTURES" cudaPackages.flags.cmakeCudaArchitecturesString)
  ]
  ++ optionals rocmSupport [
    (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.clr.hipClangPath}/clang++")
    (cmakeFeature "CMAKE_HIP_ARCHITECTURES" rocmGpuTargets)
  ]
  ++ optionals metalSupport [
    (cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
    (cmakeBool "LLAMA_METAL_EMBED_LIBRARY" true)
  ]
  ++ optionals rpcSupport [
    # This is done so we can move rpc-server out of bin because llama.cpp doesn't
    # install rpc-server in their install target.
    (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
  ];

  # Upstream plans on adding install targets at the cmake level; remove these
  # additional steps once that lands.
  postInstall = ''
    # Match previous binary name for this package
    ln -sf $out/bin/llama-cli $out/bin/llama

    mkdir -p $out/include
    cp $src/include/llama.h $out/include/
  ''
  + optionalString rpcSupport "cp bin/rpc-server $out/bin/llama-rpc-server";

  # the tests are failing as of 2025-08
  doCheck = false;

  passthru = {
    tests = lib.optionalAttrs stdenv.hostPlatform.isDarwin {
      metal = llama-cpp.override { metalSupport = true; };
    };
    updateScript = nix-update-script {
      attrPath = "llama-cpp";
      extraArgs = [
        "--version-regex"
        "b(.*)"
      ];
    };
  };

  meta = {
    description = "Inference of Meta's LLaMA model (and others) in pure C/C++";
    homepage = "https://github.com/ggml-org/llama.cpp";
    license = lib.licenses.mit;
    mainProgram = "llama";
    maintainers = with lib.maintainers; [
      booxter
      dit7ya
      philiptaron
      xddxdd
    ];
    platforms = lib.platforms.unix;
    badPlatforms = optionals (cudaSupport || openclSupport) lib.platforms.darwin;
    broken = metalSupport && !effectiveStdenv.hostPlatform.isDarwin;
  };
})
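# Usage sketch (not part of the upstream expression): the GPU backends are
# toggled through the function arguments above, so consumers normally
# specialise the package with `.override` rather than patching it. This
# mirrors the `passthru.tests.metal` attribute and assumes standard nixpkgs
# override semantics; the exact attribute path may differ in an overlay.
#
#   llama-cpp.override { vulkanSupport = true; }
#   llama-cpp.override { cudaSupport = true; }  # switches to cudaPackages.backendStdenv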