Clone of https://github.com/NixOS/nixpkgs.git (to stress-test knotserver)
{
  lib,
  autoAddDriverRunpath,
  cmake,
  fetchFromGitHub,
  nix-update-script,
  stdenv,

  config,
  cudaSupport ? config.cudaSupport,
  cudaPackages ? { },

  rocmSupport ? config.rocmSupport,
  rocmPackages ? { },

  openclSupport ? false,
  clblast,

  blasSupport ? builtins.all (x: !x) [
    cudaSupport
    metalSupport
    openclSupport
    rocmSupport
    vulkanSupport
  ],
  blas,

  pkg-config,
  metalSupport ? stdenv.hostPlatform.isDarwin && stdenv.hostPlatform.isAarch64 && !openclSupport,
  vulkanSupport ? false,
  rpcSupport ? false,
  curl,
  shaderc,
  vulkan-headers,
  vulkan-loader,
  ninja,
  git,
}:

let
  # It's necessary to consistently use backendStdenv when building with CUDA support,
  # otherwise we get libstdc++ errors downstream.
  # cuda imposes an upper bound on the gcc version, e.g. the latest gcc compatible with cudaPackages_11 is gcc11
  effectiveStdenv = if cudaSupport then cudaPackages.backendStdenv else stdenv;
  inherit (lib)
    cmakeBool
    cmakeFeature
    optionals
    optionalString
    ;

  cudaBuildInputs = with cudaPackages; [
    cuda_cccl # <nv/target>

    # A temporary hack for reducing the closure size, remove once cudaPackages
    # have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
    cuda_cudart
    libcublas
  ];

  rocmBuildInputs = with rocmPackages; [
    clr
    hipblas
    rocblas
  ];

  vulkanBuildInputs = [
    shaderc
    vulkan-headers
    vulkan-loader
  ];
in
effectiveStdenv.mkDerivation (finalAttrs: {
  pname = "llama-cpp";
  version = "5985";

  src = fetchFromGitHub {
    owner = "ggml-org";
    repo = "llama.cpp";
    tag = "b${finalAttrs.version}";
    hash = "sha256-OoV/p4Es/X/xQW7PpDLq5YLVYjieIE5+1itvtJECH54=";
    leaveDotGit = true;
    postFetch = ''
      git -C "$out" rev-parse --short HEAD > $out/COMMIT
      find "$out" -name .git -print0 | xargs -0 rm -rf
    '';
  };

  postPatch = ''
    # Workaround for local-ai package which overrides this package to an older llama-cpp
    if [ -f ./ggml/src/ggml-metal.m ]; then
      substituteInPlace ./ggml/src/ggml-metal.m \
        --replace-fail '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
    fi

    if [ -f ./ggml/src/ggml-metal/ggml-metal.m ]; then
      substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
        --replace-fail '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
    fi

    substituteInPlace ./scripts/build-info.sh \
      --replace-fail 'build_number="0"' 'build_number="${finalAttrs.version}"' \
      --replace-fail 'build_commit="unknown"' "build_commit=\"$(cat COMMIT)\""
  '';

  nativeBuildInputs = [
    cmake
    ninja
    pkg-config
    git
  ]
  ++ optionals cudaSupport [
    cudaPackages.cuda_nvcc
    autoAddDriverRunpath
  ];

  buildInputs =
    optionals cudaSupport cudaBuildInputs
    ++ optionals openclSupport [ clblast ]
    ++ optionals rocmSupport rocmBuildInputs
    ++ optionals blasSupport [ blas ]
    ++ optionals vulkanSupport vulkanBuildInputs
    ++ [ curl ];

  cmakeFlags = [
    # -march=native is non-deterministic; override with platform-specific flags if needed
    (cmakeBool "GGML_NATIVE" false)
    (cmakeBool "LLAMA_BUILD_SERVER" true)
    (cmakeBool "LLAMA_CURL" true)
    (cmakeBool "BUILD_SHARED_LIBS" true)
    (cmakeBool "GGML_BLAS" blasSupport)
    (cmakeBool "GGML_CLBLAST" openclSupport)
    (cmakeBool "GGML_CUDA" cudaSupport)
    (cmakeBool "GGML_HIP" rocmSupport)
    (cmakeBool "GGML_METAL" metalSupport)
    (cmakeBool "GGML_RPC" rpcSupport)
    (cmakeBool "GGML_VULKAN" vulkanSupport)
  ]
  ++ optionals cudaSupport [
    (cmakeFeature "CMAKE_CUDA_ARCHITECTURES" cudaPackages.flags.cmakeCudaArchitecturesString)
  ]
  ++ optionals rocmSupport ([
    (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.clr.hipClangPath}/clang++")
    # TODO: this should become `clr.gpuTargets` in the future.
    (cmakeFeature "CMAKE_HIP_ARCHITECTURES" rocmPackages.rocblas.amdgpu_targets)
  ])
  ++ optionals metalSupport [
    (cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
    (cmakeBool "LLAMA_METAL_EMBED_LIBRARY" true)
  ]
  ++ optionals rpcSupport [
    # This is done so we can move rpc-server out of bin because llama.cpp doesn't
    # install rpc-server in their install target.
    "-DCMAKE_SKIP_BUILD_RPATH=ON"
  ];

  # upstream plans on adding targets at the cmake level, remove those
  # additional steps after that
  postInstall = ''
    # Match previous binary name for this package
    ln -sf $out/bin/llama-cli $out/bin/llama

    mkdir -p $out/include
    cp $src/include/llama.h $out/include/
  ''
  + optionalString rpcSupport "cp bin/rpc-server $out/bin/llama-rpc-server";

  passthru.updateScript = nix-update-script {
    attrPath = "llama-cpp";
    extraArgs = [
      "--version-regex"
      "b(.*)"
    ];
  };

  meta = with lib; {
    description = "Inference of Meta's LLaMA model (and others) in pure C/C++";
    homepage = "https://github.com/ggml-org/llama.cpp";
    license = licenses.mit;
    mainProgram = "llama";
    maintainers = with maintainers; [
      dit7ya
      elohmeier
      philiptaron
      xddxdd
    ];
    platforms = platforms.unix;
    badPlatforms = optionals (cudaSupport || openclSupport) lib.platforms.darwin;
    broken = (metalSupport && !effectiveStdenv.hostPlatform.isDarwin);
  };
})