Merge pull request #289513 from happysalada/update_llama_cpp

llama-cpp: pull upstream flake changes

Authored by Yt; committed by GitHub (cd7f814c, 728d2d1f)

+94 -85
pkgs/by-name/ll/llama-cpp/package.nix
···
 , openclSupport ? false
 , clblast

-, blasSupport ? !rocmSupport && !cudaSupport
-, openblas
+, blasSupport ? builtins.all (x: !x) [ cudaSupport metalSupport openclSupport rocmSupport vulkanSupport ]
 , pkg-config
 , metalSupport ? stdenv.isDarwin && stdenv.isAarch64 && !openclSupport
-, patchelf
-, static ? true # if false will build the shared objects as well
+, vulkanSupport ? false
+, mpiSupport ? false # Increases the runtime closure by ~700M
+, vulkan-headers
+, vulkan-loader
+, ninja
+, git
+, mpi
 }:

 let
···
   # otherwise we get libstdc++ errors downstream.
   # cuda imposes an upper bound on the gcc version, e.g. the latest gcc compatible with cudaPackages_11 is gcc11
   effectiveStdenv = if cudaSupport then cudaPackages.backendStdenv else stdenv;
+  inherit (lib) cmakeBool cmakeFeature optionals;
+
+  darwinBuildInputs =
+    with darwin.apple_sdk.frameworks;
+    [
+      Accelerate
+      CoreVideo
+      CoreGraphics
+    ]
+    ++ optionals metalSupport [ MetalKit ];
+
+  cudaBuildInputs = with cudaPackages; [
+    cuda_cccl.dev # <nv/target>
+
+    # A temporary hack for reducing the closure size, remove once cudaPackages
+    # have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
+    cuda_cudart.dev
+    cuda_cudart.lib
+    cuda_cudart.static
+    libcublas.dev
+    libcublas.lib
+    libcublas.static
+  ];
+
+  rocmBuildInputs = with rocmPackages; [
+    clr
+    hipblas
+    rocblas
+  ];
+
+  vulkanBuildInputs = [
+    vulkan-headers
+    vulkan-loader
+  ];
 in
 effectiveStdenv.mkDerivation (finalAttrs: {
   pname = "llama-cpp";
-  version = "2249";
+  version = "2294";

   src = fetchFromGitHub {
     owner = "ggerganov";
     repo = "llama.cpp";
     rev = "refs/tags/b${finalAttrs.version}";
-    hash = "sha256-ikJUToUbA60u/8azR6dPmPyodq/nQe5L2aotlYBclaE=";
+    hash = "sha256-uZi4Bj03PgfFV+jS5M+A1sMCWC/GMY5IyyrlR1b4Sh4=";
   };

   postPatch = ''
···
       --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
   '';

-  nativeBuildInputs = [ cmake ] ++ lib.optionals blasSupport [ pkg-config ] ++ lib.optionals cudaSupport [
+  nativeBuildInputs = [ cmake ninja pkg-config git ]
+    ++ optionals cudaSupport [
     cudaPackages.cuda_nvcc

     # TODO: Replace with autoAddDriverRunpath
···
     cudaPackages.autoAddOpenGLRunpathHook
   ];

-  buildInputs = lib.optionals effectiveStdenv.isDarwin
-    (with darwin.apple_sdk.frameworks; [
-      Accelerate
-      CoreGraphics
-      CoreVideo
-      Foundation
-    ])
-    ++ lib.optionals metalSupport (with darwin.apple_sdk.frameworks; [
-      MetalKit
-    ])
-    ++ lib.optionals cudaSupport (with cudaPackages; [
-      cuda_cccl.dev # <nv/target>
-
-      # A temporary hack for reducing the closure size, remove once cudaPackages
-      # have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
-      cuda_cudart.dev
-      cuda_cudart.lib
-      cuda_cudart.static
-      libcublas.dev
-      libcublas.lib
-      libcublas.static
-    ]) ++ lib.optionals rocmSupport [
-      rocmPackages.clr
-      rocmPackages.hipblas
-      rocmPackages.rocblas
-    ] ++ lib.optionals openclSupport [
-      clblast
-    ] ++ lib.optionals blasSupport [
-      openblas
-    ];
+  buildInputs = optionals effectiveStdenv.isDarwin darwinBuildInputs
+    ++ optionals cudaSupport cudaBuildInputs
+    ++ optionals mpiSupport [ mpi ]
+    ++ optionals openclSupport [ clblast ]
+    ++ optionals rocmSupport rocmBuildInputs
+    ++ optionals vulkanSupport vulkanBuildInputs;

   cmakeFlags = [
-    "-DLLAMA_NATIVE=OFF"
-    "-DLLAMA_BUILD_SERVER=ON"
+    # -march=native is non-deterministic; override with platform-specific flags if needed
+    (cmakeBool "LLAMA_NATIVE" false)
+    (cmakeBool "LLAMA_BUILD_SERVER" true)
+    (cmakeBool "BUILD_SHARED_LIBS" true)
+    (cmakeBool "LLAMA_BLAS" blasSupport)
+    (cmakeBool "LLAMA_CLBLAST" openclSupport)
+    (cmakeBool "LLAMA_CUBLAS" cudaSupport)
+    (cmakeBool "LLAMA_HIPBLAS" rocmSupport)
+    (cmakeBool "LLAMA_METAL" metalSupport)
+    (cmakeBool "LLAMA_MPI" mpiSupport)
+    (cmakeBool "LLAMA_VULKAN" vulkanSupport)
   ]
-  ++ lib.optionals metalSupport [
-    "-DCMAKE_C_FLAGS=-D__ARM_FEATURE_DOTPROD=1"
-    "-DLLAMA_METAL=ON"
-  ]
-  ++ lib.optionals cudaSupport [
-    "-DLLAMA_CUBLAS=ON"
-  ]
-  ++ lib.optionals rocmSupport [
-    "-DLLAMA_HIPBLAS=1"
-    "-DCMAKE_C_COMPILER=hipcc"
-    "-DCMAKE_CXX_COMPILER=hipcc"
-    "-DCMAKE_POSITION_INDEPENDENT_CODE=ON"
-  ]
-  ++ lib.optionals openclSupport [
-    "-DLLAMA_CLBLAST=ON"
-  ]
-  ++ lib.optionals blasSupport [
-    "-DLLAMA_BLAS=ON"
-    "-DLLAMA_BLAS_VENDOR=OpenBLAS"
-  ]
-  ++ lib.optionals (!static) [
-    (lib.cmakeBool "BUILD_SHARED_LIBS" true)
-  ];
+  ++ optionals cudaSupport [
+    (
+      with cudaPackages.flags;
+      cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
+        builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
+      )
+    )
+  ]
+  ++ optionals rocmSupport [
+    (cmakeFeature "CMAKE_C_COMPILER" "hipcc")
+    (cmakeFeature "CMAKE_CXX_COMPILER" "hipcc")
+
+    # Build all targets supported by rocBLAS. When updating, search for TARGET_LIST_ROCM
+    # in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
+    # and select the line that matches the current nixpkgs version of rocBLAS.
+    # Should likely use `rocmPackages.clr.gpuTargets`.
+    "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
+  ]
+  ++ optionals metalSupport [ (cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1") ]
+  ++ optionals blasSupport [ (cmakeFeature "LLAMA_BLAS_VENDOR" "OpenBLAS") ];

-  installPhase = ''
-    runHook preInstall
-
-    mkdir -p $out/bin
-    ${lib.optionalString (!static) ''
-      mkdir $out/lib
-      cp libggml_shared.so $out/lib
-      cp libllama.so $out/lib
-    ''}
-
-    for f in bin/*; do
-      test -x "$f" || continue
-      ${lib.optionalString (!static) ''
-        ${patchelf}/bin/patchelf "$f" --set-rpath "$out/lib"
-      ''}
-      cp "$f" $out/bin/llama-cpp-"$(basename "$f")"
-    done
-
-    ${lib.optionalString metalSupport "cp ./bin/ggml-metal.metal $out/bin/ggml-metal.metal"}
-
-    runHook postInstall
-  '';
+  # Upstream plans to add install targets at the CMake level; remove these
+  # extra steps once that lands.
+  postInstall = ''
+    mv $out/bin/main $out/bin/llama
+    mv $out/bin/server $out/bin/llama-server
+    mkdir -p $out/include
+    cp $src/llama.h $out/include/
+  '';

   passthru.updateScript = nix-update-script {
···
     description = "Port of Facebook's LLaMA model in C/C++";
     homepage = "https://github.com/ggerganov/llama.cpp/";
     license = licenses.mit;
-    mainProgram = "llama-cpp-main";
-    maintainers = with maintainers; [ dit7ya elohmeier ];
-    broken = (effectiveStdenv.isDarwin && effectiveStdenv.isx86_64) || lib.count lib.id [openclSupport blasSupport rocmSupport cudaSupport] == 0;
+    mainProgram = "llama";
+    maintainers = with maintainers; [ dit7ya elohmeier philiptaron ];
     platforms = platforms.unix;
+    badPlatforms = optionals (cudaSupport || openclSupport) lib.platforms.darwin;
+    broken = (metalSupport && !effectiveStdenv.isDarwin);
   };
 })
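A few notes on the changes above. The hand-written "-DFOO=ON" flag strings are replaced by `lib.cmakeBool` and `lib.cmakeFeature`, which render typed CMake cache entries. A small illustration of what the helpers evaluate to (expected results shown in comments):

  let
    inherit (import <nixpkgs> { }) lib;
  in
  [
    (lib.cmakeBool "LLAMA_NATIVE" false)              # "-DLLAMA_NATIVE:BOOL=FALSE"
    (lib.cmakeBool "LLAMA_METAL" true)                # "-DLLAMA_METAL:BOOL=TRUE"
    (lib.cmakeFeature "LLAMA_BLAS_VENDOR" "OpenBLAS") # "-DLLAMA_BLAS_VENDOR:STRING=OpenBLAS"
  ]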
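The new `blasSupport` default makes BLAS the fallback backend: `builtins.all (x: !x) xs` is true exactly when every element of `xs` is false, i.e. when no accelerated backend has been requested. A quick check of the semantics:

  [
    (builtins.all (x: !x) [ false false false false false ]) # true:  no backend on, BLAS default holds
    (builtins.all (x: !x) [ true false false false false ])  # false: e.g. cudaSupport = true disables it
  ]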
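The `CMAKE_CUDA_ARCHITECTURES` value is assembled from `cudaPackages.flags.cudaCapabilities` (strings like "8.6"), with `dropDot` stripping the period. A stand-in sketch of that assembly; the `dropDot` definition here is my assumption about its behaviour and the capability list is made up for illustration:

  let
    dropDot = cap: builtins.replaceStrings [ "." ] [ "" ] cap; # stand-in for cudaPackages.flags.dropDot
    cudaCapabilities = [ "7.5" "8.0" "8.6" ];                  # hypothetical; the real list comes from cudaPackages
  in
  builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
  # => "75;80;86", i.e. -DCMAKE_CUDA_ARCHITECTURES:STRING=75;80;86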
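Finally, the practical effect of the new arguments is that each backend is now an independent toggle on the derivation. As a minimal sketch (not part of this change), assuming a nixpkgs checkout that contains this revision, a consumer would flip the toggles with `.override`:

  let
    pkgs = import <nixpkgs> { };
  in
  # Option names come from the diff above; the surrounding expression is illustrative.
  pkgs.llama-cpp.override {
    vulkanSupport = true; # pulls in vulkan-headers and vulkan-loader
    mpiSupport = false;   # per the comment in the diff, MPI adds ~700M to the runtime closure
  }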