{
  lib,
  autoAddDriverRunpath,
  cmake,
  fetchFromGitHub,
  nix-update-script,
  stdenv,

  config,
  cudaSupport ? config.cudaSupport,
  cudaPackages ? { },

  rocmSupport ? config.rocmSupport,
  rocmPackages ? { },
  rocmGpuTargets ? builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets,

  openclSupport ? false,
  clblast,

  blasSupport ? builtins.all (x: !x) [
    cudaSupport
    metalSupport
    openclSupport
    rocmSupport
    vulkanSupport
  ],
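  # blasSupport above defaults to true only when no accelerated backend
  # (CUDA, Metal, OpenCL, ROCm, Vulkan) is enabled; BLAS then provides the
  # CPU-only fallback.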
  blas,

  pkg-config,
  metalSupport ? stdenv.hostPlatform.isDarwin && stdenv.hostPlatform.isAarch64 && !openclSupport,
  vulkanSupport ? false,
  rpcSupport ? false,
  apple-sdk_14,
  curl,
  llama-cpp,
  shaderc,
  vulkan-headers,
  vulkan-loader,
  ninja,
  git,
}:
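# For reference, a downstream consumer can enable any of these backends with
# an override (mirroring the Metal passthru test below), e.g.:
#   llama-cpp.override { cudaSupport = true; }
#   llama-cpp.override { vulkanSupport = true; }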

let
  # It's necessary to consistently use backendStdenv when building with CUDA support,
  # otherwise we get libstdc++ errors downstream.
  # CUDA imposes an upper bound on the GCC version.
  effectiveStdenv = if cudaSupport then cudaPackages.backendStdenv else stdenv;
  inherit (lib)
    cmakeBool
    cmakeFeature
    optionals
    optionalString
    ;

  cudaBuildInputs = with cudaPackages; [
    cuda_cccl # <nv/target>

    # A temporary hack for reducing the closure size; remove once cudaPackages
    # stops using lndir: https://github.com/NixOS/nixpkgs/issues/271792
    cuda_cudart
    libcublas
  ];

  rocmBuildInputs = with rocmPackages; [
    clr
    hipblas
    rocblas
  ];

  vulkanBuildInputs = [
    shaderc
    vulkan-headers
    vulkan-loader
  ];
in
effectiveStdenv.mkDerivation (finalAttrs: {
  pname = "llama-cpp";
  version = "6479";

  src = fetchFromGitHub {
    owner = "ggml-org";
    repo = "llama.cpp";
    tag = "b${finalAttrs.version}";
    hash = "sha256-wgfYjG9m/ainCI85FlCb12Dz01R+pZfFeDX613M4xpQ=";
    leaveDotGit = true;
    postFetch = ''
      git -C "$out" rev-parse --short HEAD > $out/COMMIT
      find "$out" -name .git -print0 | xargs -0 rm -rf
    '';
  };
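  # leaveDotGit keeps the .git directory just long enough for postFetch to
  # record the short commit hash into $out/COMMIT; .git is then deleted so
  # the fixed-output hash stays reproducible. COMMIT is consumed in
  # preConfigure below.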

  patches = lib.optionals vulkanSupport [ ./disable_bfloat16.patch ];

  postPatch = ''
    # Workaround for the local-ai package, which overrides this package to an
    # older llama-cpp; older releases keep ggml-metal.m at a different path,
    # so patch whichever location exists.
    if [ -f ./ggml/src/ggml-metal.m ]; then
      substituteInPlace ./ggml/src/ggml-metal.m \
        --replace-fail '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
    fi

    if [ -f ./ggml/src/ggml-metal/ggml-metal.m ]; then
      substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
        --replace-fail '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
    fi
  '';

  nativeBuildInputs = [
    cmake
    ninja
    pkg-config
    git
  ]
  ++ optionals cudaSupport [
    cudaPackages.cuda_nvcc
    autoAddDriverRunpath
  ];
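  # autoAddDriverRunpath is a setup hook that patches the built binaries'
  # RUNPATH so they can locate the impure NVIDIA driver libraries
  # (e.g. libcuda.so) at runtime.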

  buildInputs =
    optionals cudaSupport cudaBuildInputs
    ++ optionals openclSupport [ clblast ]
    ++ optionals rocmSupport rocmBuildInputs
    ++ optionals blasSupport [ blas ]
    ++ optionals vulkanSupport vulkanBuildInputs
    ++ optionals metalSupport [ apple-sdk_14 ]
    ++ [ curl ];

  preConfigure = ''
    prependToVar cmakeFlags "-DLLAMA_BUILD_COMMIT:STRING=$(cat COMMIT)"
  '';
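  # The commit flag is added in preConfigure (via the prependToVar setup-hook
  # helper) rather than in cmakeFlags because its value is read from the
  # COMMIT file at build time; it expands to e.g.
  # -DLLAMA_BUILD_COMMIT:STRING=<short-hash>.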

  cmakeFlags = [
    # -march=native is non-deterministic; override with platform-specific flags if needed
    (cmakeBool "GGML_NATIVE" false)
    (cmakeBool "LLAMA_BUILD_EXAMPLES" false)
    (cmakeBool "LLAMA_BUILD_SERVER" true)
    (cmakeBool "LLAMA_BUILD_TESTS" (finalAttrs.finalPackage.doCheck or false))
    (cmakeBool "LLAMA_CURL" true)
    (cmakeBool "BUILD_SHARED_LIBS" true)
    (cmakeBool "GGML_BLAS" blasSupport)
    (cmakeBool "GGML_CLBLAST" openclSupport)
    (cmakeBool "GGML_CUDA" cudaSupport)
    (cmakeBool "GGML_HIP" rocmSupport)
    (cmakeBool "GGML_METAL" metalSupport)
    (cmakeBool "GGML_RPC" rpcSupport)
    (cmakeBool "GGML_VULKAN" vulkanSupport)
    (cmakeFeature "LLAMA_BUILD_NUMBER" finalAttrs.version)
  ]
  ++ optionals cudaSupport [
    (cmakeFeature "CMAKE_CUDA_ARCHITECTURES" cudaPackages.flags.cmakeCudaArchitecturesString)
  ]
  ++ optionals rocmSupport [
    (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.clr.hipClangPath}/clang++")
    (cmakeFeature "CMAKE_HIP_ARCHITECTURES" rocmGpuTargets)
  ]
  ++ optionals metalSupport [
    (cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
    (cmakeBool "LLAMA_METAL_EMBED_LIBRARY" true)
  ]
  ++ optionals rpcSupport [
    # Skip the build RPATH so rpc-server can be copied out of the build tree
    # in postInstall; llama.cpp doesn't include rpc-server in its install
    # target.
    (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
  ];
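  # For reference: lib.cmakeBool renders to -D<name>:BOOL=ON/OFF and
  # lib.cmakeFeature to -D<name>:STRING=<value>, so the ROCm entries above
  # become -DGGML_HIP:BOOL=ON and e.g.
  # -DCMAKE_HIP_ARCHITECTURES:STRING=gfx1030;gfx1100 (the target list varies
  # with rocmPackages.clr.gpuTargets).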

  # Upstream plans to add install targets at the CMake level; remove these
  # extra steps once that lands.
  postInstall = ''
    # Match the previous binary name for this package
    ln -sf $out/bin/llama-cli $out/bin/llama

    mkdir -p $out/include
    cp $src/include/llama.h $out/include/
  ''
  + optionalString rpcSupport "cp bin/rpc-server $out/bin/llama-rpc-server";

  # The tests are failing as of 2025-08.
  doCheck = false;

  passthru = {
    tests = lib.optionalAttrs stdenv.hostPlatform.isDarwin {
      metal = llama-cpp.override { metalSupport = true; };
    };
    updateScript = nix-update-script {
      attrPath = "llama-cpp";
      extraArgs = [
        "--version-regex"
        "b(.*)"
      ];
    };
  };
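  # The version regex strips the "b" prefix from upstream tags (b6479 ->
  # 6479), matching the tag = "b${finalAttrs.version}" construction in src.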

  meta = {
    description = "Inference of Meta's LLaMA model (and others) in pure C/C++";
    homepage = "https://github.com/ggml-org/llama.cpp";
    license = lib.licenses.mit;
    mainProgram = "llama";
    maintainers = with lib.maintainers; [
      booxter
      dit7ya
      philiptaron
      xddxdd
    ];
    platforms = lib.platforms.unix;
    badPlatforms = optionals (cudaSupport || openclSupport) lib.platforms.darwin;
    broken = metalSupport && !effectiveStdenv.hostPlatform.isDarwin;
  };
})