{
  lib,
  autoAddDriverRunpath,
  cmake,
  fetchFromGitHub,
  nix-update-script,
  stdenv,

  config,
  cudaSupport ? config.cudaSupport,
  cudaPackages ? { },

  rocmSupport ? config.rocmSupport,
  rocmPackages ? { },

  openclSupport ? false,
  clblast,

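  # Fall back to plain BLAS only when no accelerated backend is enabled.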
  blasSupport ? builtins.all (x: !x) [
    cudaSupport
    metalSupport
    openclSupport
    rocmSupport
    vulkanSupport
  ],
  blas,

  pkg-config,
  metalSupport ? stdenv.hostPlatform.isDarwin && stdenv.hostPlatform.isAarch64 && !openclSupport,
  vulkanSupport ? false,
  rpcSupport ? false,
  curl,
  shaderc,
  vulkan-headers,
  vulkan-loader,
  ninja,
  git,
}:

let
  # It's necessary to consistently use backendStdenv when building with CUDA support;
  # otherwise we get libstdc++ errors downstream.
  # CUDA imposes an upper bound on the GCC version, e.g. the latest GCC compatible with cudaPackages_11 is gcc11.
  effectiveStdenv = if cudaSupport then cudaPackages.backendStdenv else stdenv;
  inherit (lib)
    cmakeBool
    cmakeFeature
    optionals
    optionalString
    ;

  cudaBuildInputs = with cudaPackages; [
    cuda_cccl # <nv/target>

    # A temporary hack for reducing the closure size, remove once cudaPackages
    # have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
    cuda_cudart
    libcublas
  ];

  rocmBuildInputs = with rocmPackages; [
    clr
    hipblas
    rocblas
  ];

  vulkanBuildInputs = [
    shaderc
    vulkan-headers
    vulkan-loader
  ];
in
effectiveStdenv.mkDerivation (finalAttrs: {
  pname = "llama-cpp";
  version = "5985";

  src = fetchFromGitHub {
    owner = "ggml-org";
    repo = "llama.cpp";
    tag = "b${finalAttrs.version}";
    hash = "sha256-OoV/p4Es/X/xQW7PpDLq5YLVYjieIE5+1itvtJECH54=";
    leaveDotGit = true;
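    # Record the short commit hash for build-info.sh (used in postPatch), then strip
    # .git so the fetched output stays reproducible.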
    postFetch = ''
      git -C "$out" rev-parse --short HEAD > $out/COMMIT
      find "$out" -name .git -print0 | xargs -0 rm -rf
    '';
  };

  postPatch = ''
    # Workaround for the local-ai package, which overrides this package to an older llama-cpp
    if [ -f ./ggml/src/ggml-metal.m ]; then
      substituteInPlace ./ggml/src/ggml-metal.m \
        --replace-fail '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
    fi

    if [ -f ./ggml/src/ggml-metal/ggml-metal.m ]; then
      substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
        --replace-fail '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
    fi

    substituteInPlace ./scripts/build-info.sh \
      --replace-fail 'build_number="0"' 'build_number="${finalAttrs.version}"' \
      --replace-fail 'build_commit="unknown"' "build_commit=\"$(cat COMMIT)\""
  '';

  nativeBuildInputs = [
    cmake
    ninja
    pkg-config
    git
  ]
  ++ optionals cudaSupport [
    cudaPackages.cuda_nvcc
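    # Patches the built binaries so the CUDA driver libraries can be found at runtime.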
    autoAddDriverRunpath
  ];

  buildInputs =
    optionals cudaSupport cudaBuildInputs
    ++ optionals openclSupport [ clblast ]
    ++ optionals rocmSupport rocmBuildInputs
    ++ optionals blasSupport [ blas ]
    ++ optionals vulkanSupport vulkanBuildInputs
    ++ [ curl ];

  cmakeFlags = [
    # -march=native is non-deterministic; override with platform-specific flags if needed
    (cmakeBool "GGML_NATIVE" false)
    (cmakeBool "LLAMA_BUILD_SERVER" true)
    (cmakeBool "LLAMA_CURL" true)
    (cmakeBool "BUILD_SHARED_LIBS" true)
    (cmakeBool "GGML_BLAS" blasSupport)
    (cmakeBool "GGML_CLBLAST" openclSupport)
    (cmakeBool "GGML_CUDA" cudaSupport)
    (cmakeBool "GGML_HIP" rocmSupport)
    (cmakeBool "GGML_METAL" metalSupport)
    (cmakeBool "GGML_RPC" rpcSupport)
    (cmakeBool "GGML_VULKAN" vulkanSupport)
  ]
  ++ optionals cudaSupport [
    (cmakeFeature "CMAKE_CUDA_ARCHITECTURES" cudaPackages.flags.cmakeCudaArchitecturesString)
  ]
  ++ optionals rocmSupport ([
    (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.clr.hipClangPath}/clang++")
    # TODO: this should become `clr.gpuTargets` in the future.
    (cmakeFeature "CMAKE_HIP_ARCHITECTURES" rocmPackages.rocblas.amdgpu_targets)
  ])
  ++ optionals metalSupport [
    (cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
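    # Embed the Metal shader library into the binary so ggml-metal.metal does not
    # have to be looked up on disk at runtime.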
    (cmakeBool "LLAMA_METAL_EMBED_LIBRARY" true)
  ]
  ++ optionals rpcSupport [
    # This is done so we can move rpc-server out of bin, because llama.cpp doesn't
    # install rpc-server in its install target.
    "-DCMAKE_SKIP_BUILD_RPATH=ON"
  ];

  # Upstream plans to add install targets at the CMake level; remove these
  # additional steps once that lands.
  postInstall = ''
    # Match the previous binary name for this package
    ln -sf $out/bin/llama-cli $out/bin/llama

    mkdir -p $out/include
    cp $src/include/llama.h $out/include/
  ''
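  # rpc-server is not covered by upstream's install target (see cmakeFlags above),
  # so copy it out of the build tree manually.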
  + optionalString rpcSupport "cp bin/rpc-server $out/bin/llama-rpc-server";

  passthru.updateScript = nix-update-script {
    attrPath = "llama-cpp";
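    # Upstream tags releases as bNNNN; strip the leading "b" so only the numeric
    # part becomes the package version.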
    extraArgs = [
      "--version-regex"
      "b(.*)"
    ];
  };

  meta = with lib; {
    description = "Inference of Meta's LLaMA model (and others) in pure C/C++";
    homepage = "https://github.com/ggml-org/llama.cpp";
    license = licenses.mit;
    mainProgram = "llama";
    maintainers = with maintainers; [
      dit7ya
      elohmeier
      philiptaron
      xddxdd
    ];
    platforms = platforms.unix;
    badPlatforms = optionals (cudaSupport || openclSupport) lib.platforms.darwin;
    broken = (metalSupport && !effectiveStdenv.hostPlatform.isDarwin);
  };
})