{
  lib,
  stdenv,
  buildPythonPackage,
  fetchFromGitHub,
  which,
  ninja,
  packaging,
  setuptools,
  torch,
  outlines,
  wheel,
  psutil,
  ray,
  pandas,
  pyarrow,
  sentencepiece,
  numpy,
  transformers,
  xformers,
  fastapi,
  uvicorn,
  pydantic,
  aioprometheus,
  pynvml,
  cupy,
  writeShellScript,

  config,

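  # Whether the CUDA/ROCm kernels are built defaults to the global nixpkgs config.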
  cudaSupport ? config.cudaSupport,
  cudaPackages ? { },

  rocmSupport ? config.rocmSupport,
  rocmPackages ? { },
  gpuTargets ? [ ],
}:

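# Alias the incoming stdenv so it can still be referenced inside the `rec`
# attribute set below, where the `stdenv` attribute itself is overridden.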
let
  stdenv_pkg = stdenv;
in

buildPythonPackage rec {
  pname = "vllm";
  version = "0.3.3";
  format = "pyproject";

  src = fetchFromGitHub {
    owner = "vllm-project";
    repo = pname;
    rev = "v${version}";
    hash = "sha256-LU5pCPVv+Ws9dL8oWL1sJGzwQKI1IFk2A1I6TP9gXL4=";
  };

  # Otherwise the build tries to enumerate the ROCm gfx archs supported by the host,
  # which is not possible inside the build sandbox.
  PYTORCH_ROCM_ARCH = lib.optionalString rocmSupport (
    lib.strings.concatStringsSep ";" rocmPackages.clr.gpuTargets
  );

  # cupy-cuda12x is the same wheel as cupy, just built with CUDA dependencies; the
  # cupy in nixpkgs is already set up that way. The version bump is due to upstream
  # shenanigans:
  # https://github.com/vllm-project/vllm/pull/2845/commits/34a0ad7f9bb7880c0daa2992d700df3e01e91363
  #
  # hipcc --version works badly on NixOS because of unresolved paths.
  # Unclear why pythonRelaxDeps doesn't work here, but it didn't on the last attempt.
  postPatch =
    ''
      substituteInPlace requirements.txt \
        --replace "xformers == 0.0.23.post1" "xformers"
      substituteInPlace requirements.txt \
        --replace "cupy-cuda12x == 12.1.0" "cupy"
      substituteInPlace requirements-build.txt \
        --replace "torch==2.1.2" "torch"
      substituteInPlace pyproject.toml \
        --replace "torch == 2.1.2" "torch"
      substituteInPlace requirements.txt \
        --replace "torch == 2.1.2" "torch"
    ''
    + lib.optionalString rocmSupport ''
      substituteInPlace setup.py \
        --replace "'hipcc', '--version'" "'${writeShellScript "hipcc-version-stub" "echo HIP version: 0.0"}'"
    '';

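  # Point the build at the nixpkgs-provided GPU toolkits; the conventional
  # CUDA_HOME / ROCM_HOME variables are consulted when locating nvcc and hipcc.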
  preBuild =
    lib.optionalString cudaSupport ''
      export CUDA_HOME=${cudaPackages.cuda_nvcc}
    ''
    + lib.optionalString rocmSupport ''
      export ROCM_HOME=${rocmPackages.clr}
      export PATH=$PATH:${rocmPackages.hipcc}
    '';

  nativeBuildInputs = [
    ninja
    packaging
    setuptools
    torch
    wheel
    which
  ] ++ lib.optionals rocmSupport [ rocmPackages.hipcc ];

  buildInputs =
    (lib.optionals cudaSupport (
      with cudaPackages;
      [
        cuda_cudart # cuda_runtime.h, -lcudart
        cuda_cccl # <thrust/*>
        libcusparse # cusparse.h
        libcublas # cublas_v2.h
        libcusolver # cusolverDn.h
      ]
    ))
    ++ (lib.optionals rocmSupport (
      with rocmPackages;
      [
        clr
        rocthrust
        rocprim
        hipsparse
        hipblas
      ]
    ));

  propagatedBuildInputs =
    [
      psutil
      ray
      pandas
      pyarrow
      sentencepiece
      numpy
      torch
      transformers
      outlines
      xformers
      fastapi
      uvicorn
      pydantic
      aioprometheus
    ]
    ++ uvicorn.optional-dependencies.standard
    ++ aioprometheus.optional-dependencies.starlette
    ++ lib.optionals cudaSupport [
      pynvml
      cupy
    ];

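  # Build with the CUDA-compatible stdenv when CUDA is enabled so the C++ toolchain
  # matches the one expected by cudaPackages; otherwise keep the default stdenv.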
  stdenv = if cudaSupport then cudaPackages.backendStdenv else stdenv_pkg;

  pythonImportsCheck = [ "vllm" ];

  meta = with lib; {
    description = "High-throughput and memory-efficient inference and serving engine for LLMs";
    changelog = "https://github.com/vllm-project/vllm/releases/tag/v${version}";
    homepage = "https://github.com/vllm-project/vllm";
    license = licenses.asl20;
    maintainers = with maintainers; [
      happysalada
      lach
    ];
    broken = !cudaSupport && !rocmSupport;
  };
}
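
# A minimal usage sketch, assuming this expression is wired up as
# python3Packages.vllm in a nixpkgs checkout (CUDA builds also need unfree
# packages allowed):
#
#   nix-build . -A python3Packages.vllm \
#     --arg config '{ cudaSupport = true; allowUnfree = true; }'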