{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  fetchpatch,
  which,
  ninja,
  packaging,
  setuptools,
  torch,
  outlines,
  wheel,
  psutil,
  ray,
  pandas,
  pyarrow,
  sentencepiece,
  numpy,
  transformers,
  xformers,
  fastapi,
  uvicorn,
  pydantic,
  aioprometheus,
  pynvml,
  cupy,
  writeShellScript,

  config,

  cudaSupport ? config.cudaSupport,
  cudaPackages ? { },

  rocmSupport ? config.rocmSupport,
  rocmPackages ? { },
  gpuTargets ? [ ],
}:

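# CUDA and ROCm support default to the nixpkgs-wide `config.cudaSupport` / `config.rocmSupport`
# flags. A minimal sketch of enabling CUDA for just this package (assuming it is exposed as
# `python3Packages.vllm`):
#
#   python3Packages.vllm.override { cudaSupport = true; }
#
# or import nixpkgs with `config.cudaSupport = true;` to enable it set-wide.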
buildPythonPackage rec {
  pname = "vllm";
  version = "0.3.3";
  format = "pyproject";

  src = fetchFromGitHub {
    owner = "vllm-project";
    repo = pname;
    rev = "v${version}";
    hash = "sha256-LU5pCPVv+Ws9dL8oWL1sJGzwQKI1IFk2A1I6TP9gXL4=";
  };

  # Otherwise the build tries to enumerate the ROCm gfx archs supported by the host,
  # which is not possible inside the sandbox.
  PYTORCH_ROCM_ARCH = lib.optionalString rocmSupport (
    lib.strings.concatStringsSep ";" rocmPackages.clr.gpuTargets
  );
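  # The resulting value is a semicolon-separated list of gfx targets, e.g.
  # "gfx906;gfx908;gfx90a;gfx1030" (illustrative only; the real list comes from
  # rocmPackages.clr.gpuTargets).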

  # The xformers 0.0.23.post1 GitHub release reports its version as 0.0.24.
  #
  # cupy-cuda12x is the same wheel as cupy, just built with the CUDA dependencies;
  # nixpkgs' cupy is already set up that way. The version bump follows upstream:
  # https://github.com/vllm-project/vllm/pull/2845/commits/34a0ad7f9bb7880c0daa2992d700df3e01e91363
  #
  # `hipcc --version` works badly on NixOS due to unresolved paths, so stub it out.
  postPatch =
    ''
      substituteInPlace requirements.txt \
        --replace "xformers == 0.0.23.post1" "xformers == 0.0.24"
      substituteInPlace requirements.txt \
        --replace "cupy-cuda12x == 12.1.0" "cupy == 12.3.0"
      substituteInPlace requirements-build.txt \
        --replace "torch==2.1.2" "torch == 2.2.1"
      substituteInPlace pyproject.toml \
        --replace "torch == 2.1.2" "torch == 2.2.1"
      substituteInPlace requirements.txt \
        --replace "torch == 2.1.2" "torch == 2.2.1"
    ''
    + lib.optionalString rocmSupport ''
      substituteInPlace setup.py \
        --replace "'hipcc', '--version'" "'${writeShellScript "hipcc-version-stub" "echo HIP version: 0.0"}'"
    '';

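  # torch.utils.cpp_extension (used by vllm's setup.py to compile the extension kernels)
  # looks at CUDA_HOME / ROCM_HOME when locating the nvcc/hipcc toolchains, so export them
  # explicitly instead of letting it probe the host system.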
  preBuild =
    lib.optionalString cudaSupport ''
      export CUDA_HOME=${cudaPackages.cuda_nvcc}
    ''
    + lib.optionalString rocmSupport ''
      export ROCM_HOME=${rocmPackages.clr}
      export PATH=$PATH:${rocmPackages.hipcc}
    '';

  nativeBuildInputs = [
    ninja
    packaging
    setuptools
    torch
    wheel
    which
  ] ++ lib.optionals rocmSupport [ rocmPackages.hipcc ];

  buildInputs =
    (lib.optionals cudaSupport (
      with cudaPackages;
      [
        cuda_cudart # cuda_runtime.h, -lcudart
        cuda_cccl.dev # <thrust/*>
        libcusparse.dev # cusparse.h
        libcublas.dev # cublas_v2.h
        libcusolver # cusolverDn.h
      ]
    ))
    ++ (lib.optionals rocmSupport (
      with rocmPackages;
      [
        clr # HIP runtime
        rocthrust # <thrust/*> on HIP
        rocprim # rocPRIM, required by rocThrust
        hipsparse # hipsparse.h
        hipblas # hipblas.h
      ]
    ));

  propagatedBuildInputs =
    [
      psutil
      ray
      pandas
      pyarrow
      sentencepiece
      numpy
      torch
      transformers
      outlines
      xformers
      fastapi
      uvicorn
      pydantic
      aioprometheus
    ]
    ++ uvicorn.optional-dependencies.standard
    ++ aioprometheus.optional-dependencies.starlette
    # Runtime dependencies that only make sense with CUDA (see the cupy note above).
    ++ lib.optionals cudaSupport [
      pynvml
      cupy
    ];

  pythonImportsCheck = [ "vllm" ];

  meta = with lib; {
    description = "High-throughput and memory-efficient inference and serving engine for LLMs";
    changelog = "https://github.com/vllm-project/vllm/releases/tag/v${version}";
    homepage = "https://github.com/vllm-project/vllm";
    license = licenses.asl20;
    maintainers = with maintainers; [
      happysalada
      lach
    ];
    broken = !cudaSupport && !rocmSupport;
  };
}