{
  lib,
  stdenv,
  buildPythonPackage,
  fetchFromGitHub,
  which,
  ninja,
  packaging,
  setuptools,
  torch,
  outlines,
  wheel,
  psutil,
  ray,
  pandas,
  pyarrow,
  sentencepiece,
  numpy,
  transformers,
  xformers,
  fastapi,
  uvicorn,
  pydantic,
  aioprometheus,
  pynvml,
  cupy,
  writeShellScript,

  config,

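  # Whether the CUDA/ROCm kernels are built defaults to the global nixpkgs config.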
  cudaSupport ? config.cudaSupport,
  cudaPackages ? { },

  rocmSupport ? config.rocmSupport,
  rocmPackages ? { },
  gpuTargets ? [ ],
}:

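# Alias the incoming stdenv so it can still be referenced inside the `rec`
# attribute set below, where the `stdenv` attribute itself is overridden.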
let
  stdenv_pkg = stdenv;
in

buildPythonPackage rec {
  pname = "vllm";
  version = "0.3.3";
  format = "pyproject";

  src = fetchFromGitHub {
    owner = "vllm-project";
    repo = pname;
    rev = "v${version}";
    hash = "sha256-LU5pCPVv+Ws9dL8oWL1sJGzwQKI1IFk2A1I6TP9gXL4=";
  };

  # Otherwise the build tries to enumerate the ROCm gfx archs supported by the host,
  # which is not possible inside the build sandbox.
  PYTORCH_ROCM_ARCH = lib.optionalString rocmSupport (
    lib.strings.concatStringsSep ";" rocmPackages.clr.gpuTargets
  );

  # cupy-cuda12x is the same wheel as cupy, just built with CUDA dependencies; the
  # cupy in nixpkgs is already set up that way. The version bump is due to upstream
  # shenanigans:
  # https://github.com/vllm-project/vllm/pull/2845/commits/34a0ad7f9bb7880c0daa2992d700df3e01e91363
  #
  # hipcc --version works badly on NixOS because of unresolved paths.
  # Unclear why pythonRelaxDeps doesn't work here, but it didn't on the last attempt.
  postPatch =
    ''
      substituteInPlace requirements.txt \
        --replace "xformers == 0.0.23.post1" "xformers"
      substituteInPlace requirements.txt \
        --replace "cupy-cuda12x == 12.1.0" "cupy"
      substituteInPlace requirements-build.txt \
        --replace "torch==2.1.2" "torch"
      substituteInPlace pyproject.toml \
        --replace "torch == 2.1.2" "torch"
      substituteInPlace requirements.txt \
        --replace "torch == 2.1.2" "torch"
    ''
    + lib.optionalString rocmSupport ''
      substituteInPlace setup.py \
        --replace "'hipcc', '--version'" "'${writeShellScript "hipcc-version-stub" "echo HIP version: 0.0"}'"
    '';

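  # Point the build at the nixpkgs-provided GPU toolkits; the conventional
  # CUDA_HOME / ROCM_HOME variables are consulted when locating nvcc and hipcc.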
  preBuild =
    lib.optionalString cudaSupport ''
      export CUDA_HOME=${cudaPackages.cuda_nvcc}
    ''
    + lib.optionalString rocmSupport ''
      export ROCM_HOME=${rocmPackages.clr}
      export PATH=$PATH:${rocmPackages.hipcc}
    '';

  nativeBuildInputs = [
    ninja
    packaging
    setuptools
    torch
    wheel
    which
  ] ++ lib.optionals rocmSupport [ rocmPackages.hipcc ];

  buildInputs =
    (lib.optionals cudaSupport (
      with cudaPackages;
      [
        cuda_cudart # cuda_runtime.h, -lcudart
        cuda_cccl # <thrust/*>
        libcusparse # cusparse.h
        libcublas # cublas_v2.h
        libcusolver # cusolverDn.h
      ]
    ))
    ++ (lib.optionals rocmSupport (
      with rocmPackages;
      [
        clr
        rocthrust
        rocprim
        hipsparse
        hipblas
      ]
    ));

  propagatedBuildInputs =
    [
      psutil
      ray
      pandas
      pyarrow
      sentencepiece
      numpy
      torch
      transformers
      outlines
      xformers
      fastapi
      uvicorn
      pydantic
      aioprometheus
    ]
    ++ uvicorn.optional-dependencies.standard
    ++ aioprometheus.optional-dependencies.starlette
    ++ lib.optionals cudaSupport [
      pynvml
      cupy
    ];

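  # Build with the CUDA-compatible stdenv when CUDA is enabled so the C++ toolchain
  # matches the one expected by cudaPackages; otherwise keep the default stdenv.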
  stdenv = if cudaSupport then cudaPackages.backendStdenv else stdenv_pkg;

  pythonImportsCheck = [ "vllm" ];

  meta = with lib; {
    description = "High-throughput and memory-efficient inference and serving engine for LLMs";
    changelog = "https://github.com/vllm-project/vllm/releases/tag/v${version}";
    homepage = "https://github.com/vllm-project/vllm";
    license = licenses.asl20;
    maintainers = with maintainers; [
      happysalada
      lach
    ];
    broken = !cudaSupport && !rocmSupport;
  };
}
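
# A minimal usage sketch, assuming this expression is wired up as
# python3Packages.vllm in a nixpkgs checkout (CUDA builds also need unfree
# packages allowed):
#
#   nix-build . -A python3Packages.vllm \
#     --arg config '{ cudaSupport = true; allowUnfree = true; }'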