{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  fetchpatch,
  which,
  ninja,
  packaging,
  setuptools,
  torch,
  outlines,
  wheel,
  psutil,
  ray,
  pandas,
  pyarrow,
  sentencepiece,
  numpy,
  transformers,
  xformers,
  fastapi,
  uvicorn,
  pydantic,
  aioprometheus,
  pynvml,
  cupy,
  writeShellScript,

  config,

  cudaSupport ? config.cudaSupport,
  cudaPackages ? { },

  rocmSupport ? config.rocmSupport,
  rocmPackages ? { },
  gpuTargets ? [ ],
}:

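# CUDA and ROCm support default to the nixpkgs-wide `config.cudaSupport` / `config.rocmSupport`
# flags. A minimal sketch of enabling CUDA for just this package (assuming it is exposed as
# `python3Packages.vllm`):
#
#   python3Packages.vllm.override { cudaSupport = true; }
#
# or import nixpkgs with `config.cudaSupport = true;` to enable it set-wide.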
buildPythonPackage rec {
  pname = "vllm";
  version = "0.3.3";
  format = "pyproject";

  src = fetchFromGitHub {
    owner = "vllm-project";
    repo = pname;
    rev = "v${version}";
    hash = "sha256-LU5pCPVv+Ws9dL8oWL1sJGzwQKI1IFk2A1I6TP9gXL4=";
  };

  # Otherwise the build tries to enumerate the ROCm gfx archs supported by the host,
  # which is not possible inside the sandbox.
  PYTORCH_ROCM_ARCH = lib.optionalString rocmSupport (
    lib.strings.concatStringsSep ";" rocmPackages.clr.gpuTargets
  );
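  # The resulting value is a semicolon-separated list of gfx targets, e.g.
  # "gfx906;gfx908;gfx90a;gfx1030" (illustrative only; the real list comes from
  # rocmPackages.clr.gpuTargets).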

  # The xformers 0.0.23.post1 GitHub release reports its version as 0.0.24.
  #
  # cupy-cuda12x is the same wheel as cupy, just built with the CUDA dependencies;
  # nixpkgs' cupy is already set up that way. The version bump follows upstream:
  # https://github.com/vllm-project/vllm/pull/2845/commits/34a0ad7f9bb7880c0daa2992d700df3e01e91363
  #
  # `hipcc --version` works badly on NixOS due to unresolved paths, so stub it out.
  postPatch =
    ''
      substituteInPlace requirements.txt \
        --replace "xformers == 0.0.23.post1" "xformers == 0.0.24"
      substituteInPlace requirements.txt \
        --replace "cupy-cuda12x == 12.1.0" "cupy == 12.3.0"
      substituteInPlace requirements-build.txt \
        --replace "torch==2.1.2" "torch == 2.2.1"
      substituteInPlace pyproject.toml \
        --replace "torch == 2.1.2" "torch == 2.2.1"
      substituteInPlace requirements.txt \
        --replace "torch == 2.1.2" "torch == 2.2.1"
    ''
    + lib.optionalString rocmSupport ''
      substituteInPlace setup.py \
        --replace "'hipcc', '--version'" "'${writeShellScript "hipcc-version-stub" "echo HIP version: 0.0"}'"
    '';

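  # torch.utils.cpp_extension (used by vllm's setup.py to compile the extension kernels)
  # looks at CUDA_HOME / ROCM_HOME when locating the nvcc/hipcc toolchains, so export them
  # explicitly instead of letting it probe the host system.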
  preBuild =
    lib.optionalString cudaSupport ''
      export CUDA_HOME=${cudaPackages.cuda_nvcc}
    ''
    + lib.optionalString rocmSupport ''
      export ROCM_HOME=${rocmPackages.clr}
      export PATH=$PATH:${rocmPackages.hipcc}
    '';

  nativeBuildInputs = [
    ninja
    packaging
    setuptools
    torch
    wheel
    which
  ] ++ lib.optionals rocmSupport [ rocmPackages.hipcc ];

  buildInputs =
    (lib.optionals cudaSupport (
      with cudaPackages;
      [
        cuda_cudart # cuda_runtime.h, -lcudart
        cuda_cccl.dev # <thrust/*>
        libcusparse.dev # cusparse.h
        libcublas.dev # cublas_v2.h
        libcusolver # cusolverDn.h
      ]
    ))
    ++ (lib.optionals rocmSupport (
      with rocmPackages;
      [
        clr # HIP runtime
        rocthrust # <thrust/*> on HIP
        rocprim # rocPRIM, required by rocThrust
        hipsparse # hipsparse.h
        hipblas # hipblas.h
      ]
    ));

  propagatedBuildInputs =
    [
      psutil
      ray
      pandas
      pyarrow
      sentencepiece
      numpy
      torch
      transformers
      outlines
      xformers
      fastapi
      uvicorn
      pydantic
      aioprometheus
    ]
    ++ uvicorn.optional-dependencies.standard
    ++ aioprometheus.optional-dependencies.starlette
    # Runtime dependencies that only make sense with CUDA (see the cupy note above).
    ++ lib.optionals cudaSupport [
      pynvml
      cupy
    ];

  pythonImportsCheck = [ "vllm" ];

  meta = with lib; {
    description = "High-throughput and memory-efficient inference and serving engine for LLMs";
    changelog = "https://github.com/vllm-project/vllm/releases/tag/v${version}";
    homepage = "https://github.com/vllm-project/vllm";
    license = licenses.asl20;
    maintainers = with maintainers; [
      happysalada
      lach
    ];
    broken = !cudaSupport && !rocmSupport;
  };
}