{
  lib,
  stdenv,
  buildPythonPackage,
  fetchFromGitHub,
  which,
  ninja,
  packaging,
  setuptools,
  torch,
  outlines,
  wheel,
  psutil,
  ray,
  pandas,
  pyarrow,
  sentencepiece,
  numpy,
  transformers,
  xformers,
  fastapi,
  uvicorn,
  pydantic,
  aioprometheus,
  pynvml,
  cupy,
  writeShellScript,

  config,

  cudaSupport ? config.cudaSupport,
  cudaPackages ? { },

  rocmSupport ? config.rocmSupport,
  rocmPackages ? { },
  gpuTargets ? [ ],
}:

let
  stdenv_pkg = stdenv;
in

buildPythonPackage rec {
  pname = "vllm";
  version = "0.3.3";
  format = "pyproject";

  src = fetchFromGitHub {
    owner = "vllm-project";
    repo = pname;
    rev = "v${version}";
    hash = "sha256-LU5pCPVv+Ws9dL8oWL1sJGzwQKI1IFk2A1I6TP9gXL4=";
  };

  # Otherwise it tries to enumerate host supported ROCM gfx archs, and that is not possible due to sandboxing.
  PYTORCH_ROCM_ARCH = lib.optionalString rocmSupport (
    lib.strings.concatStringsSep ";" rocmPackages.clr.gpuTargets
  );

  # cupy-cuda12x is the same wheel as cupy, but built with cuda dependencies, we already have it set up
  # like that in nixpkgs. Version upgrade is due to upstream shenanigans
  # https://github.com/vllm-project/vllm/pull/2845/commits/34a0ad7f9bb7880c0daa2992d700df3e01e91363
  #
  # hipcc --version works badly on NixOS due to unresolved paths.
  # Unclear why pythonRelaxDeps doesn't work here, but on last attempt, it didn't.
  postPatch =
    ''
      substituteInPlace requirements.txt \
        --replace "xformers == 0.0.23.post1" "xformers"
      substituteInPlace requirements.txt \
        --replace "cupy-cuda12x == 12.1.0" "cupy"
      substituteInPlace requirements-build.txt \
        --replace "torch==2.1.2" "torch"
      substituteInPlace pyproject.toml \
        --replace "torch == 2.1.2" "torch"
      substituteInPlace requirements.txt \
        --replace "torch == 2.1.2" "torch"
    ''
    + lib.optionalString rocmSupport ''
      substituteInPlace setup.py \
        --replace "'hipcc', '--version'" "'${writeShellScript "hipcc-version-stub" "echo HIP version: 0.0"}'"
    '';

  preBuild =
    lib.optionalString cudaSupport ''
      export CUDA_HOME=${cudaPackages.cuda_nvcc}
    ''
    + lib.optionalString rocmSupport ''
      export ROCM_HOME=${rocmPackages.clr}
      export PATH=$PATH:${rocmPackages.hipcc}
    '';

  nativeBuildInputs = [
    ninja
    packaging
    setuptools
    torch
    wheel
    which
  ] ++ lib.optionals rocmSupport [ rocmPackages.hipcc ];

  buildInputs =
    (lib.optionals cudaSupport (
      with cudaPackages;
      [
        cuda_cudart # cuda_runtime.h, -lcudart
        cuda_cccl # <thrust/*>
        libcusparse # cusparse.h
        libcublas # cublas_v2.h
        libcusolver # cusolverDn.h
      ]
    ))
    ++ (lib.optionals rocmSupport (
      with rocmPackages;
      [
        clr
        rocthrust
        rocprim
        hipsparse
        hipblas
      ]
    ));

  propagatedBuildInputs =
    [
      psutil
      ray
      pandas
      pyarrow
      sentencepiece
      numpy
      torch
      transformers
      outlines
      xformers
      fastapi
      uvicorn
      pydantic
      aioprometheus
    ]
    ++ uvicorn.optional-dependencies.standard
    ++ aioprometheus.optional-dependencies.starlette
    ++ lib.optionals cudaSupport [
      pynvml
      cupy
    ];

  stdenv = if cudaSupport then cudaPackages.backendStdenv else stdenv_pkg;

  pythonImportsCheck = [ "vllm" ];

  meta = with lib; {
    description = "High-throughput and memory-efficient inference and serving engine for LLMs";
    changelog = "https://github.com/vllm-project/vllm/releases/tag/v${version}";
    homepage = "https://github.com/vllm-project/vllm";
    license = licenses.asl20;
    maintainers = with maintainers; [
      happysalada
      lach
    ];
    broken = !cudaSupport && !rocmSupport;
  };
}