{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  fetchpatch,
  which,
  ninja,
  packaging,
  setuptools,
  torch,
  outlines,
  wheel,
  psutil,
  ray,
  pandas,
  pyarrow,
  sentencepiece,
  numpy,
  transformers,
  xformers,
  fastapi,
  uvicorn,
  pydantic,
  aioprometheus,
  pynvml,
  cupy,
  writeShellScript,

  config,

  cudaSupport ? config.cudaSupport,
  cudaPackages ? { },

  rocmSupport ? config.rocmSupport,
  rocmPackages ? { },
  gpuTargets ? [ ],
}:

buildPythonPackage rec {
  pname = "vllm";
  version = "0.3.3";
  format = "pyproject";

  src = fetchFromGitHub {
    owner = "vllm-project";
    repo = pname;
    rev = "v${version}";
    hash = "sha256-LU5pCPVv+Ws9dL8oWL1sJGzwQKI1IFk2A1I6TP9gXL4=";
  };

  # Otherwise it tries to enumerate host-supported ROCm gfx archs, which is not possible due to sandboxing.
  PYTORCH_ROCM_ARCH = lib.optionalString rocmSupport (
    lib.strings.concatStringsSep ";" rocmPackages.clr.gpuTargets
  );

  # xformers 0.0.23.post1 github release specifies its version as 0.0.24
  #
  # cupy-cuda12x is the same wheel as cupy, but built with cuda dependencies; we already have it set up
  # like that in nixpkgs. Version upgrade is due to upstream shenanigans
  # https://github.com/vllm-project/vllm/pull/2845/commits/34a0ad7f9bb7880c0daa2992d700df3e01e91363
  #
  # hipcc --version works badly on NixOS due to unresolved paths.
  postPatch =
    ''
      substituteInPlace requirements.txt \
        --replace "xformers == 0.0.23.post1" "xformers == 0.0.24"
      substituteInPlace requirements.txt \
        --replace "cupy-cuda12x == 12.1.0" "cupy == 12.3.0"
      substituteInPlace requirements-build.txt \
        --replace "torch==2.1.2" "torch == 2.2.1"
      substituteInPlace pyproject.toml \
        --replace "torch == 2.1.2" "torch == 2.2.1"
      substituteInPlace requirements.txt \
        --replace "torch == 2.1.2" "torch == 2.2.1"
    ''
    + lib.optionalString rocmSupport ''
      substituteInPlace setup.py \
        --replace "'hipcc', '--version'" "'${writeShellScript "hipcc-version-stub" "echo HIP version: 0.0"}'"
    '';

  preBuild =
    lib.optionalString cudaSupport ''
      export CUDA_HOME=${cudaPackages.cuda_nvcc}
    ''
    + lib.optionalString rocmSupport ''
      export ROCM_HOME=${rocmPackages.clr}
      export PATH=$PATH:${rocmPackages.hipcc}
    '';

  nativeBuildInputs = [
    ninja
    packaging
    setuptools
    torch
    wheel
    which
  ] ++ lib.optionals rocmSupport [ rocmPackages.hipcc ];

  buildInputs =
    (lib.optionals cudaSupport (
      with cudaPackages;
      [
        cuda_cudart # cuda_runtime.h, -lcudart
        cuda_cccl.dev # <thrust/*>
        libcusparse.dev # cusparse.h
        libcublas.dev # cublas_v2.h
        libcusolver # cusolverDn.h
      ]
    ))
    ++ (lib.optionals rocmSupport (
      with rocmPackages;
      [
        clr
        rocthrust
        rocprim
        hipsparse
        hipblas
      ]
    ));

  propagatedBuildInputs =
    [
      psutil
      ray
      pandas
      pyarrow
      sentencepiece
      numpy
      torch
      transformers
      outlines
      xformers
      fastapi
      uvicorn
      pydantic
      aioprometheus
    ]
    ++ uvicorn.optional-dependencies.standard
    ++ aioprometheus.optional-dependencies.starlette
    ++ lib.optionals cudaSupport [
      pynvml
      cupy
    ];

  pythonImportsCheck = [ "vllm" ];

  meta = with lib; {
    description = "High-throughput and memory-efficient inference and serving engine for LLMs";
    changelog = "https://github.com/vllm-project/vllm/releases/tag/v${version}";
    homepage = "https://github.com/vllm-project/vllm";
    license = licenses.asl20;
    maintainers = with maintainers; [
      happysalada
      lach
    ];
    broken = !cudaSupport && !rocmSupport;
  };
}