Clone of https://github.com/NixOS/nixpkgs.git (to stress-test knotserver)
at gcc-offload
{
  lib,
  stdenv,
  python,
  buildPythonPackage,
  pythonRelaxDepsHook,
  fetchFromGitHub,
  which,
  ninja,
  cmake,
  packaging,
  setuptools,
  torch,
  outlines,
  wheel,
  psutil,
  ray,
  pandas,
  pyarrow,
  sentencepiece,
  numpy,
  transformers,
  xformers,
  fastapi,
  uvicorn,
  pydantic,
  aioprometheus,
  pynvml,
  openai,
  pyzmq,
  tiktoken,
  torchvision,
  py-cpuinfo,
  lm-format-enforcer,
  prometheus-fastapi-instrumentator,
  cupy,
  writeShellScript,

  config,

  cudaSupport ? config.cudaSupport,
  cudaPackages ? { },

  # Has to be either ROCm or CUDA; default to the free one
  rocmSupport ? !config.cudaSupport,
  rocmPackages ? { },
  gpuTargets ? [ ],
}@args:

let
  cutlass = fetchFromGitHub {
    owner = "NVIDIA";
    repo = "cutlass";
    rev = "refs/tags/v3.5.0";
    sha256 = "sha256-D/s7eYsa5l/mfx73tE4mnFcTQdYqGmXa9d9TCryw4e4=";
  };
in

buildPythonPackage rec {
  pname = "vllm";
  version = "0.6.2";
  pyproject = true;

  stdenv = if cudaSupport then cudaPackages.backendStdenv else args.stdenv;

  src = fetchFromGitHub {
    owner = "vllm-project";
    repo = pname;
    rev = "refs/tags/v${version}";
    hash = "sha256-zUkqAPPhDRdN9rDQ2biCl1B+trV0xIHXub++v9zsQGo=";
  };

  patches = [
    ./0001-setup.py-don-t-ask-for-hipcc-version.patch
    ./0002-setup.py-nix-support-respect-cmakeFlags.patch
  ];

  # Ignore the python version check because it hard-codes minor versions and
  # lags behind `ray`'s python interpreter support
  postPatch = ''
    substituteInPlace CMakeLists.txt \
      --replace-fail \
        'set(PYTHON_SUPPORTED_VERSIONS' \
        'set(PYTHON_SUPPORTED_VERSIONS "${lib.versions.majorMinor python.version}"'
  '';

  nativeBuildInputs = [
    cmake
    ninja
    pythonRelaxDepsHook
    which
  ] ++ lib.optionals rocmSupport [ rocmPackages.hipcc ];

  build-system = [
    packaging
    setuptools
    wheel
  ];

  buildInputs =
    (lib.optionals cudaSupport (
      with cudaPackages;
      [
        cuda_cudart # cuda_runtime.h, -lcudart
        cuda_cccl
        libcusparse # cusparse.h
        libcusolver # cusolverDn.h
        cuda_nvcc
        cuda_nvtx
        libcublas
      ]
    ))
    ++ (lib.optionals rocmSupport (
      with rocmPackages;
      [
        clr
        rocthrust
        rocprim
        hipsparse
        hipblas
      ]
    ));

  dependencies =
    [
      aioprometheus
      fastapi
      lm-format-enforcer
      numpy
      openai
      outlines
      pandas
      prometheus-fastapi-instrumentator
      psutil
      py-cpuinfo
      pyarrow
      pydantic
      pyzmq
      ray
      sentencepiece
      tiktoken
      torch
      torchvision
      transformers
      uvicorn
      xformers
    ]
    ++ uvicorn.optional-dependencies.standard
    ++ aioprometheus.optional-dependencies.starlette
    ++ lib.optionals cudaSupport [
      cupy
      pynvml
    ];

  dontUseCmakeConfigure = true;
  cmakeFlags = [ (lib.cmakeFeature "FETCHCONTENT_SOURCE_DIR_CUTLASS" "${lib.getDev cutlass}") ];

  env =
    lib.optionalAttrs cudaSupport { CUDA_HOME = "${lib.getDev cudaPackages.cuda_nvcc}"; }
    // lib.optionalAttrs rocmSupport {
      # Otherwise it tries to enumerate host-supported ROCm gfx archs, and that is not possible due to sandboxing.
      PYTORCH_ROCM_ARCH = lib.strings.concatStringsSep ";" rocmPackages.clr.gpuTargets;
      ROCM_HOME = "${rocmPackages.clr}";
    };

  pythonRelaxDeps = true;

  pythonImportsCheck = [ "vllm" ];

  meta = with lib; {
    description = "High-throughput and memory-efficient inference and serving engine for LLMs";
    changelog = "https://github.com/vllm-project/vllm/releases/tag/v${version}";
    homepage = "https://github.com/vllm-project/vllm";
    license = licenses.asl20;
    maintainers = with maintainers; [
      happysalada
      lach
    ];
    # RuntimeError: Unknown runtime environment
    broken = true;
    # broken = !cudaSupport && !rocmSupport;
  };
}
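
A minimal sketch of how this derivation might be consumed from the root of this checkout. The `config` flags and the `python3.withPackages` wiring below are assumptions about the standard nixpkgs entry point, not anything stated in the file itself; note that `meta.broken = true` above means evaluation will be refused unless `allowBroken` is set.

# shell.nix — hypothetical consumer of the vllm derivation above.
# Every flag here is an assumption about how one would exercise the
# CUDA branch; none of them are set by the derivation itself.
let
  pkgs = import ./. {
    config = {
      allowUnfree = true; # the CUDA toolchain is unfree
      allowBroken = true; # the derivation above sets `meta.broken = true`
      cudaSupport = true; # feeds the `cudaSupport ? config.cudaSupport` default
    };
  };
in
pkgs.mkShell {
  # Build a python3 environment with vllm importable on PYTHONPATH
  packages = [ (pkgs.python3.withPackages (ps: [ ps.vllm ])) ];
}

The GPU backend can also be flipped per-package rather than tree-wide, e.g. `python3Packages.vllm.override { cudaSupport = false; rocmSupport = true; }`, since both flags are plain function arguments whose defaults merely fall back to `config`.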