{
  lib,
  stdenv,
  python,
  buildPythonPackage,
  pythonRelaxDepsHook,
  fetchFromGitHub,
  which,
  ninja,
  cmake,
  packaging,
  setuptools,
  torch,
  outlines,
  wheel,
  psutil,
  ray,
  pandas,
  pyarrow,
  sentencepiece,
  numpy,
  transformers,
  xformers,
  fastapi,
  uvicorn,
  pydantic,
  aioprometheus,
  pynvml,
  openai,
  pyzmq,
  tiktoken,
  torchvision,
  py-cpuinfo,
  lm-format-enforcer,
  prometheus-fastapi-instrumentator,
  cupy,
  writeShellScript,

  config,

  cudaSupport ? config.cudaSupport,
  cudaPackages ? { },

  # The backend has to be either ROCm or CUDA; default to the free (ROCm) one
  rocmSupport ? !config.cudaSupport,
  rocmPackages ? { },
  gpuTargets ? [ ],
}@args:

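# A minimal sketch (not part of the original expression) of selecting a
# backend explicitly, assuming this file is reached through callPackage as
# usual for python-packages.nix:
#
#   vllm-cuda = python3Packages.vllm.override {
#     cudaSupport = true;
#     rocmSupport = false;
#   };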
let
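  # vLLM's CMake build pulls CUTLASS in through FetchContent, which would
  # download it at build time; the sandbox forbids network access, so pin the
  # source here and hand it to CMake via FETCHCONTENT_SOURCE_DIR_CUTLASS below.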
  cutlass = fetchFromGitHub {
    owner = "NVIDIA";
    repo = "cutlass";
    rev = "refs/tags/v3.5.0";
    hash = "sha256-D/s7eYsa5l/mfx73tE4mnFcTQdYqGmXa9d9TCryw4e4=";
  };
in

buildPythonPackage rec {
  pname = "vllm";
  version = "0.6.2";
  pyproject = true;

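  # CUDA builds need the NVCC-compatible toolchain; `stdenv` is shadowed by
  # this attribute, so the unmodified one stays reachable via the `@args`
  # binding above.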
  stdenv = if cudaSupport then cudaPackages.backendStdenv else args.stdenv;

  src = fetchFromGitHub {
    owner = "vllm-project";
    repo = pname;
    rev = "refs/tags/v${version}";
    hash = "sha256-zUkqAPPhDRdN9rDQ2biCl1B+trV0xIHXub++v9zsQGo=";
  };

  patches = [
    ./0001-setup.py-don-t-ask-for-hipcc-version.patch
    ./0002-setup.py-nix-support-respect-cmakeFlags.patch
  ];

  # Ignore the Python version check because it hard-codes minor versions and
  # lags behind `ray`'s Python interpreter support.
  postPatch = ''
    substituteInPlace CMakeLists.txt \
      --replace-fail \
        'set(PYTHON_SUPPORTED_VERSIONS' \
        'set(PYTHON_SUPPORTED_VERSIONS "${lib.versions.majorMinor python.version}"'
  '';

  nativeBuildInputs = [
    cmake
    ninja
    pythonRelaxDepsHook
    which
  ] ++ lib.optionals rocmSupport [ rocmPackages.hipcc ];

  build-system = [
    packaging
    setuptools
    wheel
  ];

  buildInputs =
    (lib.optionals cudaSupport (
      with cudaPackages;
      [
        cuda_cudart # cuda_runtime.h, -lcudart
        cuda_cccl
        libcusparse # cusparse.h
        libcusolver # cusolverDn.h
        cuda_nvcc
        cuda_nvtx
        libcublas
      ]
    ))
    ++ (lib.optionals rocmSupport (
      with rocmPackages;
      [
        clr
        rocthrust
        rocprim
        hipsparse
        hipblas
      ]
    ));

  dependencies =
    [
      aioprometheus
      fastapi
      lm-format-enforcer
      numpy
      openai
      outlines
      pandas
      prometheus-fastapi-instrumentator
      psutil
      py-cpuinfo
      pyarrow
      pydantic
      pyzmq
      ray
      sentencepiece
      tiktoken
      torch
      torchvision
      transformers
      uvicorn
      xformers
    ]
    ++ uvicorn.optional-dependencies.standard
    ++ aioprometheus.optional-dependencies.starlette
    ++ lib.optionals cudaSupport [
      cupy
      pynvml
    ];

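  # setup.py drives CMake itself, so skip the generic CMake configure phase;
  # the flags below still take effect because the 0002 patch makes setup.py
  # respect cmakeFlags.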
  dontUseCmakeConfigure = true;
  cmakeFlags = [ (lib.cmakeFeature "FETCHCONTENT_SOURCE_DIR_CUTLASS" "${lib.getDev cutlass}") ];

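  # CUDA_HOME is how setup.py (via torch's cpp_extension machinery) locates
  # nvcc and the rest of the CUDA toolkit.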
  env =
    lib.optionalAttrs cudaSupport { CUDA_HOME = "${lib.getDev cudaPackages.cuda_nvcc}"; }
    // lib.optionalAttrs rocmSupport {
      # Otherwise it tries to enumerate the ROCm gfx archs supported by the
      # host, which is impossible under sandboxing.
      PYTORCH_ROCM_ARCH = lib.strings.concatStringsSep ";" rocmPackages.clr.gpuTargets;
      ROCM_HOME = "${rocmPackages.clr}";
    };

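  # vLLM's requirements files pin dependency versions tightly; relax every pin
  # so the versions packaged in nixpkgs are accepted.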
  pythonRelaxDeps = true;

  pythonImportsCheck = [ "vllm" ];

  meta = with lib; {
    description = "High-throughput and memory-efficient inference and serving engine for LLMs";
    changelog = "https://github.com/vllm-project/vllm/releases/tag/v${version}";
    homepage = "https://github.com/vllm-project/vllm";
    license = licenses.asl20;
    maintainers = with maintainers; [
      happysalada
      lach
    ];
    # Fails at runtime with "RuntimeError: Unknown runtime environment".
    broken = true;
    # broken = !cudaSupport && !rocmSupport;
  };
}