nixpkgs mirror (for testing)
https://github.com/NixOS/nixpkgs
nix
{
  # CUDA package-set internals: provides lib helpers such as _mkMetaBadPlatforms.
  _cuda,
  addDriverRunpath,
  backendStdenv,
  cmake,
  cuda_cudart,
  cuda_nvcc,
  cuda_nvrtc,
  # Prefix identifying the CUDA package set this derivation was built against.
  cudaNamePrefix,
  cudnn,
  fetchFromGitHub,
  # CUDA architecture/capability flags computed by the package set.
  flags,
  gtest,
  lib,
  libcublas,
  libcurand,
  ninja,
  python3Packages,
  # Options
  pythonSupport ? true, # build the Python wheel into the extra `dist` output
  enableF16C ? false, # F16C intrinsics; requires x86_64 and hardware support
  enableTools ? false, # profiler/tools; pulls in cudnn and libcublas (not yet implemented)
  # passthru.updateScript
  gitUpdater,
}:
let
  # Computes meta.badPlatforms from passthru.platformAssertions (used in meta below).
  inherit (_cuda.lib) _mkMetaBadPlatforms;
  inherit (lib) licenses maintainers teams;
  inherit (lib.asserts) assertMsg;
  # getBin selects the `bin` output of a package (used to locate nvcc).
  inherit (lib.attrsets) getBin;
  inherit (lib.lists) all optionals;
  inherit (lib.strings)
    cmakeBool
    cmakeFeature
    optionalString
    versionAtLeast
    ;
  inherit (lib.trivial) flip;
in
# TODO: Tests.
assert assertMsg (!enableTools) "enableTools is not yet implemented";
backendStdenv.mkDerivation (finalAttrs: {
  __structuredAttrs = true;
  strictDeps = true;

  # NOTE: Depends on the CUDA package set, so use cudaNamePrefix.
  name = "${cudaNamePrefix}-${finalAttrs.pname}-${finalAttrs.version}";
  pname = "cutlass";
  version = "3.9.2";

  src = fetchFromGitHub {
    owner = "NVIDIA";
    repo = "cutlass";
    tag = "v${finalAttrs.version}";
    hash = "sha256-teziPNA9csYvhkG5t2ht8W8x5+1YGGbHm8VKx4JoxgI=";
  };

  # TODO: As a header-only library, we should make sure we have an `include` directory or similar which is not a
  # superset of the `out` (`bin`) or `dev` outputs (which is what the multiple-outputs setup hook does by default).
  outputs = [ "out" ] ++ optionals pythonSupport [ "dist" ];

  nativeBuildInputs = [
    cuda_nvcc
    cmake
    ninja
    python3Packages.python # Python is always required
  ]
  ++ optionals pythonSupport (
    with python3Packages;
    [
      build
      pythonOutputDistHook
      setuptools
    ]
  );

  postPatch =
    # Prepend some commands to the CUDA.cmake file so it can find the CUDA libraries using CMake's FindCUDAToolkit
    # module. These target names are used throughout the project; I (@connorbaker) did not choose them.
    # NOTE: ''${...} is an escaped CMake variable reference inside the indented string, not Nix interpolation.
    ''
      nixLog "patching CUDA.cmake to use FindCUDAToolkit"
      mv ./CUDA.cmake ./_CUDA_Append.cmake
      cat > ./_CUDA_Prepend.cmake <<'EOF'
      find_package(CUDAToolkit REQUIRED)
      foreach(_target cudart cuda_driver nvrtc)
        if (NOT TARGET CUDA::''${_target})
          message(FATAL_ERROR "''${_target} Not Found")
        endif()
        message(STATUS "''${_target} library: ''${CUDA_''${_target}_LIBRARY}")
        add_library(''${_target} ALIAS CUDA::''${_target})
      endforeach()
      EOF
      cat ./_CUDA_Prepend.cmake ./_CUDA_Append.cmake > ./CUDA.cmake
    ''
    # Rather than pointing the Python bindings' cuda_install_path() at our NVCC (the previous approach, roughly:
    #   '_CUDA_INSTALL_PATH = os.getenv("CUDA_INSTALL_PATH", _cuda_install_path_from_nvcc())' ->
    #   '_CUDA_INSTALL_PATH = "<getBin cuda_nvcc>"'
    # ), make cuda_install_path() raise unconditionally and patch each of its call sites below. That way any
    # call site we missed fails loudly instead of silently using a bogus path.
    + ''
      nixLog "patching python bindings to make cuda_install_path fail"
      substituteInPlace ./python/cutlass/__init__.py \
        --replace-fail \
          'def cuda_install_path():' \
          '
      def cuda_install_path():
          raise RuntimeError("not supported with Nixpkgs CUDA packaging")
      '
    ''
    # Patch the python bindings to use environment variables set by Nixpkgs.
    # https://github.com/NVIDIA/cutlass/blob/e94e888df3551224738bfa505787b515eae8352f/python/cutlass/backend/compiler.py#L80
    # https://github.com/NVIDIA/cutlass/blob/e94e888df3551224738bfa505787b515eae8352f/python/cutlass/backend/compiler.py#L81
    # https://github.com/NVIDIA/cutlass/blob/e94e888df3551224738bfa505787b515eae8352f/python/cutlass/backend/compiler.py#L317
    # https://github.com/NVIDIA/cutlass/blob/e94e888df3551224738bfa505787b515eae8352f/python/cutlass/backend/compiler.py#L319
    # https://github.com/NVIDIA/cutlass/blob/e94e888df3551224738bfa505787b515eae8352f/python/cutlass/backend/compiler.py#L344
    # https://github.com/NVIDIA/cutlass/blob/e94e888df3551224738bfa505787b515eae8352f/python/cutlass/backend/compiler.py#L360
    + ''
      nixLog "patching python bindings to use environment variables"
      substituteInPlace ./python/cutlass/backend/compiler.py \
        --replace-fail \
          'self.include_paths = include_paths' \
          'self.include_paths = include_paths + [root + "/include" for root in os.getenv("CUDAToolkit_ROOT").split(";")]' \
        --replace-fail \
          'self.flags = flags' \
          'self.flags = flags + ["-L" + root + "/lib" for root in os.getenv("CUDAToolkit_ROOT").split(";")]' \
        --replace-fail \
          "\''${cuda_install_path}/bin/nvcc" \
          '${getBin cuda_nvcc}/bin/nvcc' \
        --replace-fail \
          '"cuda_install_path": cuda_install_path(),' \
          "" \
        --replace-fail \
          'f"{cuda_install_path()}/bin/nvcc"' \
          '"${getBin cuda_nvcc}/bin/nvcc"' \
        --replace-fail \
          'cuda_install_path() + "/include",' \
          ""
    '';

  enableParallelBuilding = true;

  buildInputs = [
    cuda_cudart
    cuda_nvrtc
    libcurand
  ]
  ++ optionals enableTools [
    cudnn
    libcublas
  ];

  cmakeFlags = [
    (cmakeFeature "CUTLASS_NVCC_ARCHS" flags.cmakeCudaArchitecturesString)
    (cmakeBool "CUTLASS_ENABLE_EXAMPLES" false)

    # Tests.
    (cmakeBool "CUTLASS_ENABLE_TESTS" finalAttrs.doCheck)
    (cmakeBool "CUTLASS_ENABLE_GTEST_UNIT_TESTS" finalAttrs.doCheck)
    (cmakeBool "CUTLASS_USE_SYSTEM_GOOGLETEST" true)

    # NOTE: Both CUDNN and CUBLAS can be used by the examples and the profiler. Since they are large dependencies, they
    # are disabled by default.
    (cmakeBool "CUTLASS_ENABLE_TOOLS" enableTools)
    (cmakeBool "CUTLASS_ENABLE_CUBLAS" enableTools)
    (cmakeBool "CUTLASS_ENABLE_CUDNN" enableTools)

    # NOTE: Requires x86_64 and hardware support.
    (cmakeBool "CUTLASS_ENABLE_F16C" enableF16C)

    # TODO: Unity builds are supposed to reduce build time, but this seems to just reduce the number of tasks
    # generated?
    # NOTE: Good explanation of unity builds:
    # https://www.methodpark.de/blog/how-to-speed-up-clang-tidy-with-unity-builds.
    (cmakeBool "CUTLASS_UNITY_BUILD_ENABLED" false)
  ];

  # Build the wheel out of the source tree (cmakeBuildDir) so pythonOutputDistHook can pick it up from dist/.
  postBuild = optionalString pythonSupport ''
    pushd "$NIX_BUILD_TOP/$sourceRoot"
    nixLog "building Python wheel"
    pyproject-build \
      --no-isolation \
      --outdir "$NIX_BUILD_TOP/$sourceRoot/''${cmakeBuildDir:?}/dist/" \
      --wheel
    popd >/dev/null
  '';

  doCheck = false;

  checkInputs = [ gtest ];

  # NOTE: Because the test cases immediately create and try to run the binaries, we don't have an opportunity
  # to patch them with autoAddDriverRunpath. To get around this, we add the driver runpath to the environment.
  # TODO: This would break Jetson when using cuda_compat, as it must come first.
  preCheck = optionalString finalAttrs.doCheck ''
    export LD_LIBRARY_PATH="$(readlink -mnv "${addDriverRunpath.driverLink}/lib")"
  '';

  # This is *not* a derivation you want to build on a small machine.
  requiredSystemFeatures = optionals finalAttrs.doCheck [
    "big-parallel"
    "cuda"
  ];

  passthru = {
    updateScript = gitUpdater {
      inherit (finalAttrs) pname version;
      rev-prefix = "v";
    };
    # TODO:
    # tests.test = cutlass.overrideAttrs { doCheck = true; };

    # Include required architectures in compatibility check.
    # https://github.com/NVIDIA/cutlass/tree/main?tab=readme-ov-file#compatibility
    platformAssertions = [
      {
        message = "all capabilities are >= 7.0 (${builtins.toJSON flags.cudaCapabilities})";
        assertion = all (flip versionAtLeast "7.0") flags.cudaCapabilities;
      }
    ];
  };

  meta = {
    description = "CUDA Templates for Linear Algebra Subroutines";
    homepage = "https://github.com/NVIDIA/cutlass";
    license = licenses.asl20;
    platforms = [
      "aarch64-linux"
      "x86_64-linux"
    ];
    badPlatforms = _mkMetaBadPlatforms finalAttrs;
    maintainers = [ maintainers.connorbaker ];
    teams = [ teams.cuda ];
  };
})