nixpkgs mirror (for testing) github.com/NixOS/nixpkgs
nix
at python-updates 232 lines 8.0 kB view raw
{
  _cuda,
  addDriverRunpath,
  backendStdenv,
  cmake,
  cuda_cudart,
  cuda_nvcc,
  cuda_nvrtc,
  cudaNamePrefix,
  cudnn,
  fetchFromGitHub,
  flags,
  gtest,
  lib,
  libcublas,
  libcurand,
  ninja,
  python3Packages,
  # Options
  pythonSupport ? true,
  enableF16C ? false,
  enableTools ? false,
  # passthru.updateScript
  gitUpdater,
}:
let
  inherit (_cuda.lib) _mkMetaBadPlatforms;
  inherit (lib) licenses maintainers teams;
  inherit (lib.asserts) assertMsg;
  inherit (lib.attrsets) getBin;
  inherit (lib.lists) all optionals;
  inherit (lib.strings)
    cmakeBool
    cmakeFeature
    optionalString
    versionAtLeast
    ;
  inherit (lib.trivial) flip;
in
# TODO: Tests.
assert assertMsg (!enableTools) "enableTools is not yet implemented";
backendStdenv.mkDerivation (finalAttrs: {
  __structuredAttrs = true;
  strictDeps = true;

  # NOTE: Depends on the CUDA package set, so use cudaNamePrefix.
  name = "${cudaNamePrefix}-${finalAttrs.pname}-${finalAttrs.version}";
  pname = "cutlass";
  version = "3.9.2";

  src = fetchFromGitHub {
    owner = "NVIDIA";
    repo = "cutlass";
    tag = "v${finalAttrs.version}";
    hash = "sha256-teziPNA9csYvhkG5t2ht8W8x5+1YGGbHm8VKx4JoxgI=";
  };

  # TODO: As a header-only library, we should make sure we have an `include` directory or similar which is not a
  # superset of the `out` (`bin`) or `dev` outputs (which is what the multiple-outputs setup hook does by default).
  outputs = [ "out" ] ++ optionals pythonSupport [ "dist" ];

  nativeBuildInputs = [
    cuda_nvcc
    cmake
    ninja
    python3Packages.python # Python is always required
  ]
  ++ optionals pythonSupport (
    with python3Packages;
    [
      build
      pythonOutputDistHook
      setuptools
    ]
  );

  postPatch =
    # Prepend some commands to the CUDA.cmake file so it can find the CUDA libraries using CMake's FindCUDAToolkit
    # module. These target names are used throughout the project; I (@connorbaker) did not choose them.
    # NOTE: ''${ is the indented-string escape for a literal ${ — the CMake variables below are expanded by CMake,
    # not by Nix.
    ''
      nixLog "patching CUDA.cmake to use FindCUDAToolkit"
      mv ./CUDA.cmake ./_CUDA_Append.cmake
      cat > ./_CUDA_Prepend.cmake <<'EOF'
      find_package(CUDAToolkit REQUIRED)
      foreach(_target cudart cuda_driver nvrtc)
        if (NOT TARGET CUDA::''${_target})
          message(FATAL_ERROR "''${_target} Not Found")
        endif()
        message(STATUS "''${_target} library: ''${CUDA_''${_target}_LIBRARY}")
        add_library(''${_target} ALIAS CUDA::''${_target})
      endforeach()
      EOF
      cat ./_CUDA_Prepend.cmake ./_CUDA_Append.cmake > ./CUDA.cmake
    ''
    # Patch cutlass to use the provided NVCC.
    # '_CUDA_INSTALL_PATH = os.getenv("CUDA_INSTALL_PATH", _cuda_install_path_from_nvcc())' \
    # '_CUDA_INSTALL_PATH = "${getBin cuda_nvcc}"'
    # NOTE: The injected raise uses RuntimeError — Python has no RuntimeException; the previous spelling would have
    # surfaced as a confusing NameError instead of this message.
    + ''
      nixLog "patching python bindings to make cuda_install_path fail"
      substituteInPlace ./python/cutlass/__init__.py \
        --replace-fail \
          'def cuda_install_path():' \
          '
      def cuda_install_path():
          raise RuntimeError("not supported with Nixpkgs CUDA packaging")
      '
    ''
    # Patch the python bindings to use environment variables set by Nixpkgs.
    # https://github.com/NVIDIA/cutlass/blob/e94e888df3551224738bfa505787b515eae8352f/python/cutlass/backend/compiler.py#L80
    # https://github.com/NVIDIA/cutlass/blob/e94e888df3551224738bfa505787b515eae8352f/python/cutlass/backend/compiler.py#L81
    # https://github.com/NVIDIA/cutlass/blob/e94e888df3551224738bfa505787b515eae8352f/python/cutlass/backend/compiler.py#L317
    # https://github.com/NVIDIA/cutlass/blob/e94e888df3551224738bfa505787b515eae8352f/python/cutlass/backend/compiler.py#L319
    # https://github.com/NVIDIA/cutlass/blob/e94e888df3551224738bfa505787b515eae8352f/python/cutlass/backend/compiler.py#L344
    # https://github.com/NVIDIA/cutlass/blob/e94e888df3551224738bfa505787b515eae8352f/python/cutlass/backend/compiler.py#L360
    + ''
      nixLog "patching python bindings to use environment variables"
      substituteInPlace ./python/cutlass/backend/compiler.py \
        --replace-fail \
          'self.include_paths = include_paths' \
          'self.include_paths = include_paths + [root + "/include" for root in os.getenv("CUDAToolkit_ROOT").split(";")]' \
        --replace-fail \
          'self.flags = flags' \
          'self.flags = flags + ["-L" + root + "/lib" for root in os.getenv("CUDAToolkit_ROOT").split(";")]' \
        --replace-fail \
          "\''${cuda_install_path}/bin/nvcc" \
          '${getBin cuda_nvcc}/bin/nvcc' \
        --replace-fail \
          '"cuda_install_path": cuda_install_path(),' \
          "" \
        --replace-fail \
          'f"{cuda_install_path()}/bin/nvcc"' \
          '"${getBin cuda_nvcc}/bin/nvcc"' \
        --replace-fail \
          'cuda_install_path() + "/include",' \
          ""
    '';

  enableParallelBuilding = true;

  buildInputs = [
    cuda_cudart
    cuda_nvrtc
    libcurand
  ]
  ++ optionals enableTools [
    cudnn
    libcublas
  ];

  cmakeFlags = [
    (cmakeFeature "CUTLASS_NVCC_ARCHS" flags.cmakeCudaArchitecturesString)
    (cmakeBool "CUTLASS_ENABLE_EXAMPLES" false)

    # Tests.
    (cmakeBool "CUTLASS_ENABLE_TESTS" finalAttrs.doCheck)
    (cmakeBool "CUTLASS_ENABLE_GTEST_UNIT_TESTS" finalAttrs.doCheck)
    (cmakeBool "CUTLASS_USE_SYSTEM_GOOGLETEST" true)

    # NOTE: Both CUDNN and CUBLAS can be used by the examples and the profiler. Since they are large dependencies, they
    # are disabled by default.
    (cmakeBool "CUTLASS_ENABLE_TOOLS" enableTools)
    (cmakeBool "CUTLASS_ENABLE_CUBLAS" enableTools)
    (cmakeBool "CUTLASS_ENABLE_CUDNN" enableTools)

    # NOTE: Requires x86_64 and hardware support.
    (cmakeBool "CUTLASS_ENABLE_F16C" enableF16C)

    # TODO: Unity builds are supposed to reduce build time, but this seems to just reduce the number of tasks
    # generated?
    # NOTE: Good explanation of unity builds:
    # https://www.methodpark.de/blog/how-to-speed-up-clang-tidy-with-unity-builds.
    (cmakeBool "CUTLASS_UNITY_BUILD_ENABLED" false)
  ];

  # Build the Python wheel out of the source tree; pythonOutputDistHook picks it up from the dist/ directory.
  postBuild = optionalString pythonSupport ''
    pushd "$NIX_BUILD_TOP/$sourceRoot"
    nixLog "building Python wheel"
    pyproject-build \
      --no-isolation \
      --outdir "$NIX_BUILD_TOP/$sourceRoot/''${cmakeBuildDir:?}/dist/" \
      --wheel
    popd >/dev/null
  '';

  doCheck = false;

  checkInputs = [ gtest ];

  # NOTE: Because the test cases immediately create and try to run the binaries, we don't have an opportunity
  # to patch them with autoAddDriverRunpath. To get around this, we add the driver runpath to the environment.
  # TODO: This would break Jetson when using cuda_compat, as it must come first.
  preCheck = optionalString finalAttrs.doCheck ''
    export LD_LIBRARY_PATH="$(readlink -mnv "${addDriverRunpath.driverLink}/lib")"
  '';

  # This is *not* a derivation you want to build on a small machine.
  requiredSystemFeatures = optionals finalAttrs.doCheck [
    "big-parallel"
    "cuda"
  ];

  passthru = {
    updateScript = gitUpdater {
      inherit (finalAttrs) pname version;
      rev-prefix = "v";
    };
    # TODO:
    # tests.test = cutlass.overrideAttrs { doCheck = true; };

    # Include required architectures in compatibility check.
    # https://github.com/NVIDIA/cutlass/tree/main?tab=readme-ov-file#compatibility
    platformAssertions = [
      {
        message = "all capabilities are >= 7.0 (${builtins.toJSON flags.cudaCapabilities})";
        assertion = all (flip versionAtLeast "7.0") flags.cudaCapabilities;
      }
    ];
  };

  meta = {
    description = "CUDA Templates for Linear Algebra Subroutines";
    homepage = "https://github.com/NVIDIA/cutlass";
    license = licenses.asl20;
    platforms = [
      "aarch64-linux"
      "x86_64-linux"
    ];
    badPlatforms = _mkMetaBadPlatforms finalAttrs;
    maintainers = [ maintainers.connorbaker ];
    teams = [ teams.cuda ];
  };
})