python3Packages.warp-lang: fix CUDA build (#419750)

authored by Connor Baker and committed by GitHub c0874d92 a43007ac

+224 -80
+224 -80
pkgs/development/python-modules/warp-lang/default.nix
··· 1 { 2 config, 3 lib, 4 - stdenv, 5 - buildPythonPackage, 6 - fetchurl, 7 - fetchFromGitHub, 8 replaceVars, 9 - build, 10 setuptools, 11 - numpy, 12 - llvmPackages, 13 - cudaPackages, 14 - unittestCheckHook, 15 - jax, 16 torch, 17 - nix-update-script, 18 19 # Use standalone LLVM-based JIT compiler and CPU device support 20 standaloneSupport ? true, ··· 25 # Build Warp with MathDx support (requires CUDA support) 26 # Most linear-algebra tile operations like tile_cholesky(), tile_fft(), 27 # and tile_matmul() require Warp to be built with the MathDx library. 28 - libmathdxSupport ? cudaSupport && stdenv.hostPlatform.isLinux, 29 - }: 30 31 - let 32 version = "1.7.2.post1"; 33 34 - libmathdx = stdenv.mkDerivation (finalAttrs: { 35 pname = "libmathdx"; 36 - version = "0.2.0"; 37 38 src = 39 let 40 - inherit (stdenv.hostPlatform) system; 41 - selectSystem = attrs: attrs.${system} or (throw "Unsupported system: ${system}"); 42 - 43 - suffix = selectSystem { 44 - x86_64-linux = "Linux-x86_64"; 45 - aarch64-linux = "Linux-aarch64"; 46 - x86_64-windows = "win32-x86_64"; 47 - }; 48 - 49 - # nix-hash --type sha256 --to-sri $(nix-prefetch-url "https://...") 50 - hash = selectSystem { 51 - x86_64-linux = "sha256-Lk+PxWFvyQGRClFdmyuo4y7HBdR7pigOhMyEzajqbmg="; 52 - aarch64-linux = "sha256-6tH9YH98kSvDiut9rQEU5potEpeKqma/QtrCHLxwRLo="; 53 - x86_64-windows = "sha256-B8qwj7UzOXEDZh2oT3ip1qW0uqtygMsyfcbhh5Dgc8U="; 54 }; 55 in 56 - fetchurl { 57 - url = "https://developer.nvidia.com/downloads/compute/cublasdx/redist/cublasdx/libmathdx-${suffix}-${finalAttrs.version}.tar.gz"; 58 - inherit hash; 59 - }; 60 - 61 - unpackPhase = '' 62 - runHook preUnpack 63 64 - mkdir unpacked 65 - cd unpacked 66 - tar -xzf $src 67 - export sourceRoot=$(pwd) 68 - 69 - runHook postUnpack 70 - ''; 71 - 72 dontConfigure = true; 73 dontBuild = true; 74 75 installPhase = '' 76 runHook preInstall 77 78 - cp -rT "$sourceRoot" "$out" 79 80 runHook postInstall 81 ''; 82 83 meta = { 84 description = "library used to integrate cuBLASDx and cuFFTDx into Warp"; 85 sourceProvenance = with lib.sourceTypes; [ binaryNativeCode ]; 86 license = with lib.licenses; [ 87 # By downloading and using the software, you agree to fully ··· 104 # license: 105 mit 106 ]; 107 - platforms = with lib.platforms; linux ++ [ "x86_64-windows" ]; 108 maintainers = with lib.maintainers; [ yzx9 ]; 109 }; 110 }); ··· 113 pname = "warp-lang"; 114 inherit version; 115 pyproject = true; 116 117 src = fetchFromGitHub { 118 owner = "NVIDIA"; ··· 122 }; 123 124 patches = 125 - lib.optionals stdenv.hostPlatform.isDarwin [ 126 (replaceVars ./darwin-libcxx.patch { 127 LIBCXX_DEV = llvmPackages.libcxx.dev; 128 LIBCXX_LIB = llvmPackages.libcxx; ··· 140 ]; 141 142 postPatch = 143 - lib.optionalString (!stdenv.cc.isGNU) '' 144 - substituteInPlace warp/build_dll.py \ 145 - --replace-fail "g++" "${lib.getExe stdenv.cc}" 146 '' 147 # Broken tests on aarch64. Since unittest doesn't support disabling a 148 # single test, and pytest isn't compatible, we patch the test file directly 149 # instead. 150 # 151 # See: https://github.com/NVIDIA/warp/issues/552 152 - + lib.optionalString stdenv.hostPlatform.isAarch64 '' 153 - substituteInPlace warp/tests/test_fem.py \ 154 - --replace-fail "add_function_test(TestFem, \"test_integrate_gradient\", test_integrate_gradient, devices=devices)" "" 155 ''; 156 157 build-system = [ 158 - build 159 setuptools 160 ]; 161 ··· 163 numpy 164 ]; 165 166 - nativeBuildInputs = lib.optionals libmathdxSupport [ 167 - libmathdx 168 - cudaPackages.libcublas 169 - cudaPackages.libcufft 170 - cudaPackages.libnvjitlink 171 ]; 172 173 buildInputs = ··· 177 llvmPackages.libcxx 178 ] 179 ++ lib.optionals cudaSupport [ 180 - cudaPackages.cudatoolkit 181 cudaPackages.cuda_cudart 182 cudaPackages.cuda_nvcc 183 cudaPackages.cuda_nvrtc 184 ]; 185 186 preBuild = ··· 190 "--no_standalone" 191 ] 192 ++ lib.optionals cudaSupport [ 193 - "--cuda_path=${cudaPackages.cudatoolkit}" 194 ] 195 ++ lib.optionals libmathdxSupport [ 196 "--libmathdx" ··· 203 buildOptionString = lib.concatStringsSep " " buildOptions; 204 in 205 '' 206 - python build_lib.py ${buildOptionString} 207 ''; 208 209 pythonImportsCheck = [ 210 "warp" 211 ]; 212 213 - # Many unit tests fail with segfaults on aarch64-linux, especially in the sim 214 - # and grad modules. However, other functionality generally works, so we don't 215 - # mark the package as broken. 216 - # 217 - # See: https://www.github.com/NVIDIA/warp/issues/{356,372,552} 218 - doCheck = !(stdenv.hostPlatform.isAarch64 && stdenv.hostPlatform.isLinux); 219 220 - nativeCheckInputs = [ 221 - unittestCheckHook 222 - (jax.override { inherit cudaSupport; }) 223 - (torch.override { inherit cudaSupport; }) 224 225 - # # Disable paddlepaddle interop tests: malloc(): unaligned tcache chunk detected 226 - # (paddlepaddle.override { inherit cudaSupport; }) 227 - ]; 228 229 - preCheck = '' 230 - export WARP_CACHE_PATH=$(mktemp -d) # warp.config.kernel_cache_dir 231 - ''; 232 233 - passthru.updateScript = nix-update-script { }; 234 235 meta = { 236 description = "Python framework for high performance GPU simulation and graphics";
··· 1 { 2 + autoAddDriverRunpath, 3 + buildPythonPackage, 4 config, 5 + cudaPackages, 6 + fetchFromGitHub, 7 + fetchurl, 8 + jax, 9 lib, 10 + llvmPackages, 11 + numpy, 12 + pkgsBuildHost, 13 + python, 14 replaceVars, 15 + runCommand, 16 setuptools, 17 + stdenv, 18 torch, 19 + warp-lang, # Self-reference to this package for passthru.tests 20 + writableTmpDirAsHomeHook, 21 + writeShellApplication, 22 23 # Use standalone LLVM-based JIT compiler and CPU device support 24 standaloneSupport ? true, ··· 29 # Build Warp with MathDx support (requires CUDA support) 30 # Most linear-algebra tile operations like tile_cholesky(), tile_fft(), 31 # and tile_matmul() require Warp to be built with the MathDx library. 32 + # libmathdxSupport ? cudaSupport && stdenv.hostPlatform.isLinux, 33 + libmathdxSupport ? cudaSupport, 34 + }@args: 35 + assert libmathdxSupport -> cudaSupport; 36 + let 37 + effectiveStdenv = if cudaSupport then cudaPackages.backendStdenv else args.stdenv; 38 + stdenv = builtins.throw "Use effectiveStdenv instead of stdenv directly, as it may be replaced by cudaPackages.backendStdenv"; 39 40 version = "1.7.2.post1"; 41 42 + libmathdx = effectiveStdenv.mkDerivation (finalAttrs: { 43 + # NOTE: The version used should match the version Warp requires: 44 + # https://github.com/NVIDIA/warp/blob/4ad209076ce09668b18dedc74dce0d5cf8b9e409/deps/libmathdx-deps.packman.xml 45 pname = "libmathdx"; 46 + version = "0.1.2"; 47 + 48 + outputs = [ 49 + "out" 50 + "static" 51 + ]; 52 53 src = 54 let 55 + baseURL = "https://developer.download.nvidia.com/compute/cublasdx/redist/cublasdx"; 56 + name = lib.concatStringsSep "-" [ 57 + finalAttrs.pname 58 + "Linux" 59 + effectiveStdenv.hostPlatform.parsed.cpu.name 60 + finalAttrs.version 61 + ]; 62 + hashes = { 63 + aarch64-linux = "sha256-7HEXfzxPF62q/7pdZidj4eO09u588yxcpSu/bWot/9A="; 64 + x86_64-linux = "sha256-MImBFv+ooRSUqdL/YEe/bJIcVBnHMCk7SLS5eSeh0cQ="; 65 }; 66 in 67 + lib.mapNullable ( 68 + hash: 69 + fetchurl { 70 + inherit hash name; 71 + url = "${baseURL}/${name}.tar.gz"; 72 + } 73 + ) (hashes.${effectiveStdenv.hostPlatform.system} or null); 74 75 + dontUnpack = true; 76 dontConfigure = true; 77 dontBuild = true; 78 79 + # NOTE: The leading component is stripped because the 0.1.2 release is within the `libmathdx` directory. 80 installPhase = '' 81 runHook preInstall 82 83 + mkdir -p "$out" 84 + tar -xzf "$src" --strip-components=1 -C "$out" 85 + 86 + mkdir -p "$static" 87 + moveToOutput "lib/libmathdx_static.a" "$static" 88 89 runHook postInstall 90 ''; 91 92 meta = { 93 description = "library used to integrate cuBLASDx and cuFFTDx into Warp"; 94 + homepage = "https://developer.nvidia.com/cublasdx-downloads"; 95 sourceProvenance = with lib.sourceTypes; [ binaryNativeCode ]; 96 license = with lib.licenses; [ 97 # By downloading and using the software, you agree to fully ··· 114 # license: 115 mit 116 ]; 117 + platforms = [ 118 + "aarch64-linux" 119 + "x86_64-linux" 120 + ]; 121 maintainers = with lib.maintainers; [ yzx9 ]; 122 }; 123 }); ··· 126 pname = "warp-lang"; 127 inherit version; 128 pyproject = true; 129 + 130 + # TODO(@connorbaker): Some CUDA setup hook is failing when __structuredAttrs is false, 131 + # causing a bunch of missing math symbols (like expf) when linking against the static library 132 + # provided by NVCC. 133 + __structuredAttrs = true; 134 + 135 + stdenv = effectiveStdenv; 136 137 src = fetchFromGitHub { 138 owner = "NVIDIA"; ··· 142 }; 143 144 patches = 145 + lib.optionals effectiveStdenv.hostPlatform.isDarwin [ 146 (replaceVars ./darwin-libcxx.patch { 147 LIBCXX_DEV = llvmPackages.libcxx.dev; 148 LIBCXX_LIB = llvmPackages.libcxx; ··· 160 ]; 161 162 postPatch = 163 + # Patch build_dll.py to use our gencode flags rather than NVIDIA's very broad defaults. 164 + # NOTE: After 1.7.2, patching will need to be updated like this: 165 + # https://github.com/ConnorBaker/cuda-packages/blob/2fc8ba8c37acee427a94cdd1def55c2ec701ad82/pkgs/development/python-modules/warp/default.nix#L56-L65 166 + lib.optionalString cudaSupport '' 167 + nixLog "patching $PWD/warp/build_dll.py to use our gencode flags" 168 + substituteInPlace "$PWD/warp/build_dll.py" \ 169 + --replace-fail \ 170 + 'nvcc_opts = gencode_opts + [' \ 171 + 'nvcc_opts = [ ${ 172 + lib.concatMapStringsSep ", " (gencodeString: ''"${gencodeString}"'') cudaPackages.flags.gencode 173 + }, ' 174 + '' 175 + # Patch build_dll.py to use dynamic libraries rather than static ones. 176 + # NOTE: We do not patch the `nvptxcompiler_static` path because it is not available as a dynamic library. 177 + + lib.optionalString cudaSupport '' 178 + nixLog "patching $PWD/warp/build_dll.py to use dynamic libraries" 179 + substituteInPlace "$PWD/warp/build_dll.py" \ 180 + --replace-fail \ 181 + '-lcudart_static' \ 182 + '-lcudart' \ 183 + --replace-fail \ 184 + '-lnvrtc_static' \ 185 + '-lnvrtc' \ 186 + --replace-fail \ 187 + '-lnvrtc-builtins_static' \ 188 + '-lnvrtc-builtins' \ 189 + --replace-fail \ 190 + '-lnvJitLink_static' \ 191 + '-lnvJitLink' \ 192 + --replace-fail \ 193 + '-lmathdx_static' \ 194 + '-lmathdx' 195 + '' 196 + + '' 197 + nixLog "patching $PWD/warp/build_dll.py to use our C++ compiler" 198 + substituteInPlace "$PWD/warp/build_dll.py" \ 199 + --replace-fail "g++" "c++" 200 '' 201 # Broken tests on aarch64. Since unittest doesn't support disabling a 202 # single test, and pytest isn't compatible, we patch the test file directly 203 # instead. 204 # 205 # See: https://github.com/NVIDIA/warp/issues/552 206 + + lib.optionalString effectiveStdenv.hostPlatform.isAarch64 '' 207 + nixLog "patching $PWD/warp/tests/test_fem.py to disable broken tests on aarch64" 208 + substituteInPlace "$PWD/warp/tests/test_fem.py" \ 209 + --replace-fail \ 210 + 'add_function_test(TestFem, "test_integrate_gradient", test_integrate_gradient, devices=devices)' \ 211 + "" 212 + '' 213 + # These tests fail on CPU and CUDA. 214 + + '' 215 + nixLog "patching $PWD/warp/tests/test_reload.py to disable broken tests" 216 + substituteInPlace "$PWD/warp/tests/test_reload.py" \ 217 + --replace-fail \ 218 + 'add_function_test(TestReload, "test_reload", test_reload, devices=devices)' \ 219 + "" \ 220 + --replace-fail \ 221 + 'add_function_test(TestReload, "test_reload_references", test_reload_references, devices=get_test_devices("basic"))' \ 222 + "" 223 ''; 224 225 build-system = [ 226 setuptools 227 ]; 228 ··· 230 numpy 231 ]; 232 233 + # NOTE: While normally we wouldn't include autoAddDriverRunpath for packages built from source, since Warp 234 + # will be loading GPU drivers at runtime, we need to inject the path to our video drivers. 235 + nativeBuildInputs = lib.optionals cudaSupport [ 236 + autoAddDriverRunpath 237 + cudaPackages.cuda_nvcc 238 ]; 239 240 buildInputs = ··· 244 llvmPackages.libcxx 245 ] 246 ++ lib.optionals cudaSupport [ 247 + (lib.getOutput "static" cudaPackages.cuda_nvcc) # dependency on nvptxcompiler_static; no dynamic version available 248 + cudaPackages.cuda_cccl 249 cudaPackages.cuda_cudart 250 cudaPackages.cuda_nvcc 251 cudaPackages.cuda_nvrtc 252 + ] 253 + ++ lib.optionals libmathdxSupport [ 254 + libmathdx 255 + cudaPackages.libcublas 256 + cudaPackages.libcufft 257 + cudaPackages.libcusolver 258 + cudaPackages.libnvjitlink 259 ]; 260 261 preBuild = ··· 265 "--no_standalone" 266 ] 267 ++ lib.optionals cudaSupport [ 268 + # NOTE: The `cuda_path` argument is the directory which contains `bin/nvcc` (i.e., the bin output). 269 + "--cuda_path=${lib.getBin pkgsBuildHost.cudaPackages.cuda_nvcc}" 270 ] 271 ++ lib.optionals libmathdxSupport [ 272 "--libmathdx" ··· 279 buildOptionString = lib.concatStringsSep " " buildOptions; 280 in 281 '' 282 + nixLog "running $PWD/build_lib.py to create components necessary to build the wheel" 283 + "${python.pythonOnBuildForHost.interpreter}" "$PWD/build_lib.py" ${buildOptionString} 284 ''; 285 286 pythonImportsCheck = [ 287 "warp" 288 ]; 289 290 + # See passthru.tests. 291 + doCheck = false; 292 293 + passthru = { 294 + # Make libmathdx available for introspection. 295 + inherit libmathdx; 296 297 + # Scripts which provide test packages and implement test logic. 298 + testers.unit-tests = writeShellApplication { 299 + name = "warp-lang-unit-tests"; 300 + runtimeInputs = [ 301 + # Use the references from args 302 + (python.withPackages (_: [ 303 + warp-lang 304 + jax 305 + torch 306 + ])) 307 + # Disable paddlepaddle interop tests: malloc(): unaligned tcache chunk detected 308 + # (paddlepaddle.override { inherit cudaSupport; }) 309 + ]; 310 + text = '' 311 + python3 -m warp.tests 312 + ''; 313 + }; 314 315 + # Tests run within the Nix sandbox. 316 + tests = 317 + let 318 + mkUnitTests = 319 + { 320 + cudaSupport, 321 + libmathdxSupport, 322 + }: 323 + let 324 + name = 325 + "warp-lang-unit-tests-cpu" # CPU is baseline 326 + + lib.optionalString cudaSupport "-cuda" 327 + + lib.optionalString libmathdxSupport "-libmathdx"; 328 329 + warp-lang' = warp-lang.override { 330 + inherit cudaSupport libmathdxSupport; 331 + # Make sure the warp-lang provided through callPackage is replaced with the override we're making. 332 + warp-lang = warp-lang'; 333 + }; 334 + in 335 + runCommand name 336 + { 337 + nativeBuildInputs = [ 338 + warp-lang'.passthru.testers.unit-tests 339 + writableTmpDirAsHomeHook 340 + ]; 341 + requiredSystemFeatures = lib.optionals cudaSupport [ "cuda" ]; 342 + # Many unit tests fail with segfaults on aarch64-linux, especially in the sim 343 + # and grad modules. However, other functionality generally works, so we don't 344 + # mark the package as broken. 345 + # 346 + # See: https://www.github.com/NVIDIA/warp/issues/{356,372,552} 347 + meta.broken = effectiveStdenv.hostPlatform.isAarch64 && effectiveStdenv.hostPlatform.isLinux; 348 + } 349 + '' 350 + nixLog "running ${name}" 351 + 352 + if warp-lang-unit-tests; then 353 + nixLog "${name} passed" 354 + touch "$out" 355 + else 356 + nixErrorLog "${name} failed" 357 + exit 1 358 + fi 359 + ''; 360 + in 361 + { 362 + cpu = mkUnitTests { 363 + cudaSupport = false; 364 + libmathdxSupport = false; 365 + }; 366 + cuda = { 367 + cudaOnly = mkUnitTests { 368 + cudaSupport = true; 369 + libmathdxSupport = false; 370 + }; 371 + cudaWithLibmathDx = mkUnitTests { 372 + cudaSupport = true; 373 + libmathdxSupport = true; 374 + }; 375 + }; 376 + }; 377 + }; 378 379 meta = { 380 description = "Python framework for high performance GPU simulation and graphics";