Merge pull request #271078 from SomeoneSerge/feat/torch-propagated-cuda

cudaPackages.setupCudaHook: propagate deps and the hook

authored by Connor Baker and committed by GitHub c94fdf82 e816589e

+150 -90
+3 -17
pkgs/development/compilers/cudatoolkit/extension.nix
··· 47 47 ./hooks/mark-for-cudatoolkit-root-hook.sh) 48 48 { }); 49 49 50 - # Normally propagated by cuda_nvcc or cudatoolkit through their depsHostHostPropagated 50 + # Currently propagated by cuda_nvcc or cudatoolkit, rather than used directly 51 51 setupCudaHook = (final.callPackage 52 52 ({ makeSetupHook, backendStdenv }: 53 53 makeSetupHook 54 54 { 55 55 name = "setup-cuda-hook"; 56 56 57 + substitutions.setupCudaHook = placeholder "out"; 58 + 57 59 # Point NVCC at a compatible compiler 58 60 substitutions.ccRoot = "${backendStdenv.cc}"; 59 61 60 62 # Required in addition to ccRoot as otherwise bin/gcc is looked up 61 63 # when building CMakeCUDACompilerId.cu 62 64 substitutions.ccFullPath = "${backendStdenv.cc}/bin/${backendStdenv.cc.targetPrefix}c++"; 63 - 64 - # Required by cmake's enable_language(CUDA) to build a test program 65 - # When implementing cross-compilation support: this is 66 - # final.pkgs.targetPackages.cudaPackages.cuda_cudart 67 - # Given the multiple-outputs each CUDA redist has, we can specify the exact components we 68 - # need from the package. CMake requires: 69 - # - the cuda_runtime.h header, which is in the dev output 70 - # - the dynamic library, which is in the lib output 71 - # - the static library, which is in the static output 72 - substitutions.cudartFlags = let cudart = final.cuda_cudart; in 73 - builtins.concatStringsSep " " (final.lib.optionals (final ? cuda_cudart) ([ 74 - "-I${final.lib.getDev cudart}/include" 75 - "-L${final.lib.getLib cudart}/lib" 76 - ] ++ final.lib.optionals (builtins.elem "static" cudart.outputs) [ 77 - "-L${cudart.static}/lib" 78 - ])); 79 65 } 80 66 ./hooks/setup-cuda-hook.sh) 81 67 { });
+7 -1
pkgs/development/compilers/cudatoolkit/hooks/mark-for-cudatoolkit-root-hook.sh
··· 1 1 # shellcheck shell=bash 2 2 3 + # Should we mimic cc-wrapper's "hygiene"? 4 + [[ -z ${strictDeps-} ]] || (( "$hostOffset" < 0 )) || return 0 5 + 6 + echo "Sourcing mark-for-cudatoolkit-root-hook" >&2 7 + 3 8 markForCUDAToolkit_ROOT() { 4 9 mkdir -p "${prefix}/nix-support" 5 - touch "${prefix}/nix-support/include-in-cudatoolkit-root" 10 + [[ -f "${prefix}/nix-support/include-in-cudatoolkit-root" ]] && return 11 + echo "$pname-$output" > "${prefix}/nix-support/include-in-cudatoolkit-root" 6 12 } 7 13 8 14 fixupOutputHooks+=(markForCUDAToolkit_ROOT)
-5
pkgs/development/compilers/cudatoolkit/hooks/nvcc-setup-hook.sh
··· 1 - # shellcheck shell=bash 2 - 3 - # CMake's enable_language(CUDA) runs a compiler test and it doesn't account for 4 - # CUDAToolkit_ROOT. We have to help it locate libcudart 5 - export NVCC_APPEND_FLAGS+=" -L@cudartLib@/lib -L@cudartStatic@/lib -I@cudartInclude@/include"
+84 -17
pkgs/development/compilers/cudatoolkit/hooks/setup-cuda-hook.sh
··· 3 3 # Only run the hook from nativeBuildInputs 4 4 (( "$hostOffset" == -1 && "$targetOffset" == 0)) || return 0 5 5 6 - echo Sourcing setup-cuda-hook >&2 6 + guard=Sourcing 7 + reason= 8 + 9 + [[ -n ${cudaSetupHookOnce-} ]] && guard=Skipping && reason=" because the hook has been propagated more than once" 10 + 11 + if (( "${NIX_DEBUG:-0}" >= 1 )) ; then 12 + echo "$guard hostOffset=$hostOffset targetOffset=$targetOffset setupCudaHook$reason" >&2 13 + else 14 + echo "$guard setup-cuda-hook$reason" >&2 15 + fi 16 + 17 + [[ "$guard" = Sourcing ]] || return 0 18 + 19 + declare -g cudaSetupHookOnce=1 20 + declare -Ag cudaHostPathsSeen=() 21 + declare -Ag cudaOutputToPath=() 22 + 23 + extendcudaHostPathsSeen() { 24 + (( "${NIX_DEBUG:-0}" >= 1 )) && echo "extendcudaHostPathsSeen $1" >&2 25 + 26 + local markerPath="$1/nix-support/include-in-cudatoolkit-root" 27 + [[ ! -f "${markerPath}" ]] && return 28 + [[ -v cudaHostPathsSeen[$1] ]] && return 29 + 30 + cudaHostPathsSeen["$1"]=1 31 + 32 + # E.g. 
cuda_cudart-lib 33 + local cudaOutputName 34 + read -r cudaOutputName < "$markerPath" 35 + 36 + [[ -z "$cudaOutputName" ]] && return 37 + 38 + local oldPath="${cudaOutputToPath[$cudaOutputName]-}" 39 + [[ -n "$oldPath" ]] && echo "extendcudaHostPathsSeen: warning: overwriting $cudaOutputName from $oldPath to $1" >&2 40 + cudaOutputToPath["$cudaOutputName"]="$1" 41 + } 42 + addEnvHooks "$targetOffset" extendcudaHostPathsSeen 7 43 8 - extendCUDAToolkit_ROOT() { 9 - if [[ -f "$1/nix-support/include-in-cudatoolkit-root" ]] ; then 10 - addToSearchPathWithCustomDelimiter ";" CUDAToolkit_ROOT "$1" 44 + setupCUDAToolkit_ROOT() { 45 + (( "${NIX_DEBUG:-0}" >= 1 )) && echo "setupCUDAToolkit_ROOT: cudaHostPathsSeen=${!cudaHostPathsSeen[*]}" >&2 11 46 12 - if [[ -d "$1/include" ]] ; then 13 - addToSearchPathWithCustomDelimiter ";" CUDAToolkit_INCLUDE_DIR "$1/include" 47 + for path in "${!cudaHostPathsSeen[@]}" ; do 48 + addToSearchPathWithCustomDelimiter ";" CUDAToolkit_ROOT "$path" 49 + if [[ -d "$path/include" ]] ; then 50 + addToSearchPathWithCustomDelimiter ";" CUDAToolkit_INCLUDE_DIR "$path/include" 14 51 fi 15 - fi 52 + done 53 + 54 + export cmakeFlags+=" -DCUDAToolkit_INCLUDE_DIR=$CUDAToolkit_INCLUDE_DIR -DCUDAToolkit_ROOT=$CUDAToolkit_ROOT" 16 55 } 17 - 18 - addEnvHooks "$targetOffset" extendCUDAToolkit_ROOT 56 + preConfigureHooks+=(setupCUDAToolkit_ROOT) 19 57 20 58 setupCUDAToolkitCompilers() { 21 59 echo Executing setupCUDAToolkitCompilers >&2 ··· 58 96 59 97 # CMake's enable_language(CUDA) runs a compiler test and it doesn't account for 60 98 # CUDAToolkit_ROOT. We have to help it locate libcudart 61 - local cudartFlags="@cudartFlags@" 62 - if [[ -z "${nvccDontPrependCudartFlags-}" ]] && [[ -n "${cudartFlags:-}" ]] ; then 63 - export NVCC_APPEND_FLAGS+=" $cudartFlags" 99 + if [[ -z "${nvccDontPrependCudartFlags-}" ]] ; then 100 + if [[ ! -v cudaOutputToPath["cuda_cudart-out"] ]] ; then 101 + echo "setupCUDAToolkitCompilers: missing cudaPackages.cuda_cudart. 
This may become an an error in the future" >&2 102 + # exit 1 103 + fi 104 + for pkg in "${!cudaOutputToPath[@]}" ; do 105 + [[ ! "$pkg" = cuda_cudart* ]] && continue 106 + 107 + local path="${cudaOutputToPath[$pkg]}" 108 + if [[ -d "$path/include" ]] ; then 109 + export NVCC_PREPEND_FLAGS+=" -I$path/include" 110 + fi 111 + if [[ -d "$path/lib" ]] ; then 112 + export NVCC_PREPEND_FLAGS+=" -L$path/lib" 113 + fi 114 + done 64 115 fi 65 116 } 117 + preConfigureHooks+=(setupCUDAToolkitCompilers) 66 118 67 - setupCMakeCUDAToolkit_ROOT() { 68 - export cmakeFlags+=" -DCUDAToolkit_INCLUDE_DIR=$CUDAToolkit_INCLUDE_DIR -DCUDAToolkit_ROOT=$CUDAToolkit_ROOT" 69 - } 119 + propagateCudaLibraries() { 120 + (( "${NIX_DEBUG:-0}" >= 1 )) && echo "propagateCudaLibraries: cudaPropagateToOutput=$cudaPropagateToOutput cudaHostPathsSeen=${!cudaHostPathsSeen[*]}" >&2 121 + 122 + [[ -z "${cudaPropagateToOutput-}" ]] && return 123 + 124 + mkdir -p "${!cudaPropagateToOutput}/nix-support" 125 + # One'd expect this should be propagated-build-build-deps, but that doesn't seem to work 126 + echo "@setupCudaHook@" >> "${!cudaPropagateToOutput}/nix-support/propagated-native-build-inputs" 127 + 128 + local propagatedBuildInputs=( "${!cudaHostPathsSeen[@]}" ) 129 + for output in $(getAllOutputNames) ; do 130 + if [[ ! "$output" = "$cudaPropagateToOutput" ]] ; then 131 + propagatedBuildInputs+=( "${!output}" ) 132 + fi 133 + break 134 + done 70 135 71 - postHooks+=(setupCUDAToolkitCompilers) 72 - preConfigureHooks+=(setupCMakeCUDAToolkit_ROOT) 136 + # One'd expect this should be propagated-host-host-deps, but that doesn't seem to work 137 + printWords "${propagatedBuildInputs[@]}" >> "${!cudaPropagateToOutput}/nix-support/propagated-build-inputs" 138 + } 139 + postFixupHooks+=(propagateCudaLibraries)
+1 -1
pkgs/development/libraries/cctag/default.nix
··· 49 49 buildInputs = [ 50 50 boost179 51 51 eigen 52 - opencv 52 + opencv.cxxdev 53 53 ]; 54 54 55 55 # Tests are broken on Darwin (linking issue)
+5
pkgs/development/libraries/nvidia-optical-flow-sdk/default.nix
··· 18 18 cp -R * $out/include 19 19 ''; 20 20 21 + postFixup = '' 22 + mkdir -p $out/nix-support 23 + echo $pname >> "$out/nix-support/include-in-cudatoolkit-root" 24 + ''; 25 + 21 26 meta = with lib; { 22 27 description = "Nvidia optical flow headers for computing the relative motion of pixels between images"; 23 28 homepage = "https://developer.nvidia.com/opticalflow-sdk";
+25 -10
pkgs/development/libraries/opencv/4.x.nix
··· 247 247 248 248 outputs = [ 249 249 "out" 250 + "cxxdev" 250 251 "package_tests" 251 252 ]; 253 + cudaPropagateToOutput = "cxxdev"; 252 254 253 255 postUnpack = lib.optionalString buildContrib '' 254 256 cp --no-preserve=mode -r "${contribSrc}/modules" "$NIX_BUILD_TOP/source/opencv_contrib" ··· 328 330 bzip2 AVFoundation Cocoa VideoDecodeAcceleration CoreMedia MediaToolbox Accelerate 329 331 ] 330 332 ++ lib.optionals enableDocs [ doxygen graphviz-nox ] 331 - ++ lib.optionals enableCuda (with cudaPackages; [ 332 - cuda_cudart 333 - cuda_cccl # <thrust/*> 334 - libnpp # npp.h 333 + ++ lib.optionals enableCuda (with cudaPackages; [ 334 + cuda_cudart.lib 335 + cuda_cudart.dev 336 + cuda_cccl.dev # <thrust/*> 337 + libnpp.dev # npp.h 338 + libnpp.lib 339 + libnpp.static 340 + nvidia-optical-flow-sdk 335 341 ] ++ lib.optionals enableCublas [ 336 - libcublas # cublas_v2.h 342 + # May start using the default $out instead once 343 + # https://github.com/NixOS/nixpkgs/issues/271792 344 + # has been addressed 345 + libcublas.static 346 + libcublas.lib 347 + libcublas.dev # cublas_v2.h 337 348 ] ++ lib.optionals enableCudnn [ 338 - cudnn # cudnn.h 349 + cudnn.dev # cudnn.h 350 + cudnn.lib 351 + cudnn.static 339 352 ] ++ lib.optionals enableCufft [ 340 - libcufft # cufft.h 341 - ]); 353 + libcufft.dev # cufft.h 354 + libcufft.lib 355 + libcufft.static 356 + ]); 342 357 343 - propagatedBuildInputs = lib.optional enablePython pythonPackages.numpy 344 - ++ lib.optionals enableCuda [ nvidia-optical-flow-sdk ]; 358 + propagatedBuildInputs = lib.optionals enablePython [ pythonPackages.numpy ]; 345 359 346 360 nativeBuildInputs = [ cmake pkg-config unzip ] 347 361 ++ lib.optionals enablePython [ ··· 458 472 postInstall = '' 459 473 sed -i "s|{exec_prefix}/$out|{exec_prefix}|;s|{prefix}/$out|{prefix}|" \ 460 474 "$out/lib/pkgconfig/opencv4.pc" 475 + mkdir $cxxdev 461 476 '' 462 477 # install python distribution information, so other packages can `import opencv` 463 478 + 
lib.optionalString enablePython ''
+2 -1
pkgs/development/libraries/openvino/default.nix
··· 122 122 "-DENABLE_CPPLINT:BOOL=OFF" 123 123 "-DBUILD_TESTING:BOOL=OFF" 124 124 "-DENABLE_SAMPLES:BOOL=OFF" 125 + (lib.cmakeBool "CMAKE_VERBOSE_MAKEFILE" true) 125 126 ]; 126 127 127 128 env.NIX_CFLAGS_COMPILE = lib.optionalString stdenv.isAarch64 "-Wno-narrowing"; ··· 133 134 buildInputs = [ 134 135 libusb1 135 136 libxml2 136 - opencv 137 + opencv.cxxdev 137 138 protobuf 138 139 pugixml 139 140 tbb
+14 -3
pkgs/development/python-modules/torch/default.nix
··· 134 134 "out" # output standard python package 135 135 "dev" # output libtorch headers 136 136 "lib" # output libtorch libraries 137 + "cxxdev" # propagated deps for the cmake consumers of torch 137 138 ]; 139 + cudaPropagateToOutput = "cxxdev"; 138 140 139 141 src = fetchFromGitHub { 140 142 owner = "pytorch"; ··· 339 341 cuda_cccl.dev # <thrust/*> 340 342 cuda_cudart.dev # cuda_runtime.h and libraries 341 343 cuda_cudart.lib 344 + cuda_cudart.static 342 345 cuda_cupti.dev # For kineto 343 346 cuda_cupti.lib # For kineto 344 347 cuda_nvcc.dev # crt/host_config.h; even though we include this in nativeBuildinputs, it's needed here too ··· 371 374 ++ lib.optionals rocmSupport [ rocmPackages.llvm.openmp ] 372 375 ++ lib.optionals (cudaSupport || rocmSupport) [ effectiveMagma ] 373 376 ++ lib.optionals stdenv.isLinux [ numactl ] 374 - ++ lib.optionals stdenv.isDarwin [ Accelerate CoreServices libobjc ]; 377 + ++ lib.optionals stdenv.isDarwin [ Accelerate CoreServices libobjc ] 378 + ++ lib.optionals tritonSupport [ openai-triton ] 379 + ++ lib.optionals MPISupport [ mpi ] 380 + ++ lib.optionals rocmSupport [ rocmtoolkit_joined ]; 375 381 376 382 propagatedBuildInputs = [ 377 383 cffi ··· 392 398 393 399 # torch/csrc requires `pybind11` at runtime 394 400 pybind11 401 + ] ++ lib.optionals tritonSupport [ openai-triton ]; 402 + 403 + propagatedCxxBuildInputs = [ 395 404 ] 396 - ++ lib.optionals tritonSupport [ openai-triton ] 397 405 ++ lib.optionals MPISupport [ mpi ] 398 406 ++ lib.optionals rocmSupport [ rocmtoolkit_joined ]; 399 407 ··· 454 462 --replace "/build/source/torch/include" "$dev/include" 455 463 ''; 456 464 457 - postFixup = lib.optionalString stdenv.isDarwin '' 465 + postFixup = '' 466 + mkdir -p "$cxxdev/nix-support" 467 + printWords "''${propagatedCxxBuildInputs[@]}" >> "$cxxdev/nix-support/propagated-build-inputs" 468 + '' + lib.optionalString stdenv.isDarwin '' 458 469 for f in $(ls $lib/lib/*.dylib); do 459 470 install_name_tool -id 
$lib/lib/$(basename $f) $f || true 460 471 done
+1 -11
pkgs/development/python-modules/torchaudio/default.nix
··· 60 60 ffmpeg-full 61 61 pybind11 62 62 sox 63 - ] ++ lib.optionals cudaSupport [ 64 - cudaPackages.libcurand.dev 65 - cudaPackages.libcurand.lib 66 - cudaPackages.cuda_cudart # cuda_runtime.h and libraries 67 - cudaPackages.cuda_cccl.dev # <thrust/*> 68 - cudaPackages.cuda_nvtx.dev 69 - cudaPackages.cuda_nvtx.lib # -llibNVToolsExt 70 - cudaPackages.libcublas.dev 71 - cudaPackages.libcublas.lib 72 - cudaPackages.libcufft.dev 73 - cudaPackages.libcufft.lib 63 + torch.cxxdev 74 64 ]; 75 65 76 66 propagatedBuildInputs = [
+8 -24
pkgs/development/python-modules/torchvision/default.nix
··· 17 17 inherit (torch) cudaCapabilities cudaPackages cudaSupport; 18 18 inherit (cudaPackages) backendStdenv cudaVersion; 19 19 20 - # NOTE: torchvision doesn't use cudnn; torch does! 21 - # For this reason it is not included. 22 - cuda-common-redist = with cudaPackages; [ 23 - cuda_cccl # <thrust/*> 24 - libcublas # cublas_v2.h 25 - libcusolver # cusolverDn.h 26 - libcusparse # cusparse.h 27 - ]; 28 - 29 - cuda-native-redist = symlinkJoin { 30 - name = "cuda-native-redist-${cudaVersion}"; 31 - paths = with cudaPackages; [ 32 - cuda_cudart # cuda_runtime.h 33 - cuda_nvcc 34 - ] ++ cuda-common-redist; 35 - }; 36 - 37 - cuda-redist = symlinkJoin { 38 - name = "cuda-redist-${cudaVersion}"; 39 - paths = cuda-common-redist; 40 - }; 41 - 42 20 pname = "torchvision"; 43 21 version = "0.16.1"; 44 22 in ··· 52 30 hash = "sha256-TsYBDtedTQ3+F3LM4JwzkGH2XOr0WSp1Au5YoR07rSA="; 53 31 }; 54 32 55 - nativeBuildInputs = [ libpng ninja which ] ++ lib.optionals cudaSupport [ cuda-native-redist ]; 33 + nativeBuildInputs = [ 34 + libpng 35 + ninja 36 + which 37 + ] ++ lib.optionals cudaSupport [ 38 + cudaPackages.cuda_nvcc 39 + ]; 56 40 57 - buildInputs = [ libjpeg_turbo libpng ] ++ lib.optionals cudaSupport [ cuda-redist ]; 41 + buildInputs = [ libjpeg_turbo libpng torch.cxxdev ]; 58 42 59 43 propagatedBuildInputs = [ numpy pillow torch scipy ]; 60 44