lol

Merge pull request #166784 from samuela/samuela/cudnn3

fix `pytorchWithCuda`, fix `cupy`, upgrade `cudnn`

Authored by Frederik Rietdijk and committed by GitHub
c7728141 e79298b7

+93 -52
+4
pkgs/development/compilers/cudatoolkit/default.nix
··· 86 86 gcc = gcc10; # can bump to 11 along with stdenv.cc 87 87 }; 88 88 89 + # Make sure to only ever update this to a version that is compatible with the 90 + # latest cudnn, nccl, cutensor, etc! It sometimes happens that CUDA versions 91 + # are released prior to compatibility with the rest of the ecosystem. And 92 + # don't forget to request a review from @NixOS/cuda-maintainers! 89 93 cudatoolkit_11 = cudatoolkit_11_5; 90 94 }
+18 -22
pkgs/development/libraries/science/math/cudnn/default.nix
··· 81 81 cudnn_8_1_cudatoolkit_11_2 = cudnn_8_1_cudatoolkit_10_2.override { cudatoolkit = cudatoolkit_11_2; }; 82 82 83 83 cudnn_8_1_cudatoolkit_10 = cudnn_8_1_cudatoolkit_10_2.override { cudatoolkit = cudatoolkit_10; }; 84 - cudnn_8_1_cudatoolkit_11 = cudnn_8_1_cudatoolkit_10_2.override { cudatoolkit = cudatoolkit_11; }; 85 84 86 85 # cuDNN 8.3 is necessary for the latest jaxlib, esp. jaxlib-bin. See 87 86 # https://github.com/google/jax/discussions/9455 for more info. 88 - cudnn_8_3_cudatoolkit_10_2 = 89 - generic 90 - rec { 91 - version = "8.3.2"; 92 - cudatoolkit = cudatoolkit_10_2; 93 - # See https://docs.nvidia.com/deeplearning/cudnn/archives/cudnn-832/support-matrix/index.html#cudnn-cuda-hardware-versions. 94 - minCudaVersion = "10.2.00000"; 95 - maxCudaVersion = "11.5.99999"; 96 - mkSrc = cudatoolkit: 97 - let v = if lib.versions.majorMinor cudatoolkit.version == "10.2" then "10.2" else "11.5"; in 98 - fetchurl { 99 - # Starting at version 8.3.1 there's a new directory layout including 100 - # a subdirectory `local_installers`. 101 - url = "https://developer.download.nvidia.com/compute/redist/cudnn/v${version}/local_installers/${v}/cudnn-linux-x86_64-8.3.2.44_cuda${v}-archive.tar.xz"; 102 - hash = { 103 - "10.2" = "sha256-1vVu+cqM+PketzIQumw9ykm6REbBZhv6/lXB7EC2aaw="; 104 - "11.5" = "sha256-VQCVPAjF5dHd3P2iNPnvvdzb5DpTsm3AqCxyP6FwxFc="; 105 - }."${v}"; 106 - }; 107 - } 108 - ; 87 + cudnn_8_3_cudatoolkit_10_2 = generic rec { 88 + version = "8.3.2"; 89 + cudatoolkit = cudatoolkit_10_2; 90 + # See https://docs.nvidia.com/deeplearning/cudnn/archives/cudnn-832/support-matrix/index.html#cudnn-cuda-hardware-versions. 91 + minCudaVersion = "10.2.00000"; 92 + maxCudaVersion = "11.5.99999"; 93 + mkSrc = cudatoolkit: 94 + let v = if lib.versions.majorMinor cudatoolkit.version == "10.2" then "10.2" else "11.5"; in 95 + fetchurl { 96 + # Starting at version 8.3.1 there's a new directory layout including 97 + # a subdirectory `local_installers`. 
98 + url = "https://developer.download.nvidia.com/compute/redist/cudnn/v${version}/local_installers/${v}/cudnn-linux-x86_64-8.3.2.44_cuda${v}-archive.tar.xz"; 99 + hash = { 100 + "10.2" = "sha256-1vVu+cqM+PketzIQumw9ykm6REbBZhv6/lXB7EC2aaw="; 101 + "11.5" = "sha256-VQCVPAjF5dHd3P2iNPnvvdzb5DpTsm3AqCxyP6FwxFc="; 102 + }."${v}"; 103 + }; 104 + }; 109 105 cudnn_8_3_cudatoolkit_11_0 = cudnn_8_3_cudatoolkit_10_2.override { cudatoolkit = cudatoolkit_11_0; }; 110 106 cudnn_8_3_cudatoolkit_11_1 = cudnn_8_3_cudatoolkit_10_2.override { cudatoolkit = cudatoolkit_11_1; }; 111 107 cudnn_8_3_cudatoolkit_11_2 = cudnn_8_3_cudatoolkit_10_2.override { cudatoolkit = cudatoolkit_11_2; };
+23 -24
pkgs/development/libraries/science/math/cutensor/default.nix
··· 1 1 { callPackage 2 - , cudatoolkit_10_1, cudatoolkit_10_2 3 - , cudatoolkit_11_0, cudatoolkit_11_1, cudatoolkit_11_2, cudatoolkit_11_3, cudatoolkit_11_4 2 + , cudatoolkit_10_1 3 + , cudatoolkit_10_2 4 + , cudatoolkit_11 5 + , cudatoolkit_11_0 6 + , cudatoolkit_11_1 7 + , cudatoolkit_11_2 8 + , cudatoolkit_11_3 9 + , cudatoolkit_11_4 10 + , cudatoolkit_11_5 11 + , cudatoolkit_11_6 4 12 }: 5 13 6 14 rec { ··· 8 16 version = "1.2.2.5"; 9 17 libPath = "lib/10.1"; 10 18 cudatoolkit = cudatoolkit_10_1; 11 - # 1.2.2 is compatible with CUDA 11.0, 11.1, and 11.2: 12 - # ephemeral doc at https://developer.nvidia.com/cutensor/downloads 13 - sha256 = "1dl9bd71frhac9cb8lvnh71zfsnqxbxbfhndvva2zf6nh0my4klm"; 19 + # 1.2.2 is compatible with CUDA 10.1, 10.2, and 11.x. 20 + # See https://docs.nvidia.com/cuda/cutensor/release_notes.html#cutensor-v1-2-2. 21 + hash = "sha256-lU7iK4DWuC/U3s1Ct/rq2Gr3w4F2U7RYYgpmF05bibY="; 14 22 }; 15 23 16 24 cutensor_cudatoolkit_10_2 = cutensor_cudatoolkit_10_1.override { 17 25 version = "1.3.1.3"; 18 26 libPath = "lib/10.2"; 19 27 cudatoolkit = cudatoolkit_10_2; 20 - # 1.3.1 is compatible with CUDA 11.0, 11.1, and 11.2: 21 - # ephemeral doc at https://developer.nvidia.com/cutensor/downloads 22 - sha256 = "sha256-mNlVnabB2IC3HnYY0mb06RLqQzDxN9ePGVeBy3hkBC8="; 28 + # 1.3.1 is compatible with CUDA 10.2 and 11.x. 29 + # See https://docs.nvidia.com/cuda/cutensor/release_notes.html#cutensor-v1-3-1. 
30 + hash = "sha256-mNlVnabB2IC3HnYY0mb06RLqQzDxN9ePGVeBy3hkBC8="; 23 31 }; 24 32 25 33 cutensor_cudatoolkit_10 = cutensor_cudatoolkit_10_2; ··· 29 37 cudatoolkit = cudatoolkit_11_0; 30 38 }; 31 39 32 - cutensor_cudatoolkit_11_1 = cutensor_cudatoolkit_11_0.override { 33 - cudatoolkit = cudatoolkit_11_1; 34 - }; 40 + cutensor_cudatoolkit_11_1 = cutensor_cudatoolkit_11_0.override { cudatoolkit = cudatoolkit_11_1; }; 41 + cutensor_cudatoolkit_11_2 = cutensor_cudatoolkit_11_0.override { cudatoolkit = cudatoolkit_11_2; }; 42 + cutensor_cudatoolkit_11_3 = cutensor_cudatoolkit_11_0.override { cudatoolkit = cudatoolkit_11_3; }; 43 + cutensor_cudatoolkit_11_4 = cutensor_cudatoolkit_11_0.override { cudatoolkit = cudatoolkit_11_4; }; 44 + cutensor_cudatoolkit_11_5 = cutensor_cudatoolkit_11_0.override { cudatoolkit = cudatoolkit_11_5; }; 45 + cutensor_cudatoolkit_11_6 = cutensor_cudatoolkit_11_0.override { cudatoolkit = cudatoolkit_11_6; }; 35 46 36 - cutensor_cudatoolkit_11_2 = cutensor_cudatoolkit_11_0.override { 37 - cudatoolkit = cudatoolkit_11_2; 38 - }; 39 - 40 - cutensor_cudatoolkit_11_3 = cutensor_cudatoolkit_11_0.override { 41 - cudatoolkit = cudatoolkit_11_3; 42 - }; 43 - 44 - cutensor_cudatoolkit_11_4 = cutensor_cudatoolkit_11_0.override { 45 - cudatoolkit = cudatoolkit_11_4; 46 - }; 47 - 48 - cutensor_cudatoolkit_11 = cutensor_cudatoolkit_11_4; 47 + cutensor_cudatoolkit_11 = cutensor_cudatoolkit_11_0.override { cudatoolkit = cudatoolkit_11; }; 49 48 }
+2 -2
pkgs/development/libraries/science/math/cutensor/generic.nix
··· 7 7 , addOpenGLRunpath 8 8 9 9 , version 10 - , sha256 10 + , hash 11 11 }: 12 12 13 13 let ··· 21 21 22 22 src = fetchurl { 23 23 url = "https://developer.download.nvidia.com/compute/cutensor/${mostOfVersion}/local_installers/libcutensor-${stdenv.hostPlatform.parsed.kernel.name}-${stdenv.hostPlatform.parsed.cpu.name}-${version}.tar.gz"; 24 - inherit sha256; 24 + inherit hash; 25 25 }; 26 26 27 27 outputs = [ "out" "dev" ];
+4
pkgs/development/libraries/science/math/nccl/default.nix
··· 38 38 39 39 enableParallelBuilding = true; 40 40 41 + passthru = { 42 + inherit cudatoolkit; 43 + }; 44 + 41 45 meta = with lib; { 42 46 description = "Multi-GPU and multi-node collective communication primitives for NVIDIA GPUs"; 43 47 homepage = "https://developer.nvidia.com/nccl";
+11
pkgs/development/python-modules/cupy/default.nix
··· 5 5 , addOpenGLRunpath 6 6 }: 7 7 8 + assert cudnn.cudatoolkit == cudatoolkit; 9 + assert cutensor.cudatoolkit == cudatoolkit; 10 + assert nccl.cudatoolkit == cudatoolkit; 11 + 8 12 buildPythonPackage rec { 9 13 pname = "cupy"; 10 14 version = "10.2.0"; ··· 15 19 sha256 = "sha256-5ovvA76QGOsOnVztMfDgLerks5nJrKR08rLc+ArmWA8="; 16 20 }; 17 21 22 + # See https://docs.cupy.dev/en/v10.2.0/reference/environment.html. Setting both 23 + # CUPY_NUM_BUILD_JOBS and CUPY_NUM_NVCC_THREADS to NIX_BUILD_CORES results in 24 + # a small amount of thrashing but it turns out there are a large number of 25 + # very short builds and a few extremely long ones, so setting both ends up 26 + # working nicely in practice. 18 27 preConfigure = '' 19 28 export CUDA_PATH=${cudatoolkit} 29 + export CUPY_NUM_BUILD_JOBS="$NIX_BUILD_CORES" 30 + export CUPY_NUM_NVCC_THREADS="$NIX_BUILD_CORES" 20 31 ''; 21 32 22 33 nativeBuildInputs = [
+7 -3
pkgs/top-level/all-packages.nix
··· 4647 4647 cudnn_8_1_cudatoolkit_11_1 4648 4648 cudnn_8_1_cudatoolkit_11_2 4649 4649 cudnn_8_1_cudatoolkit_10 4650 - cudnn_8_1_cudatoolkit_11 4651 4650 cudnn_8_3_cudatoolkit_10_2 4652 4651 cudnn_8_3_cudatoolkit_11_0 4653 4652 cudnn_8_3_cudatoolkit_11_1 ··· 4658 4657 cudnn_8_3_cudatoolkit_10 4659 4658 cudnn_8_3_cudatoolkit_11; 4660 4659 4661 - # TODO(samuela): This is old and should be upgraded to 8.3 at some point. 4662 - cudnn = cudnn_7_6_cudatoolkit_10_1; 4660 + # Make sure to keep this in sync with the `cudatoolkit` version! 4661 + cudnn = cudnn_8_3_cudatoolkit_10; 4663 4662 4664 4663 cutensorPackages = callPackages ../development/libraries/science/math/cutensor { }; 4665 4664 inherit (cutensorPackages) ··· 32962 32961 ### SCIENCE / MATH 32963 32962 32964 32963 caffe = callPackage ../applications/science/math/caffe ({ 32964 + cudaSupport = config.cudaSupport or false; 32965 + cudatoolkit = cudatoolkit_10_1; 32966 + cudnn = cudnn_7_6_cudatoolkit_10_1; 32965 32967 opencv3 = opencv3WithoutCuda; # Used only for image loading. 32966 32968 blas = openblas; 32967 32969 inherit (darwin.apple_sdk.frameworks) Accelerate CoreGraphics CoreVideo; 32968 32970 } // (config.caffe or {})); 32971 + 32972 + caffeWithCuda = caffe.override { cudaSupport = true; }; 32969 32973 32970 32974 caffe2 = callPackage ../development/libraries/science/math/caffe2 (rec { 32971 32975 inherit (python3Packages) python future six numpy pydot;
+24 -1
pkgs/top-level/python-packages.nix
··· 1459 1459 inherit (self) python numpy boost; 1460 1460 }); 1461 1461 1462 + caffeWithCuda = toPythonModule (pkgs.caffeWithCuda.override { 1463 + pythonSupport = true; 1464 + inherit (self) python numpy boost; 1465 + }); 1466 + 1462 1467 cairocffi = callPackage ../development/python-modules/cairocffi { }; 1463 1468 1464 1469 cairosvg = callPackage ../development/python-modules/cairosvg { }; ··· 1961 1966 1962 1967 cupy = callPackage ../development/python-modules/cupy { 1963 1968 cudatoolkit = pkgs.cudatoolkit_11; 1964 - cudnn = pkgs.cudnn_8_1_cudatoolkit_11; 1969 + cudnn = pkgs.cudnn_8_3_cudatoolkit_11; 1965 1970 nccl = pkgs.nccl_cudatoolkit_11; 1966 1971 cutensor = pkgs.cutensor_cudatoolkit_11; 1967 1972 }; ··· 8366 8371 8367 8372 pytorch = callPackage ../development/python-modules/pytorch { 8368 8373 cudaSupport = pkgs.config.cudaSupport or false; 8374 + 8375 + # TODO: next time pytorch is updated (to 1.11.0, currently in staging as of 8376 + # 2022-03-31), make the following changes: 8377 + 8378 + # -> cudatoolkit_11 8379 + cudatoolkit = pkgs.cudatoolkit_10; 8380 + 8381 + # -> cudnn_8_3_cudatoolkit_11 8382 + cudnn = pkgs.cudnn_8_1_cudatoolkit_10; 8383 + 8384 + # -> cutensor_cudatoolkit_11 (cutensor is a new dependency in v1.11.0) 8385 + # cutensor = pkgs.cutensor_cudatoolkit_11; 8386 + 8387 + # -> setting a custom magma should be unnecessary with v1.11.0 8388 + magma = pkgs.magma.override { cudatoolkit = pkgs.cudatoolkit_10; }; 8389 + 8390 + # -> nccl_cudatoolkit_11 8391 + nccl = pkgs.nccl.override { cudatoolkit = pkgs.cudatoolkit_10; }; 8369 8392 }; 8370 8393 8371 8394 pytorch-bin = callPackage ../development/python-modules/pytorch/bin.nix { };