cudaPackages: use -Xfatbin=-compress-all; prune default cudaCapabilities

+85 -25
+5 -1
pkgs/development/compilers/cudatoolkit/common.nix
··· 151 # Refer to comments in the overrides for cuda_nvcc for explanation 152 # CUDA_TOOLKIT_ROOT_DIR is legacy, 153 # Cf. https://cmake.org/cmake/help/latest/module/FindCUDA.html#input-variables 154 '' 155 mkdir -p $out/nix-support 156 cat <<EOF >> $out/nix-support/setup-hook ··· 160 if [ -z "\''${CUDAHOSTCXX-}" ]; then 161 export CUDAHOSTCXX=${backendStdenv.cc}/bin; 162 fi 163 - export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${backendStdenv.cc}/bin' 164 EOF 165 166 # Move some libraries to the lib output so that programs that
··· 151 # Refer to comments in the overrides for cuda_nvcc for explanation 152 # CUDA_TOOLKIT_ROOT_DIR is legacy, 153 # Cf. https://cmake.org/cmake/help/latest/module/FindCUDA.html#input-variables 154 + # NOTE: We unconditionally set -Xfatbin=-compress-all, which reduces the size of the compiled 155 + # binaries. We need this because binaries larger than 2GB fail to link, and the default set 156 + # of CUDA capabilities we build can regularly push binaries past that limit (for example, 157 + # with Magma). 158 '' 159 mkdir -p $out/nix-support 160 cat <<EOF >> $out/nix-support/setup-hook ··· 164 if [ -z "\''${CUDAHOSTCXX-}" ]; then 165 export CUDAHOSTCXX=${backendStdenv.cc}/bin; 166 fi 167 + export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${backendStdenv.cc}/bin -Xfatbin=-compress-all' 168 EOF 169 170 # Move some libraries to the lib output so that programs that
+23 -9
pkgs/development/compilers/cudatoolkit/flags.nix
··· 4 }: 5 6 # Type aliases 7 - # Gpu = { 8 - # archName: String, # e.g., "Hopper" 9 - # computeCapability: String, # e.g., "9.0" 10 - # minCudaVersion: String, # e.g., "11.8" 11 - # maxCudaVersion: String, # e.g., "12.0" 12 - # } 13 14 let 15 inherit (lib) attrsets lists strings trivial versions; ··· 34 # gpus :: List Gpu 35 gpus = builtins.import ./gpus.nix; 36 37 - # isVersionIn :: Gpu -> Bool 38 isSupported = gpu: 39 let 40 inherit (gpu) minCudaVersion maxCudaVersion; 41 lowerBoundSatisfied = strings.versionAtLeast cudaVersion minCudaVersion; 42 - upperBoundSatisfied = !(strings.versionOlder maxCudaVersion cudaVersion); 43 in 44 lowerBoundSatisfied && upperBoundSatisfied; 45 46 # supportedGpus :: List Gpu 47 # GPUs which are supported by the provided CUDA version. 48 supportedGpus = builtins.filter isSupported gpus; 49 50 # supportedCapabilities :: List Capability 51 supportedCapabilities = lists.map (gpu: gpu.computeCapability) supportedGpus; 52 53 # cudaArchNameToVersions :: AttrSet String (List String) 54 # Maps the name of a GPU architecture to different versions of that architecture. ··· 151 # dropDot :: String -> String 152 inherit dropDot; 153 } // formatCapabilities { 154 - cudaCapabilities = config.cudaCapabilities or supportedCapabilities; 155 enableForwardCompat = config.cudaForwardCompat or true; 156 }
··· 4 }: 5 6 # Type aliases 7 + # Gpu :: AttrSet 8 + # - See the documentation in ./gpus.nix. 9 10 let 11 inherit (lib) attrsets lists strings trivial versions; ··· 30 # gpus :: List Gpu 31 gpus = builtins.import ./gpus.nix; 32 33 + # isSupported :: Gpu -> Bool 34 isSupported = gpu: 35 let 36 inherit (gpu) minCudaVersion maxCudaVersion; 37 lowerBoundSatisfied = strings.versionAtLeast cudaVersion minCudaVersion; 38 + upperBoundSatisfied = (maxCudaVersion == null) 39 + || !(strings.versionOlder maxCudaVersion cudaVersion); 40 in 41 lowerBoundSatisfied && upperBoundSatisfied; 42 43 + # isDefault :: Gpu -> Bool 44 + isDefault = gpu: 45 + let 46 + inherit (gpu) dontDefaultAfter; 47 + newGpu = dontDefaultAfter == null; 48 + recentGpu = newGpu || strings.versionAtLeast dontDefaultAfter cudaVersion; 49 + in 50 + recentGpu; 51 + 52 # supportedGpus :: List Gpu 53 # GPUs which are supported by the provided CUDA version. 54 supportedGpus = builtins.filter isSupported gpus; 55 56 + # defaultGpus :: List Gpu 57 + # GPUs which are supported by the provided CUDA version and we want to build for by default. 58 + defaultGpus = builtins.filter isDefault supportedGpus; 59 + 60 # supportedCapabilities :: List Capability 61 supportedCapabilities = lists.map (gpu: gpu.computeCapability) supportedGpus; 62 + 63 + # defaultCapabilities :: List Capability 64 + # The default capabilities to target, if not overridden by the user. 65 + defaultCapabilities = lists.map (gpu: gpu.computeCapability) defaultGpus; 66 67 # cudaArchNameToVersions :: AttrSet String (List String) 68 # Maps the name of a GPU architecture to different versions of that architecture. ··· 165 # dropDot :: String -> String 166 inherit dropDot; 167 } // formatCapabilities { 168 + cudaCapabilities = config.cudaCapabilities or defaultCapabilities; 169 enableForwardCompat = config.cudaForwardCompat or true; 170 }
+52 -14
pkgs/development/compilers/cudatoolkit/gpus.nix
··· 1 [ 2 { 3 archName = "Kepler"; 4 computeCapability = "3.0"; 5 minCudaVersion = "10.0"; 6 maxCudaVersion = "10.2"; 7 } 8 { 9 archName = "Kepler"; 10 computeCapability = "3.2"; 11 minCudaVersion = "10.0"; 12 maxCudaVersion = "10.2"; 13 } 14 { 15 archName = "Kepler"; 16 computeCapability = "3.5"; 17 minCudaVersion = "10.0"; 18 maxCudaVersion = "11.8"; 19 } 20 { 21 archName = "Kepler"; 22 computeCapability = "3.7"; 23 minCudaVersion = "10.0"; 24 maxCudaVersion = "11.8"; 25 } 26 { 27 archName = "Maxwell"; 28 computeCapability = "5.0"; 29 minCudaVersion = "10.0"; 30 - maxCudaVersion = "12.0"; 31 } 32 { 33 archName = "Maxwell"; 34 computeCapability = "5.2"; 35 minCudaVersion = "10.0"; 36 - maxCudaVersion = "12.0"; 37 } 38 { 39 archName = "Maxwell"; 40 computeCapability = "5.3"; 41 minCudaVersion = "10.0"; 42 - maxCudaVersion = "12.0"; 43 } 44 { 45 archName = "Pascal"; 46 computeCapability = "6.0"; 47 minCudaVersion = "10.0"; 48 - maxCudaVersion = "12.0"; 49 } 50 { 51 archName = "Pascal"; 52 computeCapability = "6.1"; 53 minCudaVersion = "10.0"; 54 - maxCudaVersion = "12.0"; 55 } 56 { 57 archName = "Pascal"; 58 computeCapability = "6.2"; 59 minCudaVersion = "10.0"; 60 - maxCudaVersion = "12.0"; 61 } 62 { 63 archName = "Volta"; 64 computeCapability = "7.0"; 65 minCudaVersion = "10.0"; 66 - maxCudaVersion = "12.0"; 67 } 68 { 69 archName = "Volta"; 70 computeCapability = "7.2"; 71 minCudaVersion = "10.0"; 72 - maxCudaVersion = "12.0"; 73 } 74 { 75 archName = "Turing"; 76 computeCapability = "7.5"; 77 minCudaVersion = "10.0"; 78 - maxCudaVersion = "12.0"; 79 } 80 { 81 archName = "Ampere"; 82 computeCapability = "8.0"; 83 minCudaVersion = "11.2"; 84 - maxCudaVersion = "12.0"; 85 } 86 { 87 archName = "Ampere"; 88 computeCapability = "8.6"; 89 minCudaVersion = "11.2"; 90 - maxCudaVersion = "12.0"; 91 } 92 { 93 archName = "Ampere"; 94 computeCapability = "8.7"; 95 minCudaVersion = "11.5"; 96 - maxCudaVersion = "12.0"; 97 } 98 { 99 archName = "Ada"; 100 computeCapability = 
"8.9"; 101 minCudaVersion = "11.8"; 102 - maxCudaVersion = "12.0"; 103 } 104 { 105 archName = "Hopper"; 106 computeCapability = "9.0"; 107 minCudaVersion = "11.8"; 108 - maxCudaVersion = "12.0"; 109 } 110 ]
··· 1 [ 2 + # Type alias 3 + # Gpu = { 4 + # archName: String 5 + # - The name of the microarchitecture. 6 + # computeCapability: String 7 + # - The compute capability of the GPU. 8 + # minCudaVersion: String 9 + # - The minimum (inclusive) CUDA version that supports this GPU. 10 + # dontDefaultAfter: null | String 11 + # - The CUDA version after which to exclude this GPU from the list of default capabilities 12 + # we build. null means we always include this GPU in the default capabilities if it is 13 + # supported. 14 + # maxCudaVersion: null | String 15 + # - The maximum (inclusive) CUDA version that supports this GPU. null means there is no 16 + # maximum. 17 + # } 18 { 19 archName = "Kepler"; 20 computeCapability = "3.0"; 21 minCudaVersion = "10.0"; 22 + dontDefaultAfter = "10.2"; 23 maxCudaVersion = "10.2"; 24 } 25 { 26 archName = "Kepler"; 27 computeCapability = "3.2"; 28 minCudaVersion = "10.0"; 29 + dontDefaultAfter = "10.2"; 30 maxCudaVersion = "10.2"; 31 } 32 { 33 archName = "Kepler"; 34 computeCapability = "3.5"; 35 minCudaVersion = "10.0"; 36 + dontDefaultAfter = "11.0"; 37 maxCudaVersion = "11.8"; 38 } 39 { 40 archName = "Kepler"; 41 computeCapability = "3.7"; 42 minCudaVersion = "10.0"; 43 + dontDefaultAfter = "11.0"; 44 maxCudaVersion = "11.8"; 45 } 46 { 47 archName = "Maxwell"; 48 computeCapability = "5.0"; 49 minCudaVersion = "10.0"; 50 + dontDefaultAfter = "11.0"; 51 + maxCudaVersion = null; 52 } 53 { 54 archName = "Maxwell"; 55 computeCapability = "5.2"; 56 minCudaVersion = "10.0"; 57 + dontDefaultAfter = "11.0"; 58 + maxCudaVersion = null; 59 } 60 { 61 archName = "Maxwell"; 62 computeCapability = "5.3"; 63 minCudaVersion = "10.0"; 64 + dontDefaultAfter = "11.0"; 65 + maxCudaVersion = null; 66 } 67 { 68 archName = "Pascal"; 69 computeCapability = "6.0"; 70 minCudaVersion = "10.0"; 71 + dontDefaultAfter = null; 72 + maxCudaVersion = null; 73 } 74 { 75 archName = "Pascal"; 76 computeCapability = "6.1"; 77 minCudaVersion = "10.0"; 78 + 
dontDefaultAfter = null; 79 + maxCudaVersion = null; 80 } 81 { 82 archName = "Pascal"; 83 computeCapability = "6.2"; 84 minCudaVersion = "10.0"; 85 + dontDefaultAfter = null; 86 + maxCudaVersion = null; 87 } 88 { 89 archName = "Volta"; 90 computeCapability = "7.0"; 91 minCudaVersion = "10.0"; 92 + dontDefaultAfter = null; 93 + maxCudaVersion = null; 94 } 95 { 96 archName = "Volta"; 97 computeCapability = "7.2"; 98 minCudaVersion = "10.0"; 99 + dontDefaultAfter = null; 100 + maxCudaVersion = null; 101 } 102 { 103 archName = "Turing"; 104 computeCapability = "7.5"; 105 minCudaVersion = "10.0"; 106 + dontDefaultAfter = null; 107 + maxCudaVersion = null; 108 } 109 { 110 archName = "Ampere"; 111 computeCapability = "8.0"; 112 minCudaVersion = "11.2"; 113 + dontDefaultAfter = null; 114 + maxCudaVersion = null; 115 } 116 { 117 archName = "Ampere"; 118 computeCapability = "8.6"; 119 minCudaVersion = "11.2"; 120 + dontDefaultAfter = null; 121 + maxCudaVersion = null; 122 } 123 { 124 archName = "Ampere"; 125 computeCapability = "8.7"; 126 minCudaVersion = "11.5"; 127 + # NOTE: This is purposefully before 11.5 to ensure it is never a capability we target by 128 + # default. 8.7 is the Jetson Orin series of devices which are a very specific platform. 129 + # We keep this entry here in case we ever want to target it explicitly, but we don't 130 + # want to target it by default. 131 + dontDefaultAfter = "11.4"; 132 + maxCudaVersion = null; 133 } 134 { 135 archName = "Ada"; 136 computeCapability = "8.9"; 137 minCudaVersion = "11.8"; 138 + dontDefaultAfter = null; 139 + maxCudaVersion = null; 140 } 141 { 142 archName = "Hopper"; 143 computeCapability = "9.0"; 144 minCudaVersion = "11.8"; 145 + dontDefaultAfter = null; 146 + maxCudaVersion = null; 147 } 148 ]
+5 -1
pkgs/development/compilers/cudatoolkit/redist/overrides.nix
··· 41 # uses the last --compiler-bindir it gets on the command line. 42 # FIXME: this results in "incompatible redefinition" warnings. 43 # https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#compiler-bindir-directory-ccbin 44 postInstall = (oldAttrs.postInstall or "") + '' 45 mkdir -p $out/nix-support 46 cat <<EOF >> $out/nix-support/setup-hook ··· 49 if [ -z "\''${CUDAHOSTCXX-}" ]; then 50 export CUDAHOSTCXX=${cc}/bin; 51 fi 52 - export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${cc}/bin' 53 EOF 54 ''; 55 });
··· 41 # uses the last --compiler-bindir it gets on the command line. 42 # FIXME: this results in "incompatible redefinition" warnings. 43 # https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#compiler-bindir-directory-ccbin 44 + # NOTE: We unconditionally set -Xfatbin=-compress-all, which reduces the size of the 45 + # compiled binaries. We need this because binaries larger than 2GB fail to link, and the 46 + # default set of CUDA capabilities we build can regularly push binaries past that limit 47 + # (for example, with Magma). 48 postInstall = (oldAttrs.postInstall or "") + '' 49 mkdir -p $out/nix-support 50 cat <<EOF >> $out/nix-support/setup-hook ··· 53 if [ -z "\''${CUDAHOSTCXX-}" ]; then 54 export CUDAHOSTCXX=${cc}/bin; 55 fi 56 + export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${cc}/bin -Xfatbin=-compress-all' 57 EOF 58 ''; 59 });