cudaPackages: use -Xfatbin=-compress-all; prune default cudaCapabilities

+85 -25
+5 -1
pkgs/development/compilers/cudatoolkit/common.nix
··· 151 151 # Refer to comments in the overrides for cuda_nvcc for explanation 152 152 # CUDA_TOOLKIT_ROOT_DIR is legacy, 153 153 # Cf. https://cmake.org/cmake/help/latest/module/FindCUDA.html#input-variables 154 + # NOTE: We unconditionally set -Xfatbin=-compress-all, which reduces the size of the compiled 155 + # binaries. If binaries grow over 2GB, they will fail to link. This is a problem for us, as 156 + # the default set of CUDA capabilities we build can regularly cause this to occur (for 157 + # example, with Magma). 154 158 '' 155 159 mkdir -p $out/nix-support 156 160 cat <<EOF >> $out/nix-support/setup-hook ··· 160 164 if [ -z "\''${CUDAHOSTCXX-}" ]; then 161 165 export CUDAHOSTCXX=${backendStdenv.cc}/bin; 162 166 fi 163 - export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${backendStdenv.cc}/bin' 167 + export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${backendStdenv.cc}/bin -Xfatbin=-compress-all' 164 168 EOF 165 169 166 170 # Move some libraries to the lib output so that programs that
+23 -9
pkgs/development/compilers/cudatoolkit/flags.nix
··· 4 4 }: 5 5 6 6 # Type aliases 7 - # Gpu = { 8 - # archName: String, # e.g., "Hopper" 9 - # computeCapability: String, # e.g., "9.0" 10 - # minCudaVersion: String, # e.g., "11.8" 11 - # maxCudaVersion: String, # e.g., "12.0" 12 - # } 7 + # Gpu :: AttrSet 8 + # - See the documentation in ./gpus.nix. 13 9 14 10 let 15 11 inherit (lib) attrsets lists strings trivial versions; ··· 34 30 # gpus :: List Gpu 35 31 gpus = builtins.import ./gpus.nix; 36 32 37 - # isVersionIn :: Gpu -> Bool 33 + # isSupported :: Gpu -> Bool 38 34 isSupported = gpu: 39 35 let 40 36 inherit (gpu) minCudaVersion maxCudaVersion; 41 37 lowerBoundSatisfied = strings.versionAtLeast cudaVersion minCudaVersion; 42 - upperBoundSatisfied = !(strings.versionOlder maxCudaVersion cudaVersion); 38 + upperBoundSatisfied = (maxCudaVersion == null) 39 + || !(strings.versionOlder maxCudaVersion cudaVersion); 43 40 in 44 41 lowerBoundSatisfied && upperBoundSatisfied; 45 42 43 + # isDefault :: Gpu -> Bool 44 + isDefault = gpu: 45 + let 46 + inherit (gpu) dontDefaultAfter; 47 + newGpu = dontDefaultAfter == null; 48 + recentGpu = newGpu || strings.versionAtLeast dontDefaultAfter cudaVersion; 49 + in 50 + recentGpu; 51 + 46 52 # supportedGpus :: List Gpu 47 53 # GPUs which are supported by the provided CUDA version. 48 54 supportedGpus = builtins.filter isSupported gpus; 49 55 56 + # defaultGpus :: List Gpu 57 + # GPUs which are supported by the provided CUDA version and we want to build for by default. 58 + defaultGpus = builtins.filter isDefault supportedGpus; 59 + 50 60 # supportedCapabilities :: List Capability 51 61 supportedCapabilities = lists.map (gpu: gpu.computeCapability) supportedGpus; 62 + 63 + # defaultCapabilities :: List Capability 64 + # The default capabilities to target, if not overridden by the user. 
65 + defaultCapabilities = lists.map (gpu: gpu.computeCapability) defaultGpus; 52 66 53 67 # cudaArchNameToVersions :: AttrSet String (List String) 54 68 # Maps the name of a GPU architecture to different versions of that architecture. ··· 151 165 # dropDot :: String -> String 152 166 inherit dropDot; 153 167 } // formatCapabilities { 154 - cudaCapabilities = config.cudaCapabilities or supportedCapabilities; 168 + cudaCapabilities = config.cudaCapabilities or defaultCapabilities; 155 169 enableForwardCompat = config.cudaForwardCompat or true; 156 170 }
+52 -14
pkgs/development/compilers/cudatoolkit/gpus.nix
··· 1 1 [ 2 + # Type alias 3 + # Gpu = { 4 + # archName: String 5 + # - The name of the microarchitecture. 6 + # computeCapability: String 7 + # - The compute capability of the GPU. 8 + # minCudaVersion: String 9 + # - The minimum (inclusive) CUDA version that supports this GPU. 10 + # dontDefaultAfter: null | String 11 + # - The CUDA version after which to exclude this GPU from the list of default capabilities 12 + # we build. null means we always include this GPU in the default capabilities if it is 13 + # supported. 14 + # maxCudaVersion: null | String 15 + # - The maximum (inclusive) CUDA version that supports this GPU. null means there is no 16 + # maximum. 17 + # } 2 18 { 3 19 archName = "Kepler"; 4 20 computeCapability = "3.0"; 5 21 minCudaVersion = "10.0"; 22 + dontDefaultAfter = "10.2"; 6 23 maxCudaVersion = "10.2"; 7 24 } 8 25 { 9 26 archName = "Kepler"; 10 27 computeCapability = "3.2"; 11 28 minCudaVersion = "10.0"; 29 + dontDefaultAfter = "10.2"; 12 30 maxCudaVersion = "10.2"; 13 31 } 14 32 { 15 33 archName = "Kepler"; 16 34 computeCapability = "3.5"; 17 35 minCudaVersion = "10.0"; 36 + dontDefaultAfter = "11.0"; 18 37 maxCudaVersion = "11.8"; 19 38 } 20 39 { 21 40 archName = "Kepler"; 22 41 computeCapability = "3.7"; 23 42 minCudaVersion = "10.0"; 43 + dontDefaultAfter = "11.0"; 24 44 maxCudaVersion = "11.8"; 25 45 } 26 46 { 27 47 archName = "Maxwell"; 28 48 computeCapability = "5.0"; 29 49 minCudaVersion = "10.0"; 30 - maxCudaVersion = "12.0"; 50 + dontDefaultAfter = "11.0"; 51 + maxCudaVersion = null; 31 52 } 32 53 { 33 54 archName = "Maxwell"; 34 55 computeCapability = "5.2"; 35 56 minCudaVersion = "10.0"; 36 - maxCudaVersion = "12.0"; 57 + dontDefaultAfter = "11.0"; 58 + maxCudaVersion = null; 37 59 } 38 60 { 39 61 archName = "Maxwell"; 40 62 computeCapability = "5.3"; 41 63 minCudaVersion = "10.0"; 42 - maxCudaVersion = "12.0"; 64 + dontDefaultAfter = "11.0"; 65 + maxCudaVersion = null; 43 66 } 44 67 { 45 68 archName = "Pascal"; 46 69 
computeCapability = "6.0"; 47 70 minCudaVersion = "10.0"; 48 - maxCudaVersion = "12.0"; 71 + dontDefaultAfter = null; 72 + maxCudaVersion = null; 49 73 } 50 74 { 51 75 archName = "Pascal"; 52 76 computeCapability = "6.1"; 53 77 minCudaVersion = "10.0"; 54 - maxCudaVersion = "12.0"; 78 + dontDefaultAfter = null; 79 + maxCudaVersion = null; 55 80 } 56 81 { 57 82 archName = "Pascal"; 58 83 computeCapability = "6.2"; 59 84 minCudaVersion = "10.0"; 60 - maxCudaVersion = "12.0"; 85 + dontDefaultAfter = null; 86 + maxCudaVersion = null; 61 87 } 62 88 { 63 89 archName = "Volta"; 64 90 computeCapability = "7.0"; 65 91 minCudaVersion = "10.0"; 66 - maxCudaVersion = "12.0"; 92 + dontDefaultAfter = null; 93 + maxCudaVersion = null; 67 94 } 68 95 { 69 96 archName = "Volta"; 70 97 computeCapability = "7.2"; 71 98 minCudaVersion = "10.0"; 72 - maxCudaVersion = "12.0"; 99 + dontDefaultAfter = null; 100 + maxCudaVersion = null; 73 101 } 74 102 { 75 103 archName = "Turing"; 76 104 computeCapability = "7.5"; 77 105 minCudaVersion = "10.0"; 78 - maxCudaVersion = "12.0"; 106 + dontDefaultAfter = null; 107 + maxCudaVersion = null; 79 108 } 80 109 { 81 110 archName = "Ampere"; 82 111 computeCapability = "8.0"; 83 112 minCudaVersion = "11.2"; 84 - maxCudaVersion = "12.0"; 113 + dontDefaultAfter = null; 114 + maxCudaVersion = null; 85 115 } 86 116 { 87 117 archName = "Ampere"; 88 118 computeCapability = "8.6"; 89 119 minCudaVersion = "11.2"; 90 - maxCudaVersion = "12.0"; 120 + dontDefaultAfter = null; 121 + maxCudaVersion = null; 91 122 } 92 123 { 93 124 archName = "Ampere"; 94 125 computeCapability = "8.7"; 95 126 minCudaVersion = "11.5"; 96 - maxCudaVersion = "12.0"; 127 + # NOTE: This is purposefully before 11.5 to ensure it is never a capability we target by 128 + # default. 8.7 is the Jetson Orin series of devices which are a very specific platform. 129 + # We keep this entry here in case we ever want to target it explicitly, but we don't 130 + # want to target it by default. 
131 + dontDefaultAfter = "11.4"; 132 + maxCudaVersion = null; 97 133 } 98 134 { 99 135 archName = "Ada"; 100 136 computeCapability = "8.9"; 101 137 minCudaVersion = "11.8"; 102 - maxCudaVersion = "12.0"; 138 + dontDefaultAfter = null; 139 + maxCudaVersion = null; 103 140 } 104 141 { 105 142 archName = "Hopper"; 106 143 computeCapability = "9.0"; 107 144 minCudaVersion = "11.8"; 108 - maxCudaVersion = "12.0"; 145 + dontDefaultAfter = null; 146 + maxCudaVersion = null; 109 147 } 110 148 ]
+5 -1
pkgs/development/compilers/cudatoolkit/redist/overrides.nix
··· 41 41 # uses the last --compiler-bindir it gets on the command line. 42 42 # FIXME: this results in "incompatible redefinition" warnings. 43 43 # https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#compiler-bindir-directory-ccbin 44 + # NOTE: We unconditionally set -Xfatbin=-compress-all, which reduces the size of the 45 + # compiled binaries. If binaries grow over 2GB, they will fail to link. This is a problem 46 + # for us, as the default set of CUDA capabilities we build can regularly cause this to 47 + # occur (for example, with Magma). 44 48 postInstall = (oldAttrs.postInstall or "") + '' 45 49 mkdir -p $out/nix-support 46 50 cat <<EOF >> $out/nix-support/setup-hook ··· 49 53 if [ -z "\''${CUDAHOSTCXX-}" ]; then 50 54 export CUDAHOSTCXX=${cc}/bin; 51 55 fi 52 - export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${cc}/bin' 56 + export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${cc}/bin -Xfatbin=-compress-all' 53 57 EOF 54 58 ''; 55 59 });