···151151 # Refer to comments in the overrides for cuda_nvcc for explanation
152152 # CUDA_TOOLKIT_ROOT_DIR is legacy,
153153 # Cf. https://cmake.org/cmake/help/latest/module/FindCUDA.html#input-variables
154154+ # NOTE: We unconditionally set -Xfatbin=-compress-all, which reduces the size of the compiled
155155+ # binaries. If binaries grow over 2GB, they will fail to link. This is a problem for us, as
156156+ # the default set of CUDA capabilities we build can regularly cause this to occur (for
157157+ # example, with Magma).
154158 ''
155159 mkdir -p $out/nix-support
156160 cat <<EOF >> $out/nix-support/setup-hook
···160164 if [ -z "\''${CUDAHOSTCXX-}" ]; then
161165 export CUDAHOSTCXX=${backendStdenv.cc}/bin;
162166 fi
163163- export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${backendStdenv.cc}/bin'
167167+ export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${backendStdenv.cc}/bin -Xfatbin=-compress-all'
164168 EOF
165169166170 # Move some libraries to the lib output so that programs that
+23-9
pkgs/development/compilers/cudatoolkit/flags.nix
···44}:
5566# Type aliases
77-# Gpu = {
88-# archName: String, # e.g., "Hopper"
99-# computeCapability: String, # e.g., "9.0"
1010-# minCudaVersion: String, # e.g., "11.8"
1111-# maxCudaVersion: String, # e.g., "12.0"
1212-# }
77+# Gpu :: AttrSet
88+# - See the documentation in ./gpus.nix.
1391410let
1511 inherit (lib) attrsets lists strings trivial versions;
···3430 # gpus :: List Gpu
3531 gpus = builtins.import ./gpus.nix;
36323737- # isVersionIn :: Gpu -> Bool
3333+ # isSupported :: Gpu -> Bool
3834 isSupported = gpu:
3935 let
4036 inherit (gpu) minCudaVersion maxCudaVersion;
4137 lowerBoundSatisfied = strings.versionAtLeast cudaVersion minCudaVersion;
4242- upperBoundSatisfied = !(strings.versionOlder maxCudaVersion cudaVersion);
3838+ upperBoundSatisfied = (maxCudaVersion == null)
3939+ || !(strings.versionOlder maxCudaVersion cudaVersion);
4340 in
4441 lowerBoundSatisfied && upperBoundSatisfied;
45424343+ # isDefault :: Gpu -> Bool
4444+ isDefault = gpu:
4545+ let
4646+ inherit (gpu) dontDefaultAfter;
4747+ newGpu = dontDefaultAfter == null;
4848+ recentGpu = newGpu || strings.versionAtLeast dontDefaultAfter cudaVersion;
4949+ in
5050+ recentGpu;
5151+4652 # supportedGpus :: List Gpu
4753 # GPUs which are supported by the provided CUDA version.
4854 supportedGpus = builtins.filter isSupported gpus;
49555656+ # defaultGpus :: List Gpu
5757+ # GPUs which are supported by the provided CUDA version and we want to build for by default.
5858+ defaultGpus = builtins.filter isDefault supportedGpus;
5959+5060 # supportedCapabilities :: List Capability
5161 supportedCapabilities = lists.map (gpu: gpu.computeCapability) supportedGpus;
6262+6363+ # defaultCapabilities :: List Capability
6464+ # The default capabilities to target, if not overridden by the user.
6565+ defaultCapabilities = lists.map (gpu: gpu.computeCapability) defaultGpus;
52665367 # cudaArchNameToVersions :: AttrSet String (List String)
5468 # Maps the name of a GPU architecture to different versions of that architecture.
···151165 # dropDot :: String -> String
152166 inherit dropDot;
153167} // formatCapabilities {
154154- cudaCapabilities = config.cudaCapabilities or supportedCapabilities;
168168+ cudaCapabilities = config.cudaCapabilities or defaultCapabilities;
155169 enableForwardCompat = config.cudaForwardCompat or true;
156170}
+52-14
pkgs/development/compilers/cudatoolkit/gpus.nix
···11[
22+ # Type alias
33+ # Gpu = {
44+ # archName: String
55+ # - The name of the microarchitecture.
66+ # computeCapability: String
77+ # - The compute capability of the GPU.
88+ # minCudaVersion: String
99+ # - The minimum (inclusive) CUDA version that supports this GPU.
1010+ # dontDefaultAfter: null | String
1111+ # - The last CUDA version (inclusive) for which this GPU is in the list of default
1212+ # capabilities we build. null means we always include this GPU in the default
1313+ # capabilities if it is supported.
1414+ # maxCudaVersion: null | String
1515+ # - The maximum (inclusive) CUDA version that supports this GPU. null means there is no
1616+ # maximum.
1717+ # }
218 {
319 archName = "Kepler";
420 computeCapability = "3.0";
521 minCudaVersion = "10.0";
2222+ dontDefaultAfter = "10.2";
623 maxCudaVersion = "10.2";
724 }
825 {
926 archName = "Kepler";
1027 computeCapability = "3.2";
1128 minCudaVersion = "10.0";
2929+ dontDefaultAfter = "10.2";
1230 maxCudaVersion = "10.2";
1331 }
1432 {
1533 archName = "Kepler";
1634 computeCapability = "3.5";
1735 minCudaVersion = "10.0";
3636+ dontDefaultAfter = "11.0";
1837 maxCudaVersion = "11.8";
1938 }
2039 {
2140 archName = "Kepler";
2241 computeCapability = "3.7";
2342 minCudaVersion = "10.0";
4343+ dontDefaultAfter = "11.0";
2444 maxCudaVersion = "11.8";
2545 }
2646 {
2747 archName = "Maxwell";
2848 computeCapability = "5.0";
2949 minCudaVersion = "10.0";
3030- maxCudaVersion = "12.0";
5050+ dontDefaultAfter = "11.0";
5151+ maxCudaVersion = null;
3152 }
3253 {
3354 archName = "Maxwell";
3455 computeCapability = "5.2";
3556 minCudaVersion = "10.0";
3636- maxCudaVersion = "12.0";
5757+ dontDefaultAfter = "11.0";
5858+ maxCudaVersion = null;
3759 }
3860 {
3961 archName = "Maxwell";
4062 computeCapability = "5.3";
4163 minCudaVersion = "10.0";
4242- maxCudaVersion = "12.0";
6464+ dontDefaultAfter = "11.0";
6565+ maxCudaVersion = null;
4366 }
4467 {
4568 archName = "Pascal";
4669 computeCapability = "6.0";
4770 minCudaVersion = "10.0";
4848- maxCudaVersion = "12.0";
7171+ dontDefaultAfter = null;
7272+ maxCudaVersion = null;
4973 }
5074 {
5175 archName = "Pascal";
5276 computeCapability = "6.1";
5377 minCudaVersion = "10.0";
5454- maxCudaVersion = "12.0";
7878+ dontDefaultAfter = null;
7979+ maxCudaVersion = null;
5580 }
5681 {
5782 archName = "Pascal";
5883 computeCapability = "6.2";
5984 minCudaVersion = "10.0";
6060- maxCudaVersion = "12.0";
8585+ dontDefaultAfter = null;
8686+ maxCudaVersion = null;
6187 }
6288 {
6389 archName = "Volta";
6490 computeCapability = "7.0";
6591 minCudaVersion = "10.0";
6666- maxCudaVersion = "12.0";
9292+ dontDefaultAfter = null;
9393+ maxCudaVersion = null;
6794 }
6895 {
6996 archName = "Volta";
7097 computeCapability = "7.2";
7198 minCudaVersion = "10.0";
7272- maxCudaVersion = "12.0";
9999+ dontDefaultAfter = null;
100100+ maxCudaVersion = null;
73101 }
74102 {
75103 archName = "Turing";
76104 computeCapability = "7.5";
77105 minCudaVersion = "10.0";
7878- maxCudaVersion = "12.0";
106106+ dontDefaultAfter = null;
107107+ maxCudaVersion = null;
79108 }
80109 {
81110 archName = "Ampere";
82111 computeCapability = "8.0";
83112 minCudaVersion = "11.2";
8484- maxCudaVersion = "12.0";
113113+ dontDefaultAfter = null;
114114+ maxCudaVersion = null;
85115 }
86116 {
87117 archName = "Ampere";
88118 computeCapability = "8.6";
89119 minCudaVersion = "11.2";
9090- maxCudaVersion = "12.0";
120120+ dontDefaultAfter = null;
121121+ maxCudaVersion = null;
91122 }
92123 {
93124 archName = "Ampere";
94125 computeCapability = "8.7";
95126 minCudaVersion = "11.5";
9696- maxCudaVersion = "12.0";
127127+ # NOTE: This is purposefully before 11.5 to ensure it is never a capability we target by
128128+ # default. 8.7 is the Jetson Orin series of devices which are a very specific platform.
129129+ # We keep this entry here in case we ever want to target it explicitly, but we don't
130130+ # want to target it by default.
131131+ dontDefaultAfter = "11.4";
132132+ maxCudaVersion = null;
97133 }
98134 {
99135 archName = "Ada";
100136 computeCapability = "8.9";
101137 minCudaVersion = "11.8";
102102- maxCudaVersion = "12.0";
138138+ dontDefaultAfter = null;
139139+ maxCudaVersion = null;
103140 }
104141 {
105142 archName = "Hopper";
106143 computeCapability = "9.0";
107144 minCudaVersion = "11.8";
108108- maxCudaVersion = "12.0";
145145+ dontDefaultAfter = null;
146146+ maxCudaVersion = null;
109147 }
110148]
···4141 # uses the last --compiler-bindir it gets on the command line.
4242 # FIXME: this results in "incompatible redefinition" warnings.
4343 # https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#compiler-bindir-directory-ccbin
4444+ # NOTE: We unconditionally set -Xfatbin=-compress-all, which reduces the size of the
4545+ # compiled binaries. If binaries grow over 2GB, they will fail to link. This is a problem
4646+ # for us, as the default set of CUDA capabilities we build can regularly cause this to
4747+ # occur (for example, with Magma).
4448 postInstall = (oldAttrs.postInstall or "") + ''
4549 mkdir -p $out/nix-support
4650 cat <<EOF >> $out/nix-support/setup-hook
···4953 if [ -z "\''${CUDAHOSTCXX-}" ]; then
5054 export CUDAHOSTCXX=${cc}/bin;
5155 fi
5252- export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${cc}/bin'
5656+ export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${cc}/bin -Xfatbin=-compress-all'
5357 EOF
5458 '';
5559 });