···72 env.autoPatchelfIgnoreMissingDeps =
73 prevAttrs.env.autoPatchelfIgnoreMissingDeps + " libnvrm_gpu.so libnvrm_mem.so libnvdla_runtime.so";
74 # `cuda_compat` only works on aarch64-linux, and only when building for Jetson devices.
75- brokenConditions = prevAttrs.brokenConditions // {
76 "Trying to use cuda_compat on aarch64-linux targeting non-Jetson devices" =
77 !final.flags.isJetsonBuild;
78 };
···72 env.autoPatchelfIgnoreMissingDeps =
73 prevAttrs.env.autoPatchelfIgnoreMissingDeps + " libnvrm_gpu.so libnvrm_mem.so libnvdla_runtime.so";
74 # `cuda_compat` only works on aarch64-linux, and only when building for Jetson devices.
75+ badPlatformsConditions = prevAttrs.badPlatformsConditions // {
76 "Trying to use cuda_compat on aarch64-linux targeting non-Jetson devices" =
77 !final.flags.isJetsonBuild;
78 };
+13-5
pkgs/development/cuda-modules/cudnn/shims.nix
···1# Shims to mimic the shape of ../modules/generic/manifests/{feature,redistrib}/release.nix
2-{package, redistArch}:
3{
4- featureRelease.${redistArch}.outputs = {
5- lib = true;
6- static = true;
7- dev = true;
0000000008 };
9 redistribRelease = {
10 name = "NVIDIA CUDA Deep Neural Network library (cuDNN)";
···1# Shims to mimic the shape of ../modules/generic/manifests/{feature,redistrib}/release.nix
02{
3+ lib,
4+ package,
5+ # redistArch :: String
6+ # String is "unsupported" if the given architecture is unsupported.
7+ redistArch,
8+}:
9+{
10+ featureRelease = lib.optionalAttrs (redistArch != "unsupported") {
11+ ${redistArch}.outputs = {
12+ lib = true;
13+ static = true;
14+ dev = true;
15+ };
16 };
17 redistribRelease = {
18 name = "NVIDIA CUDA Deep Neural Network library (cuDNN)";
···92 # A release is supported if it has a libPath that matches our CUDA version for our platform.
93 # LibPath are not constant across the same release -- one platform may support fewer
94 # CUDA versions than another.
095 redistArch = flags.getRedistArch hostPlatform.system;
96 # platformIsSupported :: Manifests -> Boolean
97 platformIsSupported =
···92 # A release is supported if it has a libPath that matches our CUDA version for our platform.
93 # LibPath are not constant across the same release -- one platform may support fewer
94 # CUDA versions than another.
95+ # redistArch :: String
96 redistArch = flags.getRedistArch hostPlatform.system;
97 # platformIsSupported :: Manifests -> Boolean
98 platformIsSupported =
+20-30
pkgs/development/cuda-modules/flags.nix
···131 # `linux-aarch64` redist (which is for Jetson devices) if we're building any Jetson devices.
132 # Since both are based on aarch64, we can only have one or the other, otherwise there's an
133 # ambiguity as to which should be used.
000134 # getRedistArch :: String -> String
135- getRedistArch =
136- nixSystem:
137- if nixSystem == "aarch64-linux" then
138- if jetsonTargets != [] then "linux-aarch64" else "linux-sbsa"
139- else if nixSystem == "x86_64-linux" then
140- "linux-x86_64"
141- else if nixSystem == "ppc64le-linux" then
142- "linux-ppc64le"
143- else if nixSystem == "x86_64-windows" then
144- "windows-x86_64"
145- else
146- "unsupported";
147148 # Maps NVIDIA redist arch to Nix system.
149- # It is imperative that we include the boolean condition based on jetsonTargets to ensure
150- # we don't advertise availability of packages only available on server-grade ARM
151- # as being available for the Jetson, since both `linux-sbsa` and `linux-aarch64` are
152- # mapped to the Nix system `aarch64-linux`.
153- getNixSystem =
154- redistArch:
155- if redistArch == "linux-sbsa" && jetsonTargets == [] then
156- "aarch64-linux"
157- else if redistArch == "linux-aarch64" && jetsonTargets != [] then
158- "aarch64-linux"
159- else if redistArch == "linux-x86_64" then
160- "x86_64-linux"
161- else if redistArch == "linux-ppc64le" then
162- "ppc64le-linux"
163- else if redistArch == "windows-x86_64" then
164- "x86_64-windows"
165- else
166- "unsupported-${redistArch}";
167168 formatCapabilities =
169 {
···131 # `linux-aarch64` redist (which is for Jetson devices) if we're building any Jetson devices.
132 # Since both are based on aarch64, we can only have one or the other, otherwise there's an
133 # ambiguity as to which should be used.
134+ # NOTE: This function *will* be called by unsupported systems because `cudaPackages` is part of
135+ # `all-packages.nix`, which is evaluated on all systems. As such, we need to handle unsupported
136+ # systems gracefully.
137 # getRedistArch :: String -> String
138+ getRedistArch = nixSystem: attrsets.attrByPath [ nixSystem ] "unsupported" {
139+ aarch64-linux = if jetsonTargets != [] then "linux-aarch64" else "linux-sbsa";
140+ x86_64-linux = "linux-x86_64";
141+ ppc64le-linux = "linux-ppc64le";
142+ x86_64-windows = "windows-x86_64";
143+ };
000000144145 # Maps NVIDIA redist arch to Nix system.
146+ # NOTE: This function *will* be called by unsupported systems because `cudaPackages` is part of
147+ # `all-packages.nix`, which is evaluated on all systems. As such, we need to handle unsupported
148+ # systems gracefully.
149+ # getNixSystem :: String -> String
150+ getNixSystem = redistArch: attrsets.attrByPath [ redistArch ] "unsupported-${redistArch}" {
151+ linux-sbsa = "aarch64-linux";
152+ linux-aarch64 = "aarch64-linux";
153+ linux-x86_64 = "x86_64-linux";
154+ linux-ppc64le = "ppc64le-linux";
155+ windows-x86_64 = "x86_64-windows";
156+ };
0000000157158 formatCapabilities =
159 {
···43 # Get the redist architectures for which package provides distributables.
44 # These are used by meta.platforms.
45 supportedRedistArchs = builtins.attrNames featureRelease;
00046 redistArch = flags.getRedistArch hostPlatform.system;
47in
48backendStdenv.mkDerivation (
···87 "sample"
88 "python"
89 ];
0000000090 additionalOutputs =
91- if redistArch == "unsupported" then possibleOutputs else builtins.filter hasOutput possibleOutputs;
0092 # The out output is special -- it's the default output and we always include it.
93 outputs = [ "out" ] ++ additionalOutputs;
94 in
···112 python = ["**/*.whl"];
113 };
114115- # Useful for introspecting why something went wrong.
116- # Maps descriptions of why the derivation would be marked broken to
117- # booleans indicating whether that description is true.
118- brokenConditions = {};
119120- src = fetchurl {
121- url =
122- if (builtins.hasAttr redistArch redistribRelease) then
123- "https://developer.download.nvidia.com/compute/${redistName}/redist/${
124- redistribRelease.${redistArch}.relative_path
125- }"
126- else
127- "cannot-construct-an-url-for-the-${redistArch}-platform";
128- sha256 = redistribRelease.${redistArch}.sha256 or lib.fakeHash;
129- };
0000000000000130131 # Handle the pkg-config files:
132 # 1. No FHS
···297 meta = {
298 description = "${redistribRelease.name}. By downloading and using the packages you accept the terms and conditions of the ${finalAttrs.meta.license.shortName}";
299 sourceProvenance = [sourceTypes.binaryNativeCode];
300- platforms =
301- lists.concatMap
302- (
303- redistArch:
304- let
305- nixSystem = flags.getNixSystem redistArch;
306- in
307- lists.optionals (!(strings.hasPrefix "unsupported-" nixSystem)) [ nixSystem ]
308- )
309- supportedRedistArchs;
310 broken = lists.any trivial.id (attrsets.attrValues finalAttrs.brokenConditions);
00000000000311 license = licenses.unfree;
312 maintainers = teams.cuda.members;
313 # Force the use of the default, fat output by default (even though `dev` exists, which
···43 # Get the redist architectures for which package provides distributables.
44 # These are used by meta.platforms.
45 supportedRedistArchs = builtins.attrNames featureRelease;
46+ # redistArch :: String
47+ # The redistArch is the name of the architecture for which the redistributable is built.
48+ # It is `"unsupported"` if the redistributable is not supported on the target platform.
49 redistArch = flags.getRedistArch hostPlatform.system;
50in
51backendStdenv.mkDerivation (
···90 "sample"
91 "python"
92 ];
93+ # Filter out outputs that don't exist in the redistributable.
94+ # NOTE: In the case the redistributable isn't supported on the target platform,
95+ # we will have `outputs = [ "out" ] ++ possibleOutputs`. This is of note because platforms which
96+ # aren't supported would otherwise have evaluation errors when trying to access outputs other than `out`.
97+ # The alternative would be to have `outputs = [ "out" ]` when `redistArch = "unsupported"`, but that would
98+ # require adding guards throughout the entirety of the CUDA package set to ensure `cudaSupport` is true --
99+ # recall that OfBorg will evaluate packages marked as broken and that `cudaPackages` will be evaluated with
100+ # `cudaSupport = false`!
101 additionalOutputs =
102+ if redistArch == "unsupported"
103+ then possibleOutputs
104+ else builtins.filter hasOutput possibleOutputs;
105 # The out output is special -- it's the default output and we always include it.
106 outputs = [ "out" ] ++ additionalOutputs;
107 in
···125 python = ["**/*.whl"];
126 };
127128+ # Useful for introspecting why something went wrong. Maps descriptions of why the derivation would be marked as
129+ # broken or have badPlatforms include the current platform.
00130131+ # brokenConditions :: AttrSet Bool
132+ # Sets `meta.broken = true` if any of the conditions are true.
133+ # Example: Broken on a specific version of CUDA or when a dependency has a specific version.
134+ brokenConditions = { };
135+136+ # badPlatformsConditions :: AttrSet Bool
137+ # Sets `meta.badPlatforms = meta.platforms` if any of the conditions are true.
138+ # Example: Broken on a specific architecture when some condition is met (like targeting Jetson).
139+ badPlatformsConditions = { };
140+141+ # src :: Optional Derivation
142+ src = trivial.pipe redistArch [
143+ # If redistArch doesn't exist in redistribRelease, return null.
144+ (redistArch: redistribRelease.${redistArch} or null)
145+ # If the release is non-null, fetch the source; otherwise, return null.
146+ (trivial.mapNullable (
147+ { relative_path, sha256, ... }:
148+ fetchurl {
149+ url = "https://developer.download.nvidia.com/compute/${redistName}/redist/${relative_path}";
150+ inherit sha256;
151+ }
152+ ))
153+ ];
154155 # Handle the pkg-config files:
156 # 1. No FHS
···321 meta = {
322 description = "${redistribRelease.name}. By downloading and using the packages you accept the terms and conditions of the ${finalAttrs.meta.license.shortName}";
323 sourceProvenance = [sourceTypes.binaryNativeCode];
0000000000324 broken = lists.any trivial.id (attrsets.attrValues finalAttrs.brokenConditions);
325+ platforms = trivial.pipe supportedRedistArchs [
326+ # Map each redist arch to the equivalent nix system, or to "unsupported-${redistArch}" if there is no equivalent.
327+ (builtins.map flags.getNixSystem)
328+ # Filter out unsupported systems
329+ (builtins.filter (nixSystem: !(strings.hasPrefix "unsupported-" nixSystem)))
330+ ];
331+ badPlatforms =
332+ let
333+ isBadPlatform = lists.any trivial.id (attrsets.attrValues finalAttrs.badPlatformsConditions);
334+ in
335+ lists.optionals isBadPlatform finalAttrs.meta.platforms;
336 license = licenses.unfree;
337 maintainers = teams.cuda.members;
338 # Force the use of the default, fat output by default (even though `dev` exists, which
···20 # The featureRelease is used to populate meta.platforms (by way of looking at the attribute names)
21 # and to determine the outputs of the package.
22 # shimFn :: {package, redistArch} -> AttrSet
23- shimsFn ? ({package, redistArch}: throw "shimsFn must be provided"),
24 # fixupFn :: Path
25 # A path (or nix expression) to be evaluated with callPackage and then
26 # provided to the package's overrideAttrs function.
···29 # - cudaVersion
30 # - mkVersionedPackageName
31 # - package
32- fixupFn ? (
33- {
34- final,
35- cudaVersion,
36- mkVersionedPackageName,
37- package,
38- ...
39- }:
40- throw "fixupFn must be provided"
41- ),
42}:
43let
44 inherit (lib)
···80 && strings.versionAtLeast package.maxCudaVersion cudaVersion;
8182 # Get all of the packages for our given platform.
0083 redistArch = flags.getRedistArch hostPlatform.system;
8485- allReleases = builtins.concatMap (xs: xs) (builtins.attrValues releaseSets);
8687 # All the supported packages we can build for our platform.
88 # perSystemReleases :: List Package
···20 # The featureRelease is used to populate meta.platforms (by way of looking at the attribute names)
21 # and to determine the outputs of the package.
22 # shimsFn :: {package, redistArch} -> AttrSet
23+ shimsFn ? (throw "shimsFn must be provided"),
24 # fixupFn :: Path
25 # A path (or nix expression) to be evaluated with callPackage and then
26 # provided to the package's overrideAttrs function.
···29 # - cudaVersion
30 # - mkVersionedPackageName
31 # - package
32+ # - ...
33+ fixupFn ? (throw "fixupFn must be provided"),
0000000034}:
35let
36 inherit (lib)
···72 && strings.versionAtLeast package.maxCudaVersion cudaVersion;
7374 # Get all of the packages for our given platform.
75+ # redistArch :: String
76+ # Value is `"unsupported"` if the platform is not supported.
77 redistArch = flags.getRedistArch hostPlatform.system;
7879+ allReleases = lists.flatten (builtins.attrValues releaseSets);
8081 # All the supported packages we can build for our platform.
82 # perSystemReleases :: List Package
···100 homepage = "https://developer.nvidia.com/nccl";
101 license = licenses.bsd3;
102 platforms = platforms.linux;
103+ # NCCL is not supported on Jetson, because it does not use NVLink or PCI-e for inter-GPU communication.
104+ # https://forums.developer.nvidia.com/t/can-jetson-orin-support-nccl/232845/9
105+ badPlatforms = lib.optionals cudaFlags.isJetsonBuild [ "aarch64-linux" ];
106 maintainers =
107 with maintainers;
108 [
+7-8
pkgs/development/cuda-modules/tensorrt/fixup.nix
···11}:
12let
13 inherit (lib)
014 maintainers
15 meta
16 strings
17 versions
18 ;
19- targetArch =
20- if hostPlatform.isx86_64 then
21- "x86_64-linux-gnu"
22- else if hostPlatform.isAarch64 then
23- "aarch64-linux-gnu"
24- else
25- "unsupported";
26in
27finalAttrs: prevAttrs: {
28 # Useful for inspecting why something went wrong.
···6970 preInstall =
71 (prevAttrs.preInstall or "")
72- + ''
73 # Replace symlinks to bin and lib with the actual directories from targets.
74 for dir in bin lib; do
75 rm "$dir"
···11}:
12let
13 inherit (lib)
14+ attrsets
15 maintainers
16 meta
17 strings
18 versions
19 ;
20+ # targetArch :: String
21+ targetArch = attrsets.attrByPath [ hostPlatform.system ] "unsupported" {
22+ x86_64-linux = "x86_64-linux-gnu";
23+ aarch64-linux = "aarch64-linux-gnu";
24+ };
0025in
26finalAttrs: prevAttrs: {
27 # Useful for inspecting why something went wrong.
···6869 preInstall =
70 (prevAttrs.preInstall or "")
71+ + strings.optionalString (targetArch != "unsupported") ''
72 # Replace symlinks to bin and lib with the actual directories from targets.
73 for dir in bin lib; do
74 rm "$dir"
+16-8
pkgs/development/cuda-modules/tensorrt/shims.nix
···1# Shims to mimic the shape of ../modules/generic/manifests/{feature,redistrib}/release.nix
2-{package, redistArch}:
3{
4- featureRelease.${redistArch}.outputs = {
5- bin = true;
6- lib = true;
7- static = true;
8- dev = true;
9- sample = true;
10- python = true;
00000000011 };
12 redistribRelease = {
13 name = "TensorRT: a high-performance deep learning interface";
···1# Shims to mimic the shape of ../modules/generic/manifests/{feature,redistrib}/release.nix
02{
3+ lib,
4+ package,
5+ # redistArch :: String
6+ # String is `"unsupported"` if the given architecture is unsupported.
7+ redistArch,
8+}:
9+{
10+ featureRelease = lib.optionalAttrs (redistArch != "unsupported") {
11+ ${redistArch}.outputs = {
12+ bin = true;
13+ lib = true;
14+ static = true;
15+ dev = true;
16+ sample = true;
17+ python = true;
18+ };
19 };
20 redistribRelease = {
21 name = "TensorRT: a high-performance deep learning interface";
···14, rPackages
15}@inputs:
1617-assert ncclSupport -> cudaSupport;
18# Disable regular tests when building the R package
19# because 1) the R package runs its own tests and
20# 2) the R package creates a different binary shared
···14, rPackages
15}@inputs:
1617+assert ncclSupport -> (cudaSupport && !cudaPackages.nccl.meta.unsupported);
18# Disable regular tests when building the R package
19# because 1) the R package runs its own tests and
20# 2) the R package creates a different binary shared
···64 # aarch64-darwin is broken because of https://github.com/bazelbuild/rules_cc/pull/136
65 # however even with that fix applied, it doesn't work for everyone:
66 # https://github.com/NixOS/nixpkgs/pull/184395#issuecomment-1207287129
67- broken = stdenv.isDarwin;
068 };
6970 cudatoolkit_joined = symlinkJoin {
···64 # aarch64-darwin is broken because of https://github.com/bazelbuild/rules_cc/pull/136
65 # however even with that fix applied, it doesn't work for everyone:
66 # https://github.com/NixOS/nixpkgs/pull/184395#issuecomment-1207287129
67+ # NOTE: We always build with NCCL; if it is unsupported, then our build is broken.
68+ broken = stdenv.isDarwin || nccl.meta.unsupported;
69 };
7071 cudatoolkit_joined = symlinkJoin {
+8-5
pkgs/development/python-modules/torch/default.nix
···7 magma,
8 magma-hip,
9 magma-cuda-static,
10- useSystemNccl ? true,
011 MPISupport ? false, mpi,
12 buildDocs ? false,
13···273 PYTORCH_BUILD_VERSION = version;
274 PYTORCH_BUILD_NUMBER = 0;
275276- USE_NCCL = setBool (cudaSupport && cudaPackages ? nccl);
277- USE_SYSTEM_NCCL = setBool useSystemNccl; # don't build pytorch's third_party NCCL
278- USE_STATIC_NCCL = setBool useSystemNccl;
00279280 # Suppress a weird warning in mkl-dnn, part of ideep in pytorch
281 # (upstream seems to have fixed this in the wrong place?)
···363 ] ++ lists.optionals (cudaPackages ? cudnn) [
364 cudnn.dev
365 cudnn.lib
366- ] ++ lists.optionals (useSystemNccl && cudaPackages ? nccl) [
367 # Some platforms do not support NCCL (i.e., Jetson)
368 nccl.dev # Provides nccl.h AND a static copy of NCCL!
369 ] ++ lists.optionals (strings.versionOlder cudaVersion "11.8") [
···7 magma,
8 magma-hip,
9 magma-cuda-static,
10+ # Use the system NCCL as long as we're targeting CUDA on a supported platform.
11+ useSystemNccl ? (cudaSupport && !cudaPackages.nccl.meta.unsupported),
12 MPISupport ? false, mpi,
13 buildDocs ? false,
14···274 PYTORCH_BUILD_VERSION = version;
275 PYTORCH_BUILD_NUMBER = 0;
276277+ # In-tree builds of NCCL are not supported.
278+ # Use NCCL when cudaSupport is enabled and nccl is available.
279+ USE_NCCL = setBool useSystemNccl;
280+ USE_SYSTEM_NCCL = USE_NCCL;
281+ USE_STATIC_NCCL = USE_NCCL;
282283 # Suppress a weird warning in mkl-dnn, part of ideep in pytorch
284 # (upstream seems to have fixed this in the wrong place?)
···366 ] ++ lists.optionals (cudaPackages ? cudnn) [
367 cudnn.dev
368 cudnn.lib
369+ ] ++ lists.optionals useSystemNccl [
370 # Some platforms do not support NCCL (i.e., Jetson)
371 nccl.dev # Provides nccl.h AND a static copy of NCCL!
372 ] ++ lists.optionals (strings.versionOlder cudaVersion "11.8") [
+1-8
pkgs/top-level/cuda-packages.nix
···7273 # Loose packages
74 cudatoolkit = final.callPackage ../development/cuda-modules/cudatoolkit {};
75- # SaxPy is only available after 11.4 because it requires redistributable versions of CUDA libraries.
76- saxpy = attrsets.optionalAttrs (strings.versionAtLeast cudaVersion "11.4") (
77- final.callPackage ../development/cuda-modules/saxpy {}
78- );
79- }
80- # NCCL is not supported on Jetson, because it does not use NVLink or PCI-e for inter-GPU communication.
81- # https://forums.developer.nvidia.com/t/can-jetson-orin-support-nccl/232845/9
82- // attrsets.optionalAttrs (!flags.isJetsonBuild) {
83 nccl = final.callPackage ../development/cuda-modules/nccl {};
84 nccl-tests = final.callPackage ../development/cuda-modules/nccl-tests {};
85 }