···7272 env.autoPatchelfIgnoreMissingDeps =
7373 prevAttrs.env.autoPatchelfIgnoreMissingDeps + " libnvrm_gpu.so libnvrm_mem.so libnvdla_runtime.so";
7474 # `cuda_compat` only works on aarch64-linux, and only when building for Jetson devices.
7575- brokenConditions = prevAttrs.brokenConditions // {
7575+ badPlatformsConditions = prevAttrs.badPlatformsConditions // {
7676 "Trying to use cuda_compat on aarch64-linux targeting non-Jetson devices" =
7777 !final.flags.isJetsonBuild;
7878 };
+13-5
pkgs/development/cuda-modules/cudnn/shims.nix
···11# Shims to mimic the shape of ../modules/generic/manifests/{feature,redistrib}/release.nix
22-{package, redistArch}:
32{
44- featureRelease.${redistArch}.outputs = {
55- lib = true;
66- static = true;
77- dev = true;
33+ lib,
44+ package,
55+ # redistArch :: String
66+ # String is "unsupported" if the given architecture is unsupported.
77+ redistArch,
88+}:
99+{
1010+ featureRelease = lib.optionalAttrs (redistArch != "unsupported") {
1111+ ${redistArch}.outputs = {
1212+ lib = true;
1313+ static = true;
1414+ dev = true;
1515+ };
816 };
917 redistribRelease = {
1018 name = "NVIDIA CUDA Deep Neural Network library (cuDNN)";
···9292 # A release is supported if it has a libPath that matches our CUDA version for our platform.
9393 # LibPaths are not constant across the same release -- one platform may support fewer
9494 # CUDA versions than another.
9595+ # redistArch :: String
9596 redistArch = flags.getRedistArch hostPlatform.system;
9697 # platformIsSupported :: Manifests -> Boolean
9798 platformIsSupported =
+20-30
pkgs/development/cuda-modules/flags.nix
···131131 # `linux-aarch64` redist (which is for Jetson devices) if we're building any Jetson devices.
132132 # Since both are based on aarch64, we can only have one or the other, otherwise there's an
133133 # ambiguity as to which should be used.
134134+ # NOTE: This function *will* be called by unsupported systems because `cudaPackages` is part of
135135+ # `all-packages.nix`, which is evaluated on all systems. As such, we need to handle unsupported
136136+ # systems gracefully.
134137 # getRedistArch :: String -> String
135135- getRedistArch =
136136- nixSystem:
137137- if nixSystem == "aarch64-linux" then
138138- if jetsonTargets != [] then "linux-aarch64" else "linux-sbsa"
139139- else if nixSystem == "x86_64-linux" then
140140- "linux-x86_64"
141141- else if nixSystem == "ppc64le-linux" then
142142- "linux-ppc64le"
143143- else if nixSystem == "x86_64-windows" then
144144- "windows-x86_64"
145145- else
146146- "unsupported";
138138+ getRedistArch = nixSystem: attrsets.attrByPath [ nixSystem ] "unsupported" {
139139+ aarch64-linux = if jetsonTargets != [] then "linux-aarch64" else "linux-sbsa";
140140+ x86_64-linux = "linux-x86_64";
141141+ ppc64le-linux = "linux-ppc64le";
142142+ x86_64-windows = "windows-x86_64";
143143+ };
147144148145 # Maps NVIDIA redist arch to Nix system.
149149- # It is imperative that we include the boolean condition based on jetsonTargets to ensure
150150- # we don't advertise availability of packages only available on server-grade ARM
151151- # as being available for the Jetson, since both `linux-sbsa` and `linux-aarch64` are
152152- # mapped to the Nix system `aarch64-linux`.
153153- getNixSystem =
154154- redistArch:
155155- if redistArch == "linux-sbsa" && jetsonTargets == [] then
156156- "aarch64-linux"
157157- else if redistArch == "linux-aarch64" && jetsonTargets != [] then
158158- "aarch64-linux"
159159- else if redistArch == "linux-x86_64" then
160160- "x86_64-linux"
161161- else if redistArch == "linux-ppc64le" then
162162- "ppc64le-linux"
163163- else if redistArch == "windows-x86_64" then
164164- "x86_64-windows"
165165- else
166166- "unsupported-${redistArch}";
146146+ # NOTE: This function *will* be called by unsupported systems because `cudaPackages` is part of
147147+ # `all-packages.nix`, which is evaluated on all systems. As such, we need to handle unsupported
148148+ # systems gracefully.
149149+ # getNixSystem :: String -> String
150150+ getNixSystem = redistArch: attrsets.attrByPath [ redistArch ] "unsupported-${redistArch}" {
151151+ linux-sbsa = "aarch64-linux";
152152+ linux-aarch64 = "aarch64-linux";
153153+ linux-x86_64 = "x86_64-linux";
154154+ linux-ppc64le = "ppc64le-linux";
155155+ windows-x86_64 = "x86_64-windows";
156156+ };
167157168158 formatCapabilities =
169159 {
···4343 # Get the redist architectures for which package provides distributables.
4444 # These are used by meta.platforms.
4545 supportedRedistArchs = builtins.attrNames featureRelease;
4646+ # redistArch :: String
4747+ # The redistArch is the name of the architecture for which the redistributable is built.
4848+ # It is `"unsupported"` if the redistributable is not supported on the target platform.
4649 redistArch = flags.getRedistArch hostPlatform.system;
4750in
4851backendStdenv.mkDerivation (
···8790 "sample"
8891 "python"
8992 ];
9393+ # Filter out outputs that don't exist in the redistributable.
9494+ # NOTE: In the case the redistributable isn't supported on the target platform,
9595+ # we will have `outputs = [ "out" ] ++ possibleOutputs`. This is of note because platforms which
9696+ # aren't supported would otherwise have evaluation errors when trying to access outputs other than `out`.
9797+ # The alternative would be to have `outputs = [ "out" ]` when `redistArch = "unsupported"`, but that would
9898+ # require adding guards throughout the entirety of the CUDA package set to ensure `cudaSupport` is true --
9999+ # recall that OfBorg will evaluate packages marked as broken and that `cudaPackages` will be evaluated with
100100+ # `cudaSupport = false`!
90101 additionalOutputs =
9191- if redistArch == "unsupported" then possibleOutputs else builtins.filter hasOutput possibleOutputs;
102102+ if redistArch == "unsupported"
103103+ then possibleOutputs
104104+ else builtins.filter hasOutput possibleOutputs;
92105 # The out output is special -- it's the default output and we always include it.
93106 outputs = [ "out" ] ++ additionalOutputs;
94107 in
···112125 python = ["**/*.whl"];
113126 };
114127115115- # Useful for introspecting why something went wrong.
116116- # Maps descriptions of why the derivation would be marked broken to
117117- # booleans indicating whether that description is true.
118118- brokenConditions = {};
128128+ # Useful for introspecting why something went wrong. Maps descriptions of why the derivation would be marked as
129129+ # broken or have badPlatforms include the current platform to booleans indicating whether that description is true.
119130120120- src = fetchurl {
121121- url =
122122- if (builtins.hasAttr redistArch redistribRelease) then
123123- "https://developer.download.nvidia.com/compute/${redistName}/redist/${
124124- redistribRelease.${redistArch}.relative_path
125125- }"
126126- else
127127- "cannot-construct-an-url-for-the-${redistArch}-platform";
128128- sha256 = redistribRelease.${redistArch}.sha256 or lib.fakeHash;
129129- };
131131+ # brokenConditions :: AttrSet Bool
132132+ # Sets `meta.broken = true` if any of the conditions are true.
133133+ # Example: Broken on a specific version of CUDA or when a dependency has a specific version.
134134+ brokenConditions = { };
135135+136136+ # badPlatformsConditions :: AttrSet Bool
137137+ # Sets `meta.badPlatforms = meta.platforms` if any of the conditions are true.
138138+ # Example: Broken on a specific architecture when some condition is met (like targeting Jetson).
139139+ badPlatformsConditions = { };
140140+141141+ # src :: Optional Derivation
142142+ src = trivial.pipe redistArch [
143143+ # If redistArch doesn't exist in redistribRelease, return null.
144144+ (redistArch: redistribRelease.${redistArch} or null)
145145+ # If the release is non-null, fetch the source; otherwise, return null.
146146+ (trivial.mapNullable (
147147+ { relative_path, sha256, ... }:
148148+ fetchurl {
149149+ url = "https://developer.download.nvidia.com/compute/${redistName}/redist/${relative_path}";
150150+ inherit sha256;
151151+ }
152152+ ))
153153+ ];
130154131155 # Handle the pkg-config files:
132156 # 1. No FHS
···297321 meta = {
298322 description = "${redistribRelease.name}. By downloading and using the packages you accept the terms and conditions of the ${finalAttrs.meta.license.shortName}";
299323 sourceProvenance = [sourceTypes.binaryNativeCode];
300300- platforms =
301301- lists.concatMap
302302- (
303303- redistArch:
304304- let
305305- nixSystem = flags.getNixSystem redistArch;
306306- in
307307- lists.optionals (!(strings.hasPrefix "unsupported-" nixSystem)) [ nixSystem ]
308308- )
309309- supportedRedistArchs;
310324 broken = lists.any trivial.id (attrsets.attrValues finalAttrs.brokenConditions);
325325+ platforms = trivial.pipe supportedRedistArchs [
325325+ # Map each redist arch to the equivalent nix system, or to "unsupported-<redistArch>" if there is no equivalent.
327327+ (builtins.map flags.getNixSystem)
328328+ # Filter out unsupported systems
329329+ (builtins.filter (nixSystem: !(strings.hasPrefix "unsupported-" nixSystem)))
330330+ ];
331331+ badPlatforms =
332332+ let
333333+ isBadPlatform = lists.any trivial.id (attrsets.attrValues finalAttrs.badPlatformsConditions);
334334+ in
335335+ lists.optionals isBadPlatform finalAttrs.meta.platforms;
311336 license = licenses.unfree;
312337 maintainers = teams.cuda.members;
313338 # Force the use of the default, fat output by default (even though `dev` exists, which
···2020 # The featureRelease is used to populate meta.platforms (by way of looking at the attribute names)
2121 # and to determine the outputs of the package.
2222 # shimsFn :: {package, redistArch} -> AttrSet
2323- shimsFn ? ({package, redistArch}: throw "shimsFn must be provided"),
2323+ shimsFn ? (throw "shimsFn must be provided"),
2424 # fixupFn :: Path
2525 # A path (or nix expression) to be evaluated with callPackage and then
2626 # provided to the package's overrideAttrs function.
···2929 # - cudaVersion
3030 # - mkVersionedPackageName
3131 # - package
3232- fixupFn ? (
3333- {
3434- final,
3535- cudaVersion,
3636- mkVersionedPackageName,
3737- package,
3838- ...
3939- }:
4040- throw "fixupFn must be provided"
4141- ),
3232+ # - ...
3333+ fixupFn ? (throw "fixupFn must be provided"),
4234}:
4335let
4436 inherit (lib)
···8072 && strings.versionAtLeast package.maxCudaVersion cudaVersion;
81738274 # Get all of the packages for our given platform.
7575+ # redistArch :: String
7676+ # Value is `"unsupported"` if the platform is not supported.
8377 redistArch = flags.getRedistArch hostPlatform.system;
84788585- allReleases = builtins.concatMap (xs: xs) (builtins.attrValues releaseSets);
7979+ allReleases = lists.flatten (builtins.attrValues releaseSets);
86808781 # All the supported packages we can build for our platform.
8882 # perSystemReleases :: List Package
+3
pkgs/development/cuda-modules/nccl/default.nix
···100100 homepage = "https://developer.nvidia.com/nccl";
101101 license = licenses.bsd3;
102102 platforms = platforms.linux;
103103+ # NCCL is not supported on Jetson, because it does not use NVLink or PCI-e for inter-GPU communication.
104104+ # https://forums.developer.nvidia.com/t/can-jetson-orin-support-nccl/232845/9
105105+ badPlatforms = lib.optionals cudaFlags.isJetsonBuild [ "aarch64-linux" ];
103106 maintainers =
104107 with maintainers;
105108 [
+7-8
pkgs/development/cuda-modules/tensorrt/fixup.nix
···1111}:
1212let
1313 inherit (lib)
1414+ attrsets
1415 maintainers
1516 meta
1617 strings
1718 versions
1819 ;
1919- targetArch =
2020- if hostPlatform.isx86_64 then
2121- "x86_64-linux-gnu"
2222- else if hostPlatform.isAarch64 then
2323- "aarch64-linux-gnu"
2424- else
2525- "unsupported";
2020+ # targetArch :: String
2121+ targetArch = attrsets.attrByPath [ hostPlatform.system ] "unsupported" {
2222+ x86_64-linux = "x86_64-linux-gnu";
2323+ aarch64-linux = "aarch64-linux-gnu";
2424+ };
2625in
2726finalAttrs: prevAttrs: {
2827 # Useful for inspecting why something went wrong.
···69687069 preInstall =
7170 (prevAttrs.preInstall or "")
7272- + ''
7171+ + strings.optionalString (targetArch != "unsupported") ''
7372 # Replace symlinks to bin and lib with the actual directories from targets.
7473 for dir in bin lib; do
7574 rm "$dir"
+16-8
pkgs/development/cuda-modules/tensorrt/shims.nix
···11# Shims to mimic the shape of ../modules/generic/manifests/{feature,redistrib}/release.nix
22-{package, redistArch}:
32{
44- featureRelease.${redistArch}.outputs = {
55- bin = true;
66- lib = true;
77- static = true;
88- dev = true;
99- sample = true;
1010- python = true;
33+ lib,
44+ package,
55+ # redistArch :: String
66+ # String is `"unsupported"` if the given architecture is unsupported.
77+ redistArch,
88+}:
99+{
1010+ featureRelease = lib.optionalAttrs (redistArch != "unsupported") {
1111+ ${redistArch}.outputs = {
1212+ bin = true;
1313+ lib = true;
1414+ static = true;
1515+ dev = true;
1616+ sample = true;
1717+ python = true;
1818+ };
1119 };
1220 redistribRelease = {
1321 name = "TensorRT: a high-performance deep learning interface";
···1414, rPackages
1515}@inputs:
16161717-assert ncclSupport -> cudaSupport;
1717+assert ncclSupport -> (cudaSupport && !cudaPackages.nccl.meta.unsupported);
1818# Disable regular tests when building the R package
1919# because 1) the R package runs its own tests and
2020# 2) the R package creates a different binary shared
···6464 # aarch64-darwin is broken because of https://github.com/bazelbuild/rules_cc/pull/136
6565 # however even with that fix applied, it doesn't work for everyone:
6666 # https://github.com/NixOS/nixpkgs/pull/184395#issuecomment-1207287129
6767- broken = stdenv.isDarwin;
6767+ # NOTE: We always build with NCCL; if it is unsupported, then our build is broken.
6868+ broken = stdenv.isDarwin || nccl.meta.unsupported;
6869 };
69707071 cudatoolkit_joined = symlinkJoin {
+8-5
pkgs/development/python-modules/torch/default.nix
···77 magma,
88 magma-hip,
99 magma-cuda-static,
1010- useSystemNccl ? true,
1010+ # Use the system NCCL as long as we're targeting CUDA on a supported platform.
1111+ useSystemNccl ? (cudaSupport && !cudaPackages.nccl.meta.unsupported),
1112 MPISupport ? false, mpi,
1213 buildDocs ? false,
1314···273274 PYTORCH_BUILD_VERSION = version;
274275 PYTORCH_BUILD_NUMBER = 0;
275276276276- USE_NCCL = setBool (cudaSupport && cudaPackages ? nccl);
277277- USE_SYSTEM_NCCL = setBool useSystemNccl; # don't build pytorch's third_party NCCL
278278- USE_STATIC_NCCL = setBool useSystemNccl;
277277+ # In-tree builds of NCCL are not supported.
278278+ # Use NCCL when cudaSupport is enabled and nccl is available.
279279+ USE_NCCL = setBool useSystemNccl;
280280+ USE_SYSTEM_NCCL = USE_NCCL;
281281+ USE_STATIC_NCCL = USE_NCCL;
279282280283 # Suppress a weird warning in mkl-dnn, part of ideep in pytorch
281284 # (upstream seems to have fixed this in the wrong place?)
···363366 ] ++ lists.optionals (cudaPackages ? cudnn) [
364367 cudnn.dev
365368 cudnn.lib
366366- ] ++ lists.optionals (useSystemNccl && cudaPackages ? nccl) [
369369+ ] ++ lists.optionals useSystemNccl [
367370 # Some platforms do not support NCCL (i.e., Jetson)
368371 nccl.dev # Provides nccl.h AND a static copy of NCCL!
369372 ] ++ lists.optionals (strings.versionOlder cudaVersion "11.8") [
+1-8
pkgs/top-level/cuda-packages.nix
···72727373 # Loose packages
7474 cudatoolkit = final.callPackage ../development/cuda-modules/cudatoolkit {};
7575- # SaxPy is only available after 11.4 because it requires redistributable versions of CUDA libraries.
7676- saxpy = attrsets.optionalAttrs (strings.versionAtLeast cudaVersion "11.4") (
7777- final.callPackage ../development/cuda-modules/saxpy {}
7878- );
7979- }
8080- # NCCL is not supported on Jetson, because it does not use NVLink or PCI-e for inter-GPU communication.
8181- # https://forums.developer.nvidia.com/t/can-jetson-orin-support-nccl/232845/9
8282- // attrsets.optionalAttrs (!flags.isJetsonBuild) {
7575+ saxpy = final.callPackage ../development/cuda-modules/saxpy {};
8376 nccl = final.callPackage ../development/cuda-modules/nccl {};
8477 nccl-tests = final.callPackage ../development/cuda-modules/nccl-tests {};
8578 }