Clone of https://github.com/NixOS/nixpkgs.git (to stress-test knotserver)

tree-wide: cudaPackages should not break default eval

cudaPackages: guard expressions against null values

+119 -98
+1 -1
pkgs/applications/science/math/caffe/default.nix
··· 153 153 || cudaSupport 154 154 || !(leveldbSupport -> (leveldb != null && snappy != null)) 155 155 || !(cudnnSupport -> (hasCudnn && cudaSupport)) 156 - || !(ncclSupport -> cudaSupport) 156 + || !(ncclSupport -> (cudaSupport && !nccl.meta.unsupported)) 157 157 || !(pythonSupport -> (python != null && numpy != null)) 158 158 ; 159 159 license = licenses.bsd2;
+13 -5
pkgs/development/cuda-modules/cudnn/shims.nix
··· 1 1 # Shims to mimic the shape of ../modules/generic/manifests/{feature,redistrib}/release.nix 2 - {package, redistArch}: 3 2 { 4 - featureRelease.${redistArch}.outputs = { 5 - lib = true; 6 - static = true; 7 - dev = true; 3 + lib, 4 + package, 5 + # redistArch :: String 6 + # String is "unsupported" if the given architecture is unsupported. 7 + redistArch, 8 + }: 9 + { 10 + featureRelease = lib.optionalAttrs (redistArch != "unsupported") { 11 + ${redistArch}.outputs = { 12 + lib = true; 13 + static = true; 14 + dev = true; 15 + }; 8 16 }; 9 17 redistribRelease = { 10 18 name = "NVIDIA CUDA Deep Neural Network library (cuDNN)";
+1
pkgs/development/cuda-modules/cutensor/extension.nix
··· 92 92 # A release is supported if it has a libPath that matches our CUDA version for our platform. 93 93 # LibPath are not constant across the same release -- one platform may support fewer 94 94 # CUDA versions than another. 95 + # redistArch :: String 95 96 redistArch = flags.getRedistArch hostPlatform.system; 96 97 # platformIsSupported :: Manifests -> Boolean 97 98 platformIsSupported =
+20 -30
pkgs/development/cuda-modules/flags.nix
··· 131 131 # `linux-aarch64` redist (which is for Jetson devices) if we're building any Jetson devices. 132 132 # Since both are based on aarch64, we can only have one or the other, otherwise there's an 133 133 # ambiguity as to which should be used. 134 + # NOTE: This function *will* be called by unsupported systems because `cudaPackages` is part of 135 + # `all-packages.nix`, which is evaluated on all systems. As such, we need to handle unsupported 136 + # systems gracefully. 134 137 # getRedistArch :: String -> String 135 - getRedistArch = 136 - nixSystem: 137 - if nixSystem == "aarch64-linux" then 138 - if jetsonTargets != [] then "linux-aarch64" else "linux-sbsa" 139 - else if nixSystem == "x86_64-linux" then 140 - "linux-x86_64" 141 - else if nixSystem == "ppc64le-linux" then 142 - "linux-ppc64le" 143 - else if nixSystem == "x86_64-windows" then 144 - "windows-x86_64" 145 - else 146 - "unsupported"; 138 + getRedistArch = nixSystem: attrsets.attrByPath [ nixSystem ] "unsupported" { 139 + aarch64-linux = if jetsonTargets != [] then "linux-aarch64" else "linux-sbsa"; 140 + x86_64-linux = "linux-x86_64"; 141 + ppc64le-linux = "linux-ppc64le"; 142 + x86_64-windows = "windows-x86_64"; 143 + }; 147 144 148 145 # Maps NVIDIA redist arch to Nix system. 149 - # It is imperative that we include the boolean condition based on jetsonTargets to ensure 150 - # we don't advertise availability of packages only available on server-grade ARM 151 - # as being available for the Jetson, since both `linux-sbsa` and `linux-aarch64` are 152 - # mapped to the Nix system `aarch64-linux`. 
153 - getNixSystem = 154 - redistArch: 155 - if redistArch == "linux-sbsa" && jetsonTargets == [] then 156 - "aarch64-linux" 157 - else if redistArch == "linux-aarch64" && jetsonTargets != [] then 158 - "aarch64-linux" 159 - else if redistArch == "linux-x86_64" then 160 - "x86_64-linux" 161 - else if redistArch == "linux-ppc64le" then 162 - "ppc64le-linux" 163 - else if redistArch == "windows-x86_64" then 164 - "x86_64-windows" 165 - else 166 - "unsupported-${redistArch}"; 146 + # NOTE: This function *will* be called by unsupported systems because `cudaPackages` is part of 147 + # `all-packages.nix`, which is evaluated on all systems. As such, we need to handle unsupported 148 + # systems gracefully. 149 + # getNixSystem :: String -> String 150 + getNixSystem = redistArch: attrsets.attrByPath [ redistArch ] "unsupported-${redistArch}" { 151 + linux-sbsa = "aarch64-linux"; 152 + linux-aarch64 = "aarch64-linux"; 153 + linux-x86_64 = "x86_64-linux"; 154 + linux-ppc64le = "ppc64le-linux"; 155 + windows-x86_64 = "x86_64-windows"; 156 + }; 167 157 168 158 formatCapabilities = 169 159 {
+40 -22
pkgs/development/cuda-modules/generic-builders/manifest.nix
··· 42 42 # Get the redist architectures for which package provides distributables. 43 43 # These are used by meta.platforms. 44 44 supportedRedistArchs = builtins.attrNames featureRelease; 45 + # redistArch :: String 46 + # The redistArch is the name of the architecture for which the redistributable is built. 47 + # It is `"unsupported"` if the redistributable is not supported on the target platform. 45 48 redistArch = flags.getRedistArch hostPlatform.system; 46 49 in 47 50 backendStdenv.mkDerivation ( ··· 86 89 "sample" 87 90 "python" 88 91 ]; 92 + # Filter out outputs that don't exist in the redistributable. 93 + # NOTE: In the case the redistributable isn't supported on the target platform, 94 + # we will have `outputs = [ "out" ] ++ possibleOutputs`. This is of note because platforms which 95 + # aren't supported would otherwise have evaluation errors when trying to access outputs other than `out`. 96 + # The alternative would be to have `outputs = [ "out" ]` when `redistArch = "unsupported"`, but that would 97 + # require adding guards throughout the entirety of the CUDA package set to ensure `cudaSupport` is true -- 98 + # recall that OfBorg will evaluate packages marked as broken and that `cudaPackages` will be evaluated with 99 + # `cudaSupport = false`! 89 100 additionalOutputs = 90 - if redistArch == "unsupported" then possibleOutputs else builtins.filter hasOutput possibleOutputs; 101 + if redistArch == "unsupported" 102 + then possibleOutputs 103 + else builtins.filter hasOutput possibleOutputs; 91 104 # The out output is special -- it's the default output and we always include it. 92 105 outputs = [ "out" ] ++ additionalOutputs; 93 106 in ··· 114 127 # Useful for introspecting why something went wrong. 115 128 # Maps descriptions of why the derivation would be marked broken to 116 129 # booleans indicating whether that description is true. 
117 - brokenConditions = {}; 118 - 119 - src = fetchurl { 120 - url = 121 - if (builtins.hasAttr redistArch redistribRelease) then 122 - "https://developer.download.nvidia.com/compute/${redistName}/redist/${ 123 - redistribRelease.${redistArch}.relative_path 124 - }" 125 - else 126 - "cannot-construct-an-url-for-the-${redistArch}-platform"; 127 - sha256 = redistribRelease.${redistArch}.sha256 or lib.fakeHash; 130 + # brokenConditions :: AttrSet Bool 131 + brokenConditions = { 132 + # Using an unrecognized redistArch 133 + "Unrecognized NixOS platform ${hostPlatform.system}" = redistArch == "unsupported"; 134 + # Trying to build for a platform that doesn't have a redistributable 135 + "Unsupported NixOS platform (or configuration) ${hostPlatform.system}" = finalAttrs.src == null; 128 136 }; 137 + 138 + # src :: Optional Derivation 139 + src = trivial.pipe redistArch [ 140 + # If redistArch doesn't exist in redistribRelease, return null. 141 + (redistArch: redistribRelease.${redistArch} or null) 142 + # If the release is non-null, fetch the source; otherwise, return null. 143 + (trivial.mapNullable ( 144 + { relative_path, sha256, ... }: 145 + fetchurl { 146 + url = "https://developer.download.nvidia.com/compute/${redistName}/redist/${relative_path}"; 147 + inherit sha256; 148 + } 149 + )) 150 + ]; 129 151 130 152 postPatch = '' 131 153 if [[ -d pkg-config ]] ; then ··· 284 306 meta = { 285 307 description = "${redistribRelease.name}. 
By downloading and using the packages you accept the terms and conditions of the ${finalAttrs.meta.license.shortName}"; 286 308 sourceProvenance = [sourceTypes.binaryNativeCode]; 287 - platforms = 288 - lists.concatMap 289 - ( 290 - redistArch: 291 - let 292 - nixSystem = flags.getNixSystem redistArch; 293 - in 294 - lists.optionals (!(strings.hasPrefix "unsupported-" nixSystem)) [ nixSystem ] 295 - ) 296 - supportedRedistArchs; 309 + platforms = trivial.pipe supportedRedistArchs [ 310 + # Map each redist arch to the equivalent nix system, or to `unsupported-<redistArch>` if there is no equivalent. 311 + (builtins.map flags.getNixSystem) 312 + # Filter out unsupported systems 313 + (builtins.filter (nixSystem: !(strings.hasPrefix "unsupported-" nixSystem))) 314 + ]; 297 315 broken = lists.any trivial.id (attrsets.attrValues finalAttrs.brokenConditions); 298 316 license = licenses.unfree; 299 317 maintainers = teams.cuda.members;
+6 -12
pkgs/development/cuda-modules/generic-builders/multiplex.nix
··· 20 20 # The featureRelease is used to populate meta.platforms (by way of looking at the attribute names) 21 21 # and to determine the outputs of the package. 22 22 # shimsFn :: {package, redistArch} -> AttrSet 23 - shimsFn ? ({package, redistArch}: throw "shimsFn must be provided"), 23 + shimsFn ? (throw "shimsFn must be provided"), 24 24 # fixupFn :: Path 25 25 # A path (or nix expression) to be evaluated with callPackage and then 26 26 # provided to the package's overrideAttrs function. ··· 29 29 # - cudaVersion 30 30 # - mkVersionedPackageName 31 31 # - package 32 - fixupFn ? ( 33 - { 34 - final, 35 - cudaVersion, 36 - mkVersionedPackageName, 37 - package, 38 - ... 39 - }: 40 - throw "fixupFn must be provided" 41 - ), 32 + # - ... 33 + fixupFn ? (throw "fixupFn must be provided"), 42 34 }: 43 35 let 44 36 inherit (lib) ··· 80 72 && strings.versionAtLeast package.maxCudaVersion cudaVersion; 81 73 82 74 # Get all of the packages for our given platform. 75 + # redistArch :: String 76 + # Value is `"unsupported"` if the platform is not supported. 83 77 redistArch = flags.getRedistArch hostPlatform.system; 84 78 85 - allReleases = builtins.concatMap (xs: xs) (builtins.attrValues releaseSets); 79 + allReleases = lists.flatten (builtins.attrValues releaseSets); 86 80 87 81 # All the supported packages we can build for our platform. 88 82 # perSystemReleases :: List Package
+3
pkgs/development/cuda-modules/nccl/default.nix
··· 100 100 homepage = "https://developer.nvidia.com/nccl"; 101 101 license = licenses.bsd3; 102 102 platforms = platforms.linux; 103 + # NCCL is not supported on Jetson, because it does not use NVLink or PCI-e for inter-GPU communication. 104 + # https://forums.developer.nvidia.com/t/can-jetson-orin-support-nccl/232845/9 105 + badPlatforms = lib.optionals cudaFlags.isJetsonBuild [ "aarch64-linux" ]; 103 106 maintainers = 104 107 with maintainers; 105 108 [
+7 -8
pkgs/development/cuda-modules/tensorrt/fixup.nix
··· 11 11 }: 12 12 let 13 13 inherit (lib) 14 + attrsets 14 15 maintainers 15 16 meta 16 17 strings 17 18 versions 18 19 ; 19 - targetArch = 20 - if hostPlatform.isx86_64 then 21 - "x86_64-linux-gnu" 22 - else if hostPlatform.isAarch64 then 23 - "aarch64-linux-gnu" 24 - else 25 - "unsupported"; 20 + # targetArch :: String 21 + targetArch = attrsets.attrByPath [ hostPlatform.system ] "unsupported" { 22 + x86_64-linux = "x86_64-linux-gnu"; 23 + aarch64-linux = "aarch64-linux-gnu"; 24 + }; 26 25 in 27 26 finalAttrs: prevAttrs: { 28 27 # Useful for inspecting why something went wrong. ··· 69 68 70 69 preInstall = 71 70 (prevAttrs.preInstall or "") 72 - + '' 71 + + strings.optionalString (targetArch != "unsupported") '' 73 72 # Replace symlinks to bin and lib with the actual directories from targets. 74 73 for dir in bin lib; do 75 74 rm "$dir"
+16 -8
pkgs/development/cuda-modules/tensorrt/shims.nix
··· 1 1 # Shims to mimic the shape of ../modules/generic/manifests/{feature,redistrib}/release.nix 2 - {package, redistArch}: 3 2 { 4 - featureRelease.${redistArch}.outputs = { 5 - bin = true; 6 - lib = true; 7 - static = true; 8 - dev = true; 9 - sample = true; 10 - python = true; 3 + lib, 4 + package, 5 + # redistArch :: String 6 + # String is `"unsupported"` if the given architecture is unsupported. 7 + redistArch, 8 + }: 9 + { 10 + featureRelease = lib.optionalAttrs (redistArch != "unsupported") { 11 + ${redistArch}.outputs = { 12 + bin = true; 13 + lib = true; 14 + static = true; 15 + dev = true; 16 + sample = true; 17 + python = true; 18 + }; 11 19 }; 12 20 redistribRelease = { 13 21 name = "TensorRT: a high-performance deep learning interface";
+1 -1
pkgs/development/libraries/science/math/magma/generic.nix
··· 159 159 description = "Matrix Algebra on GPU and Multicore Architectures"; 160 160 license = licenses.bsd3; 161 161 homepage = "http://icl.cs.utk.edu/magma/index.html"; 162 - platforms = platforms.unix; 162 + platforms = platforms.linux; 163 163 maintainers = with maintainers; [ connorbaker ]; 164 164 165 165 # Cf. https://bitbucket.org/icl/magma/src/fcfe5aa61c1a4c664b36a73ebabbdbab82765e9f/CMakeLists.txt#lines-20
+1 -1
pkgs/development/libraries/xgboost/default.nix
··· 14 14 , rPackages 15 15 }@inputs: 16 16 17 - assert ncclSupport -> cudaSupport; 17 + assert ncclSupport -> (cudaSupport && !cudaPackages.nccl.meta.unsupported); 18 18 # Disable regular tests when building the R package 19 19 # because 1) the R package runs its own tests and 20 20 # 2) the R package creates a different binary shared
+2 -1
pkgs/development/python-modules/jaxlib/default.nix
··· 64 64 # aarch64-darwin is broken because of https://github.com/bazelbuild/rules_cc/pull/136 65 65 # however even with that fix applied, it doesn't work for everyone: 66 66 # https://github.com/NixOS/nixpkgs/pull/184395#issuecomment-1207287129 67 - broken = stdenv.isDarwin; 67 + # NOTE: We always build with NCCL; if it is unsupported, then our build is broken. 68 + broken = stdenv.isDarwin || nccl.meta.unsupported; 68 69 }; 69 70 70 71 cudatoolkit_joined = symlinkJoin {
+8 -5
pkgs/development/python-modules/torch/default.nix
··· 7 7 magma, 8 8 magma-hip, 9 9 magma-cuda-static, 10 - useSystemNccl ? true, 10 + # Use the system NCCL as long as it is supported. 11 + useSystemNccl ? !cudaPackages.nccl.meta.unsupported, 11 12 MPISupport ? false, mpi, 12 13 buildDocs ? false, 13 14 ··· 57 58 let 58 59 inherit (lib) attrsets lists strings trivial; 59 60 inherit (cudaPackages) cudaFlags cudnn nccl; 61 + ncclSupported = cudaSupport && !cudaPackages.nccl.meta.unsupported; 60 62 61 63 setBool = v: if v then "1" else "0"; 62 64 ··· 121 123 "Unsupported CUDA version" = cudaSupport && !(builtins.elem cudaPackages.cudaMajorVersion [ "11" "12" ]); 122 124 "MPI cudatoolkit does not match cudaPackages.cudatoolkit" = MPISupport && cudaSupport && (mpi.cudatoolkit != cudaPackages.cudatoolkit); 123 125 "Magma cudaPackages does not match cudaPackages" = cudaSupport && (effectiveMagma.cudaPackages != cudaPackages); 126 + "Requested system NCCL, but cudaPackages.nccl is not supported" = useSystemNccl && !ncclSupported; 124 127 }; 125 128 in buildPythonPackage rec { 126 129 pname = "torch"; ··· 273 276 PYTORCH_BUILD_VERSION = version; 274 277 PYTORCH_BUILD_NUMBER = 0; 275 278 276 - USE_NCCL = setBool (cudaSupport && cudaPackages ? nccl); 277 - USE_SYSTEM_NCCL = setBool useSystemNccl; # don't build pytorch's third_party NCCL 278 - USE_STATIC_NCCL = setBool useSystemNccl; 279 + USE_NCCL = setBool (cudaSupport && ncclSupported); 280 + USE_SYSTEM_NCCL = setBool (cudaSupport && useSystemNccl); # don't build pytorch's third_party NCCL 281 + USE_STATIC_NCCL = setBool (cudaSupport && useSystemNccl); 279 282 280 283 # Suppress a weird warning in mkl-dnn, part of ideep in pytorch 281 284 # (upstream seems to have fixed this in the wrong place?) ··· 363 366 ] ++ lists.optionals (cudaPackages ? cudnn) [ 364 367 cudnn.dev 365 368 cudnn.lib 366 - ] ++ lists.optionals (useSystemNccl && cudaPackages ? 
nccl) [ 369 + ] ++ lists.optionals (useSystemNccl && ncclSupported) [ 367 370 # Some platforms do not support NCCL (i.e., Jetson) 368 371 nccl.dev # Provides nccl.h AND a static copy of NCCL! 369 372 ] ++ lists.optionals (strings.versionOlder cudaVersion "11.8") [
-4
pkgs/top-level/cuda-packages.nix
··· 73 73 # Loose packages 74 74 cudatoolkit = final.callPackage ../development/cuda-modules/cudatoolkit {}; 75 75 saxpy = final.callPackage ../development/cuda-modules/saxpy {}; 76 - } 77 - # NCCL is not supported on Jetson, because it does not use NVLink or PCI-e for inter-GPU communication. 78 - # https://forums.developer.nvidia.com/t/can-jetson-orin-support-nccl/232845/9 79 - // attrsets.optionalAttrs (!flags.isJetsonBuild) { 80 76 nccl = final.callPackage ../development/cuda-modules/nccl {}; 81 77 nccl-tests = final.callPackage ../development/cuda-modules/nccl-tests {}; 82 78 }