···
   }});
 in callPackage { inherit cudaPackages; };
 ```
+
+The CUDA NVCC compiler requires flags to determine which hardware you
+want to target, in terms of SASS (real hardware) or PTX (JIT kernels).
+
+Nixpkgs targets a default set of real architectures based on the CUDA
+toolkit version, with PTX support for future hardware. Experienced users
+may optimize this configuration for a variety of reasons, such as
+reducing binary size and compile time, supporting legacy hardware, or
+optimizing for specific hardware.
+
+You may provide capabilities to add support or reduce binary size through
+`config`, using `cudaCapabilities = [ "6.0" "7.0" ];`, and set
+`cudaForwardCompat = true;` if you want PTX support for future hardware.
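+
+For example, a minimal sketch of importing Nixpkgs with these options
+(`allowUnfree` is needed because the CUDA toolkit is unfree; adjust the
+capabilities to the cards you actually target):
+
+```nix
+import <nixpkgs> {
+  config = {
+    allowUnfree = true;
+    cudaCapabilities = [ "6.0" "7.0" ];  # build SASS for these architectures
+    cudaForwardCompat = true;            # also emit PTX for future hardware
+  };
+}
+```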
+
+Please consult [GPUs supported](https://en.wikipedia.org/wiki/CUDA#GPUs_supported)
+for your specific card(s).
+
+Library maintainers should consult [NVCC Docs](https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/)
+and release notes for their software package.
···
   mklDnnSupport ? true, useSystemNccl ? true,
   MPISupport ? false, mpi,
   buildDocs ? false,
-  cudaArchList ? null,
 
   # Native build inputs
   cmake, util-linux, linkFarm, symlinkJoin, which, pybind11, removeReferencesTo,
···
   isPy3k, pythonOlder }:
 
 let
-  inherit (cudaPackages) cudatoolkit cudnn nccl;
+  inherit (cudaPackages) cudatoolkit cudaFlags cudnn nccl;
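+  # cudaFlags exposes the CUDA capabilities configured Nixpkgs-wide
+  # (consumed below via TORCH_CUDA_ARCH_LIST in preConfigure).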
 in
 
 # assert that everything needed for cuda is present and that the correct cuda versions are used
···
     paths = [ cudatoolkit.out cudatoolkit.lib nccl.dev nccl.out ];
   };
 
-  # Give an explicit list of supported architectures for the build, See:
-  # - pytorch bug report: https://github.com/pytorch/pytorch/issues/23573
-  # - pytorch-1.2.0 build on nixpks: https://github.com/NixOS/nixpkgs/pull/65041
-  #
-  # This list was selected by omitting the TORCH_CUDA_ARCH_LIST parameter,
-  # observing the fallback option (which selected all architectures known
-  # from cudatoolkit_10_0, pytorch-1.2, and python-3.6), and doing a binary
-  # searching to find offending architectures.
-  #
-  # NOTE: Because of sandboxing, this derivation can't auto-detect the hardware's
-  # cuda architecture, so there is also now a problem around new architectures
-  # not being supported until explicitly added to this derivation.
-  #
-  # FIXME: CMake is throwing the following warning on python-1.2:
-  #
-  # ```
-  # CMake Warning at cmake/public/utils.cmake:172 (message):
-  # In the future we will require one to explicitly pass TORCH_CUDA_ARCH_LIST
-  # to cmake instead of implicitly setting it as an env variable. This will
-  # become a FATAL_ERROR in future version of pytorch.
-  # ```
-  # If this is causing problems for your build, this derivation may have to strip
-  # away the standard `buildPythonPackage` and use the
-  # [*Adjust Build Options*](https://github.com/pytorch/pytorch/tree/v1.2.0#adjust-build-options-optional)
-  # instructions. This will also add more flexibility around configurations
-  # (allowing FBGEMM to be built in pytorch-1.1), and may future proof this
-  # derivation.
-  brokenArchs = [ "3.0" ]; # this variable is only used as documentation.
-
-  cudaCapabilities = rec {
-    cuda9 = [
-      "3.5"
-      "5.0"
-      "5.2"
-      "6.0"
-      "6.1"
-      "7.0"
-      "7.0+PTX" # I am getting a "undefined architecture compute_75" on cuda 9
-      # which leads me to believe this is the final cuda-9-compatible architecture.
-    ];
-
-    cuda10 = cuda9 ++ [
-      "7.5"
-      "7.5+PTX" # < most recent architecture as of cudatoolkit_10_0 and pytorch-1.2.0
-    ];
-
-    cuda11 = cuda10 ++ [
-      "8.0"
-      "8.0+PTX" # < CUDA toolkit 11.0
-      "8.6"
-      "8.6+PTX" # < CUDA toolkit 11.1
-    ];
-  };
-  final_cudaArchList =
-    if !cudaSupport || cudaArchList != null
-    then cudaArchList
-    else cudaCapabilities."cuda${lib.versions.major cudatoolkit.version}";
-
   # Normally libcuda.so.1 is provided at runtime by nvidia-x11 via
   # LD_LIBRARY_PATH=/run/opengl-driver/lib. We only use the stub
   # libcuda.so from cudatoolkit for running tests, so that we don’t have
···
   ];
 
   preConfigure = lib.optionalString cudaSupport ''
-    export TORCH_CUDA_ARCH_LIST="${lib.strings.concatStringsSep ";" final_cudaArchList}"
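+    # The architecture list now comes from the shared cudaFlags: the configured
+    # CUDA capabilities joined with ";", plus a PTX target when cudaForwardCompat
+    # is enabled, replacing the hand-maintained list above.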
+    export TORCH_CUDA_ARCH_LIST="${cudaFlags.cudaCapabilitiesSemiColonString}"
     export CC=${cudatoolkit.cc}/bin/gcc CXX=${cudatoolkit.cc}/bin/g++
   '' + lib.optionalString (cudaSupport && cudnn != null) ''
     export CUDNN_INCLUDE_DIR=${cudnn}/include
···
 
   passthru = {
     inherit cudaSupport cudaPackages;
-    cudaArchList = final_cudaArchList;
     # At least for 1.10.2 `torch.fft` is unavailable unless BLAS provider is MKL. This attribute allows for easy detection of its availability.
     blasProvider = blas.provider;
   };