{
  _cuda,
  backendStdenv,
  cuda_cccl,
  cuda_cudart,
  cuda_nvcc,
  cudaAtLeast,
  cudaNamePrefix,
  fetchFromGitHub,
  flags,
  lib,
  python3,
  removeReferencesTo,
  which,
  # passthru.updateScript
  gitUpdater,
}:
let
  inherit (_cuda.lib) _mkMetaBadPlatforms;
  inherit (backendStdenv) hasJetsonCudaCapability requestedJetsonCudaCapabilities;
  inherit (lib)
    all
    flip
    getAttr
    getBin
    getInclude
    getLib
    licenses
    maintainers
    optionalString
    teams
    versionAtLeast
    versionOlder
    ;
in
backendStdenv.mkDerivation (finalAttrs: {
  # Opt in to structured attributes and strict dependency separation for this derivation
  # (see the Nixpkgs manual for the exact semantics of both switches).
  __structuredAttrs = true;
  strictDeps = true;

  # NOTE: Depends on the CUDA package set, so use cudaNamePrefix.
  # `name` is spelled out explicitly so that the CUDA prefix precedes the usual pname-version pair;
  # both components are read back through finalAttrs, so overriding pname or version updates the name too.
  name = "${cudaNamePrefix}-${finalAttrs.pname}-${finalAttrs.version}";
  pname = "nccl";

  # NOTE:
  #   The NCCL release is chosen from the CUDA release in use. Newer NCCL releases tend to fail to compile against
  #   older CUDA releases, (mostly) because they assume a newer CCCL; pairing a newer CCCL with an older CUDA release
  #   can (sometimes) make a newer NCCL than the one selected here work.
  version =
    let
      # Evaluated lazily, so the checks still run in the order newest -> oldest.
      atLeast11_7 = cudaAtLeast "11.7";
      atLeast11_6 = cudaAtLeast "11.6";
    in
    if atLeast11_7 then
      "2.28.7-1"
    else if atLeast11_6 then
      "2.26.6-1"
    else
      "2.25.1-1";

  # Upstream sources are fetched from the NVIDIA/nccl repository at the tag matching `version`.
  src =
    let
      # Source hashes keyed by NCCL release. Every value `version` can evaluate to needs an entry here,
      # otherwise the lookup below fails at evaluation time.
      hashByVersion = {
        "2.28.7-1" = "sha256-NM19OiBBGmv3cGoVoRLKSh9Y59hiDoei9NIrRnTqWeA=";
        "2.26.6-1" = "sha256-vkWMGXCy+dIpYCecdafmOAGlnfRxIQ5Y2ZQuMjinraI=";
        "2.25.1-1" = "sha256-3snh0xdL9I5BYqdbqdl+noizJoI38mZRVOJChgEE1I8=";
      };
    in
    fetchFromGitHub {
      owner = "NVIDIA";
      repo = "nccl";
      tag = "v${finalAttrs.version}";
      hash = hashByVersion.${finalAttrs.version};
    };

  # Besides the default and development outputs, a dedicated "static" output is declared;
  # postFixup moves lib/libnccl_static.a into it.
  outputs = [
    "out"
    "dev"
    "static"
  ];

  # Tools executed on the build machine:
  # - python3: interpreter for the generate.py scripts whose shebangs are patched in postPatch.
  # - removeReferencesTo: provides the remove-references-to command used in postFixup.
  # - which: presumably invoked by NCCL's makefiles -- TODO confirm.
  nativeBuildInputs = [
    cuda_nvcc
    python3
    removeReferencesTo
    which
  ];

  # Only the include output of cuda_nvcc is listed here, next to cuda_cccl and cuda_cudart.
  # NOTE(review): presumably some headers shipped with nvcc are needed for the host platform -- confirm.
  buildInputs = [
    (getInclude cuda_nvcc)
    cuda_cccl
    cuda_cudart
  ];

  # Extra compiler flag(s) handed to the build through NIX_CFLAGS_COMPILE; toString joins the list with spaces.
  env.NIX_CFLAGS_COMPILE = toString [ "-Wno-unused-function" ];

  # Source fix-ups applied after unpacking:
  # - patch the shebangs of the device code generator scripts;
  # - drop the `-ccbin $(CXX)` flag from makefiles/common.mk.
  # NOTE(review): both generate.py paths are patched for every NCCL version; confirm that
  # ./src/device/symmetric/generate.py exists in the older releases `version` can select.
  postPatch = ''
    patchShebangs ./src/device/generate.py
    patchShebangs ./src/device/symmetric/generate.py

    nixLog "patching $PWD/makefiles/common.mk to remove NVIDIA's ccbin declaration"
    substituteInPlace ./makefiles/common.mk \
      --replace-fail \
        '-ccbin $(CXX)' \
        ""
  ''
  # 2.27.3-1 was the first to introduce CXXSTD; for older releases, replace the hardcoded `-std=c++11`
  # with a reference to $(CXXSTD) so that the CXXSTD value passed via makeFlags is used there as well.
  + optionalString (versionOlder finalAttrs.version "2.27.3-1") ''
    nixLog "patching $PWD/makefiles/common.mk to remove NVIDIA's std hardcoding"
    substituteInPlace ./makefiles/common.mk \
      --replace-fail \
        '-std=c++11' \
        '$(CXXSTD)'
  '';

  # Variables passed on the make command line:
  # - CXXSTD: selects C++17 (releases older than 2.27.3-1 are patched in postPatch to honour it);
  # - CUDA_HOME / CUDA_INC / CUDA_LIB: locations of nvcc's bin output and cudart's headers and libraries;
  # - NVCC_GENCODE: the gencode flags taken from `flags.gencodeString`;
  # - PREFIX: `$(out)` is not a Nix interpolation and is left literal here; it is presumably resolved by
  #   make at build time -- TODO confirm.
  # TODO: This would likely break under cross; need to delineate between build and host packages.
  makeFlags = [
    "CXXSTD=-std=c++17"
    "CUDA_HOME=${getBin cuda_nvcc}"
    "CUDA_INC=${getInclude cuda_cudart}/include"
    "CUDA_LIB=${getLib cuda_cudart}/lib"
    "NVCC_GENCODE=${flags.gencodeString}"
    "PREFIX=$(out)"
  ];

  enableParallelBuilding = true;

  # Post-installation fix-ups:
  # 1. Resolve `outputStatic` to the first available output among "static", "lib" and "out", then move the
  #    static archive there.
  # 2. Scrub references to cuda_nvcc from the installed libraries (rationale below).
  postFixup = ''
    _overrideFirst outputStatic "static" "lib" "out"
    moveToOutput lib/libnccl_static.a "''${!outputStatic:?}"
  ''
  # Since CUDA 12.8, the cuda_nvcc path leaks in:
  # - libnccl.so's .nv_fatbin section
  # - libnccl_static.a
  # &devrt -L /nix/store/00000000000000000000000000000000-...nvcc-.../bin/...
  # This string makes cuda_nvcc a runtime dependency of nccl.
  # See https://github.com/NixOS/nixpkgs/pull/457803
  #
  # The output variables are expanded quoted and with `:?`, matching the moveToOutput call above, so that an
  # unset or empty output variable aborts the build instead of expanding to a path rooted at /lib.
  # The globs stay outside the quotes so they are still expanded by the shell.
  + ''
    remove-references-to -t "${getBin cuda_nvcc}" \
      "''${!outputLib:?}"/lib/libnccl.so.* \
      "''${!outputStatic:?}"/lib/*.a
  '';

  # C.f. remove-references-to above. Ensure *all* references to cuda_nvcc are removed
  disallowedRequisites = [ (getBin cuda_nvcc) ];

  passthru = {
    # Conditions the requested CUDA capabilities must satisfy; each entry pairs an assertion with the
    # message describing what it checks.
    platformAssertions = [
      {
        message = "Pre-Thor Jetson devices (CUDA capabilities < 10.1) are not supported by NCCL";
        # If any Jetson capability was requested, every requested Jetson capability must be at least 10.1.
        assertion =
          hasJetsonCudaCapability
          -> all (capability: versionAtLeast capability "10.1") requestedJetsonCudaCapabilities;
      }
    ];

    # Upstream tags carry a "v" prefix in front of the version (cf. the `tag` used for `src`).
    updateScript = gitUpdater {
      pname = finalAttrs.pname;
      version = finalAttrs.version;
      rev-prefix = "v";
    };
  };

  meta = {
    description = "Multi-GPU and multi-node collective communication primitives for NVIDIA GPUs";
    homepage = "https://developer.nvidia.com/nccl";
    license = licenses.bsd3;
    platforms = [
      "aarch64-linux"
      "x86_64-linux"
    ];
    # Platforms to mark as bad are derived from this derivation's final attributes.
    # NCCL is not supported on Pre-Thor Jetsons, because it does not use NVLink or PCI-e for inter-GPU communication.
    # https://forums.developer.nvidia.com/t/can-jetson-orin-support-nccl/232845/9
    badPlatforms = _mkMetaBadPlatforms finalAttrs;
    # Qualified references (no `with`), in the same style as `teams` below.
    maintainers = [ maintainers.mdaiter ];
    teams = [ teams.cuda ];
  };
})