nixpkgs mirror (for testing)
github.com/NixOS/nixpkgs
nix
# Builds NVIDIA NVSHMEM (libnvshmem) from the public GitHub source release.
# NVSHMEM is a parallel programming interface for NVIDIA GPUs based on the
# OpenSHMEM model. This expression lives in the CUDA package set and uses the
# set's backendStdenv / cudaNamePrefix conventions.
{
  _cuda,
  backendStdenv,
  buildPackages,
  cmake,
  cuda_cccl,
  cuda_cudart,
  cuda_nvcc,
  cuda_nvml_dev,
  cuda_nvtx,
  cudaAtLeast,
  cudaMajorMinorVersion,
  cudaNamePrefix,
  fetchFromGitHub,
  flags,
  gdrcopy,
  lib,
  libfabric,
  mpi,
  nccl,
  ninja,
  pmix,
  python3Packages,
  rdma-core,
  ucx,
  # passthru.updateScript
  gitUpdater,
}:
let
  inherit (lib)
    cmakeBool
    cmakeFeature
    getBin
    getDev
    getExe
    getLib
    licenses
    maintainers
    teams
    ;
in
backendStdenv.mkDerivation (finalAttrs: {
  __structuredAttrs = true;
  strictDeps = true;

  # NOTE: Depends on the CUDA package set, so use cudaNamePrefix.
  name = "${cudaNamePrefix}-${finalAttrs.pname}-${finalAttrs.version}";
  pname = "libnvshmem";
  version = "3.4.5-0";

  src = fetchFromGitHub {
    owner = "NVIDIA";
    repo = "nvshmem";
    tag = "v${finalAttrs.version}";
    hash = "sha256-RHZzjDMYlL7vAVP1/UXM/Pt4bhajeWdCi3ihICeD2mc=";
  };

  outputs = [ "out" ];

  nativeBuildInputs = [
    cuda_nvcc
    cmake
    ninja

    # NOTE: mpi is in nativeBuildInputs because it contains compilers and is only discoverable by CMake
    # when a nativeBuildInput.
    mpi

    # NOTE: Python is required even if not building nvshmem4py:
    # https://github.com/NVIDIA/nvshmem/blob/131da55f643ac87c810ba0bc51d359258bf433a1/CMakeLists.txt#L173
    python3Packages.python
  ];

  # NOTE: Upstream hardcodes C++/CUDA standard versions in its CMake files, so CMake does not
  # respect the values we provide; patch every occurrence of "<LANG>_STANDARD 11" to 17 instead.
  # The grep finds each file containing the hardcoded standard; substituteInPlace fails loudly
  # (--replace-fail) if a matched file no longer contains the expected text.
  postPatch = ''
    for standardName in {CXX,CUDA}_STANDARD
    do
      while IFS= read -r cmakeFileToPatch
      do
        nixLog "patching $PWD/$cmakeFileToPatch to fix $standardName"
        substituteInPlace "$PWD/$cmakeFileToPatch" \
          --replace-fail \
            "$standardName 11" \
            "$standardName 17"
      done < <(grep --recursive --files-with-matches "$standardName 11")
    done
    unset -v cmakeFileToPatch
    unset -v standardName
  '';

  enableParallelBuilding = true;

  buildInputs = [
    cuda_cccl
    cuda_cudart
    cuda_nvml_dev
    cuda_nvtx
    gdrcopy
    libfabric
    nccl
    pmix
    rdma-core
    ucx
  ];

  # NOTE: This *must* be an environment variable: NVIDIA saw fit to configure and build CMake
  # projects while *inside* a CMake build and didn't correctly thread arguments through, so the
  # environment is the only way to get configurations to the nested build.
  env.CUDA_HOME = (getBin cuda_nvcc).outPath;

  # Feature toggles and *_HOME hints follow upstream's install guide:
  # https://docs.nvidia.com/nvshmem/release-notes-install-guide/install-guide/nvshmem-install-proc.html#other-distributions
  cmakeFlags = [
    (cmakeFeature "NVSHMEM_PREFIX" (placeholder "out"))

    # CUDA_HOME is passed both here and via env above; see the NOTE on env.CUDA_HOME.
    (cmakeFeature "CUDA_HOME" (getBin cuda_nvcc).outPath)
    (cmakeFeature "CMAKE_CUDA_COMPILER" (getExe cuda_nvcc))

    (cmakeFeature "CMAKE_CUDA_ARCHITECTURES" flags.cmakeCudaArchitecturesString)

    (cmakeBool "NVSHMEM_USE_NCCL" true)
    (cmakeFeature "NCCL_HOME" (getDev nccl).outPath)

    (cmakeBool "NVSHMEM_USE_GDRCOPY" true)
    (cmakeFeature "GDRCOPY_HOME" (getDev gdrcopy).outPath)

    # NOTE: Make sure to use mpi from buildPackages to match the spliced version created through nativeBuildInputs.
    (cmakeBool "NVSHMEM_MPI_SUPPORT" true)
    (cmakeFeature "MPI_HOME" (getLib buildPackages.mpi).outPath)

    # TODO: Doesn't UCX need to be built with some argument when we want to use it with libnvshmem?
    (cmakeBool "NVSHMEM_UCX_SUPPORT" true)
    (cmakeFeature "UCX_HOME" (getDev ucx).outPath)

    (cmakeBool "NVSHMEM_LIBFABRIC_SUPPORT" true)
    (cmakeFeature "LIBFABRIC_HOME" (getDev libfabric).outPath)

    (cmakeBool "NVSHMEM_IBGDA_SUPPORT" true)
    # NOTE: no corresponding _HOME variable for IBGDA.

    (cmakeBool "NVSHMEM_PMIX_SUPPORT" true)
    (cmakeFeature "PMIX_HOME" (getDev pmix).outPath)

    (cmakeBool "NVSHMEM_BUILD_TESTS" true)
    (cmakeBool "NVSHMEM_BUILD_EXAMPLES" true)

    # Distribution packaging (deb/rpm) is not meaningful inside a Nix build.
    (cmakeBool "NVSHMEM_BUILD_DEB_PACKAGE" false)
    (cmakeBool "NVSHMEM_BUILD_RPM_PACKAGE" false)

    # TODO: Looks like a nightmare to package and depends on things we haven't packaged yet
    # https://github.com/NVIDIA/nvshmem/tree/131da55f643ac87c810ba0bc51d359258bf433a1/nvshmem4py
    (cmakeBool "NVSHMEM_BUILD_PYTHON_LIB" false)

    # NOTE: unsupported because it requires Clang
    (cmakeBool "NVSHMEM_BUILD_BITCODE_LIBRARY" false)
  ];

  # Upstream's install step drops some metadata files at the top of $out; relocate them to
  # $out/share so the output root stays conventional.
  postInstall = ''
    nixLog "moving top-level files in $out to $out/share"
    mv -v "$out"/{changelog,git_commit.txt,License.txt,version.txt} "$out/share/"
  '';

  doCheck = false;

  passthru = {
    # Update script tracks upstream git tags of the form "v<version>".
    updateScript = gitUpdater {
      inherit (finalAttrs) pname version;
      rev-prefix = "v";
    };

    brokenAssertions = [
      # CUDA pre-11.7 yields macro/type errors in src/include/internal/host_transport/cudawrap.h.
      {
        message = "NVSHMEM does not support CUDA releases earlier than 11.7 (found ${cudaMajorMinorVersion})";
        assertion = cudaAtLeast "11.7";
      }
    ];
  };

  meta = {
    description = "Parallel programming interface for NVIDIA GPUs based on OpenSHMEM";
    homepage = "https://github.com/NVIDIA/nvshmem";
    # broken is derived from passthru.brokenAssertions via the CUDA package set's helper.
    broken = _cuda.lib._mkMetaBroken finalAttrs;
    # NOTE: There are many licenses:
    # https://github.com/NVIDIA/nvshmem/blob/7dd48c9fd7aa2134264400802881269b7822bd2f/License.txt
    license = licenses.nvidiaCudaRedist;
    platforms = [
      "aarch64-linux"
      "x86_64-linux"
    ];
    maintainers = [ maintainers.connorbaker ];
    teams = [ teams.cuda ];
  };
})