nixpkgs mirror (for testing) github.com/NixOS/nixpkgs
nix
at devShellTools-shell 141 lines 3.7 kB view raw
# RCCL (ROCm communication collectives library) package expression.
#
# Arguments of note:
#   buildTests  - when true, builds the test binaries (-DBUILD_TESTS=ON), moves
#                 them into a separate "test" output, adds chrpath to the build
#                 inputs, and enables the ASAN/UBSAN flags below.
#   gpuTargets  - list of GPU architectures to build for; defaults to
#                 clr.localGpuTargets when that attribute exists, otherwise [ ].
#                 An empty list leaves target selection to the upstream build.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  rocm-smi,
  rocm-core,
  clr,
  mscclpp,
  perl,
  hipify,
  gtest,
  chrpath,
  rocprofiler,
  rocprofiler-register,
  autoPatchelfHook,
  buildTests ? false,
  gpuTargets ? (clr.localGpuTargets or [ ]),
}:

let
  # Both sanitizers are tied to test builds.
  useAsan = buildTests;
  useUbsan = buildTests;

  # Sanitizer flags shared by the compiler and the linker. Each sanitizer's
  # flag is gated on its own toggle so the two can be switched independently.
  # The string keeps a trailing space when non-empty because it is spliced
  # directly in front of further flags.
  san = lib.optionalString (useAsan || useUbsan) (
    "-fno-gpu-sanitize "
    + lib.optionalString useUbsan "-fsanitize=undefined "
    + lib.optionalString useAsan "-fsanitize=address -shared-libsan "
  );

  # -O2 and -fno-strict-aliasing due to UB issues in RCCL :c
  # Reported upstream
  compileFlags = "-I${clr}/include -O2 -fno-strict-aliasing ${san}-fno-omit-frame-pointer -momit-leaf-frame-pointer";

  # CMake list syntax: entries separated by ';'.
  gpuTargetList = lib.concatStringsSep ";" gpuTargets;
in
# Note: we can't properly test or make use of multi-node collective ops.
# https://github.com/NixOS/nixpkgs/issues/366242 tracks kernel support:
# - kfd_peerdirect support, which is on out-of-tree amdkfd in ROCm/ROCK-Kernel-Driver
# - infiniband ib_peer_mem support, which isn't in the mainline kernel but is carried by some distros
stdenv.mkDerivation (finalAttrs: {
  pname = "rccl${clr.gpuArchSuffix}";
  version = "6.3.3";

  outputs = [
    "out"
  ]
  ++ lib.optionals buildTests [
    "test"
  ];

  patches = [
    ./fix-mainline-support-and-ub.diff
    ./enable-mscclpp-on-all-gfx9.diff
    ./rccl-test-missing-iomanip.diff
  ];

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rccl";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-998tDiC0Qp9hhcXtFpiCWqwdKPVT2vNp0GU/rng03Bw=";
  };

  nativeBuildInputs = [
    cmake
    rocm-cmake
    clr
    perl
    hipify
    autoPatchelfHook # ASAN doesn't add rpath without this
  ];

  buildInputs = [
    rocm-smi
    gtest
    rocprofiler
    rocprofiler-register
    mscclpp
  ]
  ++ lib.optionals buildTests [
    chrpath
  ];

  cmakeFlags = [
    "-DHIP_CLANG_NUM_PARALLEL_JOBS=4"
    "-DCMAKE_BUILD_TYPE=Release"
    "-DROCM_PATH=${clr}"
    "-DHIP_COMPILER=${clr}/bin/amdclang++"
    "-DCMAKE_CXX_COMPILER=${clr}/bin/amdclang++"
    "-DROCM_PATCH_VERSION=${rocm-core.ROCM_LIBPATCH_VERSION}"
    "-DROCM_VERSION=${rocm-core.ROCM_LIBPATCH_VERSION}"
    "-DBUILD_BFD=OFF" # Can't get it to detect bfd.h
    "-DENABLE_MSCCL_KERNEL=ON"
    "-DENABLE_MSCCLPP=ON"
    "-DMSCCLPP_ROOT=${mscclpp}"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ]
  ++ lib.optionals (gpuTargets != [ ]) [
    # AMD can't make up their minds and keep changing which one is used in different projects.
    "-DAMDGPU_TARGETS=${gpuTargetList}"
    "-DGPU_TARGETS=${gpuTargetList}"
  ]
  ++ lib.optionals buildTests [
    "-DBUILD_TESTS=ON"
  ];

  # C and C++ share the same flag set; the linker only needs the sanitizer flags.
  env.CFLAGS = compileFlags;
  env.CXXFLAGS = compileFlags;
  env.LDFLAGS = san;

  postPatch = ''
    patchShebangs src tools
  '';

  postInstall =
    # Record the ASAN runtime as a DT_NEEDED entry of librccl.so.
    lib.optionalString useAsan ''
      patchelf --add-needed ${clr}/llvm/lib/linux/libclang_rt.asan-${stdenv.hostPlatform.parsed.cpu.name}.so $out/lib/librccl.so
    ''
    # Relocate everything installed under bin/ into the dedicated "test" output.
    + lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/* $test/bin
      rmdir $out/bin
    '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "ROCm communication collectives library";
    homepage = "https://github.com/ROCm/rccl";
    license = [
      lib.licenses.bsd2
      lib.licenses.bsd3
    ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})