nixpkgs mirror (for testing) github.com/NixOS/nixpkgs
nix
at python-updates 163 lines 4.5 kB view raw
# RCCL (ROCm communication collectives library) package expression.
#
# Arguments of note:
#   buildTests - when true, configures with -DBUILD_TESTS=ON, enables the
#                sanitizer flags defined below, and moves everything installed
#                under $out/bin into a separate "test" output.
#   gpuTargets - list of GPU architectures; defaults to clr.localGpuTargets
#                when that attribute exists, otherwise [ ]. When empty, no
#                target flags are passed to CMake.
#   rccl       - this package itself; only used to define passthru.tests.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  rocm-smi,
  rocm-core,
  pkg-config,
  clr,
  mscclpp,
  perl,
  hipify,
  python3,
  fmt,
  gtest,
  chrpath,
  roctracer,
  rocprofiler,
  rocprofiler-register,
  autoPatchelfHook,
  buildTests ? false,
  gpuTargets ? (clr.localGpuTargets or [ ]),
  # for passthru.tests
  rccl,
}:

let
  # Both sanitizers are currently tied to test builds. Each toggle controls
  # only its own -fsanitize option, so they can be changed independently.
  useAsan = buildTests;
  useUbsan = buildTests;

  # Sanitizer flags shared by the compiler and the linker. The value is either
  # empty or ends with a trailing space, so it can be spliced directly in
  # front of further flags.
  san = lib.optionalString (useAsan || useUbsan) (
    "-fno-gpu-sanitize "
    + lib.optionalString useUbsan "-fsanitize=undefined "
    + lib.optionalString useAsan "-fsanitize=address -shared-libsan "
  );

  # Single definition used for both CFLAGS and CXXFLAGS so they cannot drift.
  # -O2 and -fno-strict-aliasing due to UB issues in RCCL :c
  # Reported upstream
  compileFlags = "-I${clr}/include -I${roctracer}/include -O2 -fno-strict-aliasing ${san}-fno-omit-frame-pointer -momit-leaf-frame-pointer";

  # Semicolon-separated target list in the form CMake expects.
  gpuTargetList = lib.concatStringsSep ";" gpuTargets;
in
# Note: we can't properly test or make use of multi-node collective ops.
# https://github.com/NixOS/nixpkgs/issues/366242 tracks kernel support:
# - kfd_peerdirect support, which is in the out-of-tree amdkfd in
#   ROCm/ROCK-Kernel-Driver
# - infiniband ib_peer_mem support, which isn't in the mainline kernel but is
#   carried by some distros
stdenv.mkDerivation (finalAttrs: {
  pname = "rccl${clr.gpuArchSuffix}";
  version = "7.1.1";

  # Test binaries get their own output so the default output stays library-only.
  outputs = [
    "out"
  ]
  ++ lib.optionals buildTests [
    "test"
  ];

  patches = [
    ./rccl-test-missing-iomanip.diff
    ./fix_hw_reg_hw_id_gt_gfx10.patch
  ];

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rccl";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-3u7D3Gre1n+4Lf+cK+RMfCUM9c46pXZjdhGOrwIKM0w=";
  };

  requiredSystemFeatures = [ "big-parallel" ]; # Very resource intensive LTO

  nativeBuildInputs = [
    cmake
    rocm-cmake
    clr
    perl
    hipify
    python3
    pkg-config
    autoPatchelfHook # ASAN doesn't add rpath without this
  ];

  buildInputs = [
    rocm-smi
    fmt
    gtest
    roctracer
    rocprofiler
    rocprofiler-register
    mscclpp
  ]
  ++ lib.optionals buildTests [
    chrpath
  ];

  cmakeFlags = [
    "-DHIP_CLANG_NUM_PARALLEL_JOBS=4"
    "-DCMAKE_BUILD_TYPE=Release"
    "-DROCM_PATH=${clr}"
    "-DHIP_COMPILER=${clr}/bin/amdclang++"
    "-DCMAKE_CXX_COMPILER=${clr}/bin/amdclang++"
    "-DROCM_PATCH_VERSION=${rocm-core.ROCM_LIBPATCH_VERSION}"
    "-DROCM_VERSION=${rocm-core.ROCM_LIBPATCH_VERSION}"
    "-DBUILD_BFD=OFF" # Can't get it to detect bfd.h
    "-DENABLE_MSCCL_KERNEL=ON"
    # FIXME: this is still running a download because if(NOT mscclpp_nccl_FOUND) is commented out T_T
    "-DENABLE_MSCCLPP=OFF"
    #"-DMSCCLPP_ROOT=${mscclpp}"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ]
  ++ lib.optionals (gpuTargets != [ ]) [
    # AMD can't make up their minds and keep changing which one is used in different projects.
    "-DAMDGPU_TARGETS=${gpuTargetList}"
    "-DGPU_TARGETS=${gpuTargetList}"
  ]
  ++ lib.optionals buildTests [
    "-DBUILD_TESTS=ON"
  ];

  env.CFLAGS = compileFlags;
  env.CXXFLAGS = compileFlags;
  env.LDFLAGS = san;

  # ${"\${...}"} and ''${...} both yield a literal "${...}" in the shell
  # script, so the CMake variable references are matched verbatim.
  postPatch = ''
    patchShebangs src tools
    substituteInPlace CMakeLists.txt \
      --replace-fail '${"\${HOST_OS_ID}"}' '"ubuntu"' \
      --replace-fail 'target_include_directories(rccl PRIVATE ''${ROCM_SMI_INCLUDE_DIR})' \
      'target_include_directories(rccl PRIVATE ''${ROCM_SMI_INCLUDE_DIRS})'
  '';

  postInstall =
    # Record the ASan runtime as a NEEDED entry of the installed library.
    lib.optionalString useAsan ''
      patchelf --add-needed ${clr}/llvm/lib/linux/libclang_rt.asan-${stdenv.hostPlatform.parsed.cpu.name}.so $out/lib/librccl.so
    ''
    # Relocate everything installed under bin into the "test" output.
    + lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/* $test/bin
      rmdir $out/bin
    '';

  passthru = {
    updateScript = rocmUpdateScript {
      name = finalAttrs.pname;
      inherit (finalAttrs.src) owner repo;
    };

    # This package with sanitizers + manual integration test binaries built
    # must be run manually
    tests.rccl = rccl.override {
      buildTests = true;
    };
  };

  meta = {
    description = "ROCm communication collectives library";
    homepage = "https://github.com/ROCm/rccl";
    license = [
      lib.licenses.bsd2
      lib.licenses.bsd3
    ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})