nixpkgs mirror (for testing)
github.com/NixOS/nixpkgs
nix
# RCCL (ROCm communication collectives library) package expression.
#
# Overridable arguments:
#   buildTests - when true, passes -DBUILD_TESTS=ON, compiles with ASan and
#                UBSan enabled, adds chrpath to buildInputs and moves the
#                installed binaries into a separate `test` output.
#   gpuTargets - list of GPU architecture names forwarded to CMake as both
#                AMDGPU_TARGETS and GPU_TARGETS; defaults to
#                `clr.localGpuTargets`, or `[ ]` when clr does not define it.
#                An empty list leaves both CMake variables unset.
#   rccl       - this package itself, only used to define passthru.tests.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  rocm-smi,
  rocm-core,
  pkg-config,
  clr,
  mscclpp,
  perl,
  hipify,
  python3,
  fmt,
  gtest,
  chrpath,
  roctracer,
  rocprofiler,
  rocprofiler-register,
  autoPatchelfHook,
  buildTests ? false,
  gpuTargets ? (clr.localGpuTargets or [ ]),
  # for passthru.tests
  rccl,
}:

let
  # Both sanitizers are tied to the test build.
  useAsan = buildTests;
  useUbsan = buildTests;

  # Sanitizer flags shared by the compile and link steps. Every fragment keeps
  # its trailing space because the result is spliced directly in front of
  # further flags below. Each sanitizer is gated on its own toggle so that
  # enabling only one of them does not silently enable the other.
  san = lib.optionalString (useAsan || useUbsan) (
    "-fno-gpu-sanitize "
    + lib.optionalString useUbsan "-fsanitize=undefined "
    + lib.optionalString useAsan "-fsanitize=address -shared-libsan "
  );

  # -O2 and -fno-strict-aliasing due to UB issues in RCCL :c
  # Reported upstream
  # The same flag string is used for both C and C++.
  compileFlags = "-I${clr}/include -I${roctracer}/include -O2 -fno-strict-aliasing ${san}-fno-omit-frame-pointer -momit-leaf-frame-pointer";

  # CMake list syntax: entries separated by semicolons.
  gpuTargetList = lib.concatStringsSep ";" gpuTargets;
in
# Note: we can't properly test or make use of multi-node collective ops.
# https://github.com/NixOS/nixpkgs/issues/366242 tracks the kernel support:
#  - kfd_peerdirect support only exists in the out-of-tree amdkfd from
#    ROCm/ROCK-Kernel-Driver
#  - infiniband ib_peer_mem support isn't in the mainline kernel but is
#    carried by some distros
stdenv.mkDerivation (finalAttrs: {
  pname = "rccl${clr.gpuArchSuffix}";
  version = "7.1.1";

  # Test binaries get their own output so that `out` only carries the library.
  outputs = [ "out" ] ++ lib.optionals buildTests [ "test" ];

  patches = [
    ./rccl-test-missing-iomanip.diff
    ./fix_hw_reg_hw_id_gt_gfx10.patch
  ];

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rccl";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-3u7D3Gre1n+4Lf+cK+RMfCUM9c46pXZjdhGOrwIKM0w=";
  };

  requiredSystemFeatures = [ "big-parallel" ]; # Very resource intensive LTO

  nativeBuildInputs = [
    cmake
    rocm-cmake
    clr
    perl
    hipify
    python3
    pkg-config
    autoPatchelfHook # ASAN doesn't add rpath without this
  ];

  buildInputs = [
    rocm-smi
    fmt
    gtest
    roctracer
    rocprofiler
    rocprofiler-register
    mscclpp
  ]
  ++ lib.optionals buildTests [
    chrpath
  ];

  cmakeFlags = [
    "-DHIP_CLANG_NUM_PARALLEL_JOBS=4"
    "-DCMAKE_BUILD_TYPE=Release"
    "-DROCM_PATH=${clr}"
    "-DHIP_COMPILER=${clr}/bin/amdclang++"
    "-DCMAKE_CXX_COMPILER=${clr}/bin/amdclang++"
    "-DROCM_PATCH_VERSION=${rocm-core.ROCM_LIBPATCH_VERSION}"
    "-DROCM_VERSION=${rocm-core.ROCM_LIBPATCH_VERSION}"
    "-DBUILD_BFD=OFF" # Can't get it to detect bfd.h
    "-DENABLE_MSCCL_KERNEL=ON"
    # FIXME: this is still running a download because if(NOT mscclpp_nccl_FOUND) is commented out T_T
    "-DENABLE_MSCCLPP=OFF"
    #"-DMSCCLPP_ROOT=${mscclpp}"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ]
  ++ lib.optionals (gpuTargets != [ ]) [
    # AMD can't make up their minds and keep changing which one is used in different projects.
    "-DAMDGPU_TARGETS=${gpuTargetList}"
    "-DGPU_TARGETS=${gpuTargetList}"
  ]
  ++ lib.optionals buildTests [
    "-DBUILD_TESTS=ON"
  ];

  env.CFLAGS = compileFlags;
  env.CXXFLAGS = compileFlags;
  # The sanitizer flags are passed to the linker as well (empty when disabled).
  env.LDFLAGS = san;

  # Note: `''${` is the indented-string escape for a literal `${`, so the
  # patterns below reach substituteInPlace as CMake variable references rather
  # than being interpolated by Nix.
  #  - every `${HOST_OS_ID}` reference in CMakeLists.txt is replaced by the
  #    literal "ubuntu" (presumably to avoid depending on host OS detection;
  #    confirm against upstream CMakeLists.txt)
  #  - the rccl include directories use ROCM_SMI_INCLUDE_DIRS instead of
  #    ROCM_SMI_INCLUDE_DIR
  postPatch = ''
    patchShebangs src tools
    substituteInPlace CMakeLists.txt \
      --replace-fail '(''${HOST_OS_ID})' '("ubuntu")' \
      --replace-fail 'target_include_directories(rccl PRIVATE ''${ROCM_SMI_INCLUDE_DIR})' \
        'target_include_directories(rccl PRIVATE ''${ROCM_SMI_INCLUDE_DIRS})'
  '';

  postInstall =
    # With ASan enabled, record the ASan runtime shipped with clr as a
    # DT_NEEDED entry of librccl.so.
    lib.optionalString useAsan ''
      patchelf --add-needed ${clr}/llvm/lib/linux/libclang_rt.asan-${stdenv.hostPlatform.parsed.cpu.name}.so $out/lib/librccl.so
    ''
    # Move everything installed under bin/ into the `test` output.
    + lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/* $test/bin
      rmdir $out/bin
    '';

  passthru = {
    updateScript = rocmUpdateScript {
      name = finalAttrs.pname;
      inherit (finalAttrs.src) owner repo;
    };

    # This package with sanitizers + manual integration test binaries built
    # must be run manually
    tests.rccl = rccl.override {
      buildTests = true;
    };
  };

  meta = {
    description = "ROCm communication collectives library";
    homepage = "https://github.com/ROCm/rccl";
    license = [
      lib.licenses.bsd2
      lib.licenses.bsd3
    ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})