nixpkgs mirror (for testing)
github.com/NixOS/nixpkgs
nix
{
  # nixpkgs helpers
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,

  # Packages consumed by the build (alphabetical)
  amd-blis,
  boost,
  clr,
  cmake,
  gfortran,
  gtest,
  hipblas-common,
  jemalloc,
  lapack-reference,
  libffi,
  libxml2,
  llvm,
  msgpack-cxx,
  ncurses,
  ninja,
  openmp,
  pkg-config,
  python3,
  python3Packages,
  rocm-cmake,
  rocm-smi,
  zlib,
  zstd,

  # Build options
  # Adds the "test" output and pulls in gtest.
  buildTests ? false,
  # Adds the "sample" output.
  buildSamples ? false,
  # hipblaslt supports only devices with MFMA or WMMA.
  # Defaults to clr.localGpuTargets, falling back to clr.gpuTargets when
  # clr does not provide that attribute.
  gpuTargets ? clr.localGpuTargets or clr.gpuTargets,
}:
34
let
  # Architectures hipblaslt is known to build for.
  knownTargets = [
    "gfx908"
    "gfx90a"
    "gfx942"
    "gfx950"
    "gfx1100"
    "gfx1101"
    "gfx1150"
    "gfx1151"
    "gfx1200"
    "gfx1201"
  ];
  # hipblaslt is extremely particular about what it will build with, so only
  # the requested targets that also appear in the known list are used.
  supportedTargets = lib.intersectLists gpuTargets knownTargets;
  # True when at least one requested target survives the intersection.
  supportsTargetArches = supportedTargets != [ ];
  # Python interpreter bundled with the modules listed below.
  py = python3.withPackages (pyPkgs: [
    pyPkgs.pyyaml
    pyPkgs.setuptools
    pyPkgs.packaging
    pyPkgs.nanobind
    pyPkgs.msgpack
  ]);
  # Workaround: when none of the requested targets is supported, still build
  # for one working target, because a few CXX files are built for the device
  # regardless.
  gpuTargets' =
    if !supportsTargetArches then "gfx1200" else lib.concatStringsSep ";" supportedTargets;
  compiler = "amdclang++";
  # -Wno-switch silences spammy warnings that come with fixme messages in some cases.
  # FIXME(LunNova@): cmake files need patched to include this properly or
  # maybe we improve the toolchain to use config files + assemble a sysroot
  # so system wide include assumptions work
  cFlags = lib.concatStringsSep " " [
    "-Wno-switch"
    "-fopenmp"
    "-I${lib.getDev zstd}/include"
    "-I${amd-blis}/include/blis/"
    "-I${lib.getDev msgpack-cxx}/include"
  ];
in
# hipblaslt, built from the projects/hipblaslt subtree of ROCm/rocm-libraries
# with amdclang++ for the GPU targets computed in the let block.
# Outputs: "out", "benchmark" (always), plus "test" / "sample" when
# buildTests / buildSamples are enabled.
stdenv.mkDerivation (finalAttrs: {
  pname = "hipblaslt${clr.gpuArchSuffix}";
  version = "7.1.1";

  # Only the hipblaslt subtree of the monorepo is checked out.
  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocm-libraries";
    rev = "a676499add42941ff6af1e8d3f0504416dac7429";
    hash = "sha256-zIYdHFbHyP2V6dkx6Ueb6NBqWu8tJji2hSWF9zWEJa4=";
    sparseCheckout = [ "projects/hipblaslt" ];
  };
  sourceRoot = "${finalAttrs.src.name}/projects/hipblaslt";
  env.CXX = compiler;
  env.CFLAGS = cFlags;
  env.CXXFLAGS = cFlags;
  env.ROCM_PATH = "${clr}";
  # Point both Tensile toolchain variables at clr's amdclang++.
  env.TENSILE_ROCM_ASSEMBLER_PATH = lib.getExe' clr "amdclang++";
  env.TENSILE_GEN_ASSEMBLY_TOOLCHAIN = lib.getExe' clr "amdclang++";
  # NOTE(review): jemalloc is preloaded into every build process and tuned via
  # MALLOC_CONF; the rationale is not recorded here - presumably to tame
  # allocator overhead of the kernel generation steps, confirm before changing.
  env.LD_PRELOAD = "${jemalloc}/lib/libjemalloc.so";
  env.MALLOC_CONF = "background_thread:true,metadata_thp:auto,dirty_decay_ms:10000,muzzy_decay_ms:10000";
  requiredSystemFeatures = [ "big-parallel" ];

  __structuredAttrs = true;
  strictDeps = true;

  outputs = [
    "out"
    # benchmarks are non-optional
    "benchmark"
  ]
  ++ lib.optionals buildTests [
    "test"
  ]
  ++ lib.optionals buildSamples [
    "sample"
  ];

  patches = [
    # Upstream issue requesting properly specifying
    # parallel-jobs for these invocations
    # https://github.com/ROCm/rocm-libraries/issues/1242
    ./parallel-buildSourceCodeObjectFile.diff
    # Support loading zstd compressed .dat files, required to keep output under
    # hydra size limit
    ./messagepack-compression-support.patch
    # [hipblaslt] Refactor Parallel.py to drop joblib, massively reduce peak disk space usage
    # https://github.com/ROCm/rocm-libraries/pull/2073
    ./TensileCreateLibrary-refactor.patch
    ./Tensile-interning.patch
  ];

  postPatch = ''
    # git isn't needed and we have no .git
    substituteInPlace cmake/dependencies.cmake \
      --replace-fail "find_package(Git REQUIRED)" ""
    substituteInPlace CMakeLists.txt \
      --replace-fail " LANGUAGES CXX" " LANGUAGES CXX C ASM"
  '';

  # No test suite is run; only the output size check in installCheckPhase.
  doCheck = false;
  doInstallCheck = true;

  nativeBuildInputs = [
    cmake
    rocm-cmake
    py
    clr
    gfortran
    pkg-config
    ninja
    rocm-smi
    # zstd CLI is invoked in postInstall
    zstd
  ];

  buildInputs = [
    llvm.llvm
    clr
    rocm-cmake
    hipblas-common
    amd-blis
    rocm-smi
    openmp
    libffi
    ncurses
    lapack-reference

    # Tensile deps - not optional, building without tensile isn't actually supported
    msgpack-cxx
    libxml2
    python3Packages.msgpack
    zlib
    zstd
  ]
  ++ lib.optionals buildTests [
    gtest
  ];

  cmakeFlags = [
    (lib.cmakeFeature "Boost_INCLUDE_DIR" "${lib.getDev boost}/include") # msgpack FindBoost fails to find boost
    (lib.cmakeFeature "GPU_TARGETS" gpuTargets')
    (lib.cmakeBool "BUILD_TESTING" buildTests)
    (lib.cmakeBool "HIPBLASLT_ENABLE_BLIS" true)
    (lib.cmakeBool "HIPBLASLT_BUILD_TESTING" buildTests)
    (lib.cmakeBool "HIPBLASLT_ENABLE_SAMPLES" buildSamples)
    # Device code is only enabled when at least one requested target is supported
    (lib.cmakeBool "HIPBLASLT_ENABLE_DEVICE" supportsTargetArches)
    # FIXME: rocroller was meant to be enabled for ROCm 7.x but is still
    # disabled in this 7.1.1 packaging
    (lib.cmakeBool "HIPBLASLT_ENABLE_ROCROLLER" false)
    "-DCMAKE_C_COMPILER=amdclang"
    "-DCMAKE_HIP_COMPILER=${compiler}"
    "-DCMAKE_CXX_COMPILER=${compiler}"
    "-DROCM_FOUND=ON" # hipblaslt tries to download rocm-cmake if this isn't set
    "-DBLIS_ROOT=${amd-blis}"
    "-DBLIS_LIB=${amd-blis}/lib/libblis-mt.so"
    "-DBLIS_INCLUDE_DIR=${amd-blis}/include/blis/"
    "-DBLA_PREFER_PKGCONFIG=ON"
    "-DFETCHCONTENT_SOURCE_DIR_NANOBIND=${python3Packages.nanobind.src}"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
    "-DHIPBLASLT_ENABLE_MARKER=Off"
  ];

  postInstall =
    # Compress msgpack .dat files to stay under hydra output size limit
    # Relies on messagepack-compression-support.patch
    # A failed zstd run skips the mv and leaves the original file in place;
    # installCheckPhase then rejects any .dat file that is still too large.
    ''
      for file in $out/lib/hipblaslt/library/*.dat; do
        zstd -19 --long -f "$file" -o "$file.tmp" && mv "$file.tmp" "$file"
      done
    ''
    # Move binaries to appropriate outputs and delete leftover /bin
    # (rmdir fails the build if anything unexpected is left behind)
    + ''
      mkdir -p $benchmark/bin
      mv $out/bin/hipblaslt-{api-overhead,sequence,bench*} $out/bin/*.yaml $out/bin/*.py $benchmark/bin
      ${lib.optionalString buildTests ''
        mkdir -p $test/bin
        mv $out/bin/hipblaslt-test $test/bin
      ''}
      ${lib.optionalString buildSamples ''
        mkdir -p $sample/bin
        mv $out/bin/example-* $sample/bin
      ''}
      rmdir $out/bin
    '';

  installCheckPhase =
    # Verify compression worked and .dat files aren't huge.
    # The matches are collected and checked explicitly: with
    # `find -exec ... \;` a failing command does not change find's exit
    # status, so an `exit 1` inside -exec would never fail the phase.
    ''
      runHook preInstallCheck
      oversizedDatFiles="$(find "$out" -type f -name "*.dat" -size "+2M")"
      if [ -n "$oversizedDatFiles" ]; then
        echo "ERROR: oversized .dat file, check for issues with install compression:" >&2
        echo "$oversizedDatFiles" >&2
        exit 1
      fi
      echo "Verified .dat files in $out are not huge"
      runHook postInstallCheck
    '';

  # If this is false there are no kernels in the output lib
  # supporting the target device
  # so if it's an optional dep it's best to not depend on it
  # Some packages like torch need hipblaslt to compile
  # and are fine ignoring it at runtime if it's not supported
  # so we have to support building an empty hipblaslt
  passthru.supportsTargetArches = supportsTargetArches;
  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };
  meta = {
    description = "Library that provides general matrix-matrix operations with a flexible API";
    homepage = "https://github.com/ROCm/hipBLASlt";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})