nixpkgs mirror (for testing)
github.com/NixOS/nixpkgs
nix
# hipBLASLt built from the ROCm/hipBLASLt GitHub repository, together with the
# TensileLite sources that ship in the same tree (see `tensile'` below).
#
# Notable arguments:
#   buildTests / buildBenchmarks / buildSamples
#     Each flag turns on the matching BUILD_CLIENTS_* CMake option and adds a
#     dedicated output ("test", "benchmark", "sample") that postInstall fills.
#   gpuTargets
#     List of GPU architecture names. Defaults to `clr.localGpuTargets` when clr
#     exposes it, otherwise to the list below.
{
  lib,
  stdenv,
  fetchpatch,
  fetchFromGitHub,
  cmake,
  rocm-cmake,
  clr,
  gfortran,
  gtest,
  msgpack,
  libxml2,
  python3,
  python3Packages,
  openmp,
  hipblas-common,
  tensile,
  lapack-reference,
  ncurses,
  libffi,
  zlib,
  zstd,
  rocmUpdateScript,
  buildTests ? false,
  buildBenchmarks ? false,
  buildSamples ? false,
  # hipblaslt supports only devices with MFMA or WMMA
  # WMMA on gfx1100 may be broken
  # MFMA on MI100 may be broken
  # MI200/MI300 known to work
  gpuTargets ? (
    clr.localGpuTargets or [
      # "gfx908" FIXME: confirm MFMA on MI100 works
      "gfx90a"
      "gfx942"
      # "gfx1100" FIXME: confirm WMMA targets work
    ]
  ),
}:

stdenv.mkDerivation (
  finalAttrs:
  let
    # True when at least one requested target is a gfx9* or gfx11* architecture.
    # When false, the no-arch patches are applied and Tensile is switched off.
    supportsTargetArches =
      (builtins.any (lib.strings.hasPrefix "gfx9") gpuTargets)
      || (builtins.any (lib.strings.hasPrefix "gfx11") gpuTargets);

    # Tensile in "lite" mode, built from the tensilelite/ subdirectory of this
    # package's own source instead of the standalone Tensile source.
    tensile' = (tensile.override { isTensileLite = true; }).overrideAttrs {
      inherit (finalAttrs) src;
      sourceRoot = "${finalAttrs.src.name}/tensilelite";
    };

    # Python interpreter handed to CMake via VIRTUALENV_PYTHON_EXENAME.
    py = python3.withPackages (ps: [
      ps.pyyaml
      ps.setuptools
      ps.packaging
    ]);

    # Semicolon-separated target list for CMake; empty when nothing is supported.
    gpuTargets' = lib.optionalString supportsTargetArches (lib.concatStringsSep ";" gpuTargets);

    compiler = "amdclang++";
    # Store path of the compiler binary inside clr, shared by several env vars.
    compilerPath = lib.getExe' clr compiler;
    cFlags = "-O3 -I${msgpack}/include"; # FIXME: cmake files need patched to include this properly
  in
  {
    pname = "hipblaslt${clr.gpuArchSuffix}";
    version = "6.3.3";

    src = fetchFromGitHub {
      owner = "ROCm";
      repo = "hipBLASLt";
      rev = "rocm-${finalAttrs.version}";
      hash = "sha256-ozfHwsxcczzYXN9SIkyfRvdtaCqlDN4bh3UHZNS2oVQ=";
    };

    env = {
      CXX = compiler;
      CFLAGS = cFlags;
      CXXFLAGS = cFlags;
      ROCM_PATH = "${clr}";
      TENSILE_ROCM_ASSEMBLER_PATH = compilerPath;
      TENSILE_GEN_ASSEMBLY_TOOLCHAIN = compilerPath;
      # Some tensile scripts look for this as an env var rather than a cmake flag
      CMAKE_CXX_COMPILER = compilerPath;
    };

    requiredSystemFeatures = [ "big-parallel" ];

    outputs = [
      "out"
    ]
    ++ lib.optionals buildTests [
      "test"
    ]
    ++ lib.optionals buildBenchmarks [
      "benchmark"
    ]
    ++ lib.optionals buildSamples [
      "sample"
    ];

    # Note on the last substitution: the bash single quote sits directly in
    # front of the `$`, so the usual `''$` escape would be lexed as `'''`
    # (an escaped `''`). The antiquoted string literal avoids that.
    postPatch = ''
      mkdir -p build/Tensile/library
      # git isn't needed and we have no .git
      substituteInPlace cmake/Dependencies.cmake \
        --replace-fail "find_package(Git REQUIRED)" ""
      substituteInPlace CMakeLists.txt \
        --replace-fail "include(virtualenv)" "" \
        --replace-fail "virtualenv_install(\''${Tensile_TEST_LOCAL_PATH})" "" \
        --replace-fail "virtualenv_install(\''${CMAKE_SOURCE_DIR}/tensilelite)" "" \
        --replace-fail 'find_package(Tensile 4.33.0 EXACT REQUIRED HIP LLVM OpenMP PATHS "''${INSTALLED_TENSILE_PATH}")' "find_package(Tensile)" \
        --replace-fail 'Tensile_CPU_THREADS ""' 'Tensile_CPU_THREADS "$ENV{NIX_BUILD_CORES}"'
      # FIXME: TensileCreateExtOpLibraries build failure due to unsupported null operand
      # Working around for now by disabling the ExtOp libs
      substituteInPlace library/src/amd_detail/rocblaslt/src/CMakeLists.txt \
        --replace-fail 'TensileCreateExtOpLibraries("' '# skipping TensileCreateExtOpLibraries'
      substituteInPlace library/src/amd_detail/rocblaslt/src/kernels/compile_code_object.sh \
        --replace-fail '${"\${rocm_path}"}/bin/' ""
    '';

    # Apply patches to allow building without a target arch if we need to do that
    # NOTE(review): both URLs track Gentoo's master branch rather than a pinned
    # commit, so the fixed hashes break if the files change upstream.
    patches = lib.optionals (!supportsTargetArches) [
      # Add ability to build without specifying any arch.
      (fetchpatch {
        hash = "sha256-VW3bPzmQvfo8+iKsVfpn4sbqAe41fLzCEUfBh9JxVyk=";
        url = "https://raw.githubusercontent.com/gentoo/gentoo/refs/heads/master/sci-libs/hipBLASLt/files/hipBLASLt-6.1.1-no-arch.patch";
      })
      # Followup to above patch for 6.3.x
      (fetchpatch {
        hash = "sha256-GCsrne6BiWzwj8TMAfFuaYz1Pij97hoCc6E3qJhWb10=";
        url = "https://raw.githubusercontent.com/gentoo/gentoo/refs/heads/master/sci-libs/hipBLASLt/files/hipBLASLt-6.3.0-no-arch-extra.patch";
      })
    ];

    doCheck = false;
    doInstallCheck = false;

    nativeBuildInputs = [
      cmake
      rocm-cmake
      py
      clr
      gfortran
      # need make to get streaming console output so nix knows build is still running
      # so deliberately not using ninja
    ];

    buildInputs = [
      hipblas-common
      tensile'
      openmp
      libffi
      ncurses

      # Tensile deps - not optional, building without tensile isn't actually supported
      msgpack # FIXME: not included in cmake!
      libxml2
      python3Packages.msgpack
      python3Packages.joblib
      zlib
      zstd
    ]
    ++ lib.optionals buildTests [
      gtest
    ]
    ++ lib.optionals (buildTests || buildBenchmarks) [
      lapack-reference
    ];

    cmakeFlags = [
      "-Wno-dev"
      "-DCMAKE_BUILD_TYPE=Release"
      "-DCMAKE_VERBOSE_MAKEFILE=ON"
      "-DVIRTUALENV_PYTHON_EXENAME=${lib.getExe py}"
      "-DTENSILE_USE_HIP=ON"
      "-DTENSILE_BUILD_CLIENT=OFF"
      "-DTENSILE_USE_FLOAT16_BUILTIN=ON"
      "-DCMAKE_CXX_COMPILER=${compiler}"
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
      "-DHIPBLASLT_ENABLE_MARKER=Off"
      # FIXME what are the implications of hardcoding this?
      "-DTensile_CODE_OBJECT_VERSION=V5"
      "-DTensile_COMPILER=${compiler}"
      "-DAMDGPU_TARGETS=${gpuTargets'}"
      "-DGPU_TARGETS=${gpuTargets'}"
      "-DTensile_LIBRARY_FORMAT=msgpack"
    ]
    ++ lib.optionals (!supportsTargetArches) [
      "-DBUILD_WITH_TENSILE=OFF"
    ]
    ++ lib.optionals buildTests [
      "-DBUILD_CLIENTS_TESTS=ON"
    ]
    ++ lib.optionals buildBenchmarks [
      "-DBUILD_CLIENTS_BENCHMARKS=ON"
    ]
    ++ lib.optionals buildSamples [
      "-DBUILD_CLIENTS_SAMPLES=ON"
    ];

    # Move the optional client programs into their own outputs.
    # The hipBLASLt client executables are named hipblaslt-test and
    # hipblaslt-bench (not hipblas-*, which belong to hipBLAS).
    # NOTE(review): confirm the installed sample binaries really match
    # `example-*`, and that nothing else is left in $out/bin before the rmdir.
    postInstall =
      lib.optionalString buildTests ''
        mkdir -p $test/bin
        mv $out/bin/hipblaslt-test $test/bin
      ''
      + lib.optionalString buildBenchmarks ''
        mkdir -p $benchmark/bin
        mv $out/bin/hipblaslt-bench $benchmark/bin
      ''
      + lib.optionalString buildSamples ''
        mkdir -p $sample/bin
        mv $out/bin/example-* $sample/bin
      ''
      + lib.optionalString (buildTests || buildBenchmarks || buildSamples) ''
        rmdir $out/bin
      '';

    passthru = {
      # If this is false there are no kernels in the output lib
      # and it's useless at runtime
      # so if it's an optional dep it's best to not depend on it
      # Some packages like torch need hipblaslt to compile
      # and are fine ignoring it at runtime if it's not supported
      # so we have to support building an empty hipblaslt
      inherit supportsTargetArches;
      updateScript = rocmUpdateScript {
        name = finalAttrs.pname;
        inherit (finalAttrs.src) owner repo;
      };
      tensilelite = tensile';
    };

    meta = {
      description = "Library that provides general matrix-matrix operations with a flexible API";
      homepage = "https://github.com/ROCm/hipBLASlt";
      license = with lib.licenses; [ mit ];
      teams = [ lib.teams.rocm ];
      platforms = lib.platforms.linux;
    };
  }
)