# nixpkgs mirror (for testing)
# github.com/NixOS/nixpkgs
# nix
# Arguments of the rocBLAS package expression: the dependencies it pulls in,
# followed by the build toggles and the list of GPU targets to compile for.
{
  lib,
  stdenv,
  fetchFromGitHub,
  fetchpatch,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  clr,
  python3,
  tensile,
  msgpack,
  libxml2,
  gtest,
  gfortran,
  openmp,
  git,
  amd-blis,
  zstd,
  hipblas-common,
  hipblaslt,
  python3Packages,
  rocm-smi,
  # Build toggles; each one is forwarded to the matching CMake option and
  # selects the extra inputs that feature needs.
  buildTensile ? true,
  buildTests ? true,
  buildBenchmarks ? true,
  # https://github.com/ROCm/Tensile/issues/1757
  # Workaround so that gfx101* users can use rocBLAS normally: these two values
  # were meant to become `true` only after the fix had been cherry-picked into
  # a release (backporting that single fix is not enough because it depends on
  # some previous commits).
  # NOTE(review): both already default to `true` below, so the instruction
  # above looks stale -- confirm against the linked issue and trim this comment.
  tensileSepArch ? true,
  tensileLazyLib ? true,
  withHipBlasLt ? true,
  # `gfx940`, `gfx941` are not present in this list because they are early
  # engineering samples, and all final MI300 hardware is `gfx942`:
  # https://github.com/NixOS/nixpkgs/pull/298388#issuecomment-2032791130
  #
  # `gfx1012` is not present in this list because the ISA compatibility patches
  # would force all `gfx101*` GPUs to run as `gfx1010`, so `gfx101*` GPUs will
  # always try to use `gfx1010` code objects, hence building for `gfx1012` is
  # useless: https://github.com/NixOS/nixpkgs/pull/298388#issuecomment-2076327152
  #
  # `clr.localGpuTargets` takes precedence when `clr` provides it; the literal
  # list is only the fallback.
  gpuTargets ? (
    clr.localGpuTargets or [
      "gfx900"
      "gfx906"
      "gfx908"
      "gfx90a"
      "gfx942"
      "gfx1010"
      "gfx1030"
      "gfx1100"
      "gfx1101"
      "gfx1102"
      "gfx1200"
      "gfx1201"
    ]
  ),
}:
60
let
  # The GPU target list joined with `;`, the form in which it is handed to the
  # CMake target variables below and exposed through `passthru`.
  gpuTargets' = lib.concatStringsSep ";" gpuTargets;
in
# rocBLAS derivation. Optional inputs and CMake options follow the
# `buildTensile`, `buildTests`, `buildBenchmarks` and `withHipBlasLt` toggles.
stdenv.mkDerivation (finalAttrs: {
  pname = "rocblas${clr.gpuArchSuffix}";
  version = "6.3.3";

  outputs = [
    "out"
  ];

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocBLAS";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-IYcrVcGH4yZDkFZeNOJPfG0qsPS/WiH0fTSUSdo1BH4=";
  };

  nativeBuildInputs = [
    cmake
    # ninja is deliberately not used: it buffers console output, and nix times
    # out after long periods of no output
    rocm-cmake
    clr
    git
  ]
  ++ lib.optionals buildTensile [
    tensile
  ];

  buildInputs = [
    python3
    hipblas-common
  ]
  ++ lib.optionals withHipBlasLt [
    hipblaslt
  ]
  ++ lib.optionals buildTensile [
    zstd
    msgpack
    libxml2
    python3Packages.msgpack
    python3Packages.zstandard
  ]
  ++ lib.optionals buildTests [
    gtest
  ]
  ++ lib.optionals (buildTests || buildBenchmarks) [
    gfortran
    openmp
    amd-blis
    rocm-smi
  ]
  ++ lib.optionals (buildTensile || buildTests || buildBenchmarks) [
    python3Packages.pyyaml
  ];

  dontStrip = true;

  # The amd-blis include path is only added when the clients (tests or
  # benchmarks) are built.
  env.CXXFLAGS =
    "-O3 -DNDEBUG -I${hipblas-common}/include"
    + lib.optionalString (buildTests || buildBenchmarks) " -I${amd-blis}/include/blis";
  # Fails to link tests if we don't add amd-blis libs
  env.LDFLAGS = lib.optionalString (
    buildTests || buildBenchmarks
  ) "-Wl,--as-needed -L${amd-blis}/lib -lblis-mt -lcblas";
  env.TENSILE_ROCM_ASSEMBLER_PATH = "${stdenv.cc}/bin/clang++";

  cmakeFlags = [
    (lib.cmakeFeature "CMAKE_BUILD_TYPE" "Release")
    (lib.cmakeBool "CMAKE_VERBOSE_MAKEFILE" true)
    (lib.cmakeFeature "CMAKE_EXECUTE_PROCESS_COMMAND_ECHO" "STDERR")
    (lib.cmakeFeature "CMAKE_Fortran_COMPILER" "${lib.getBin gfortran}/bin/gfortran")
    (lib.cmakeFeature "CMAKE_Fortran_COMPILER_AR" "${lib.getBin gfortran}/bin/ar")
    (lib.cmakeFeature "CMAKE_Fortran_COMPILER_RANLIB" "${lib.getBin gfortran}/bin/ranlib")
    (lib.cmakeFeature "python" "python3")
    # The same target list is passed under all three variable names.
    (lib.cmakeFeature "SUPPORTED_TARGETS" gpuTargets')
    (lib.cmakeFeature "AMDGPU_TARGETS" gpuTargets')
    (lib.cmakeFeature "GPU_TARGETS" gpuTargets')
    (lib.cmakeBool "BUILD_WITH_TENSILE" buildTensile)
    (lib.cmakeBool "ROCM_SYMLINK_LIBS" false)
    (lib.cmakeFeature "ROCBLAS_TENSILE_LIBRARY_DIR" "lib/rocblas")
    (lib.cmakeBool "BUILD_WITH_HIPBLASLT" withHipBlasLt)
    (lib.cmakeBool "BUILD_CLIENTS_TESTS" buildTests)
    (lib.cmakeBool "BUILD_CLIENTS_BENCHMARKS" buildBenchmarks)
    # Samples are built together with the benchmarks.
    (lib.cmakeBool "BUILD_CLIENTS_SAMPLES" buildBenchmarks)
    (lib.cmakeBool "BUILD_OFFLOAD_COMPRESS" true)
    # Temporarily set variables to work around upstream CMakeLists issue
    # Can be removed once https://github.com/ROCm/rocm-cmake/issues/121 is fixed
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
    "-DCMAKE_INSTALL_LIBDIR=lib"
  ]
  ++ lib.optionals buildTensile [
    "-DCPACK_SET_DESTDIR=OFF"
    "-DLINK_BLIS=ON"
    "-DTensile_CODE_OBJECT_VERSION=default"
    "-DTensile_LOGIC=asm_full"
    "-DTensile_LIBRARY_FORMAT=msgpack"
    (lib.cmakeBool "BUILD_WITH_PIP" false)
    (lib.cmakeBool "Tensile_SEPARATE_ARCHITECTURES" tensileSepArch)
    (lib.cmakeBool "Tensile_LAZY_LIBRARY_LOADING" tensileLazyLib)
  ];

  passthru.amdgpu_targets = gpuTargets';

  patches = [
    (fetchpatch {
      name = "Extend-rocBLAS-HIP-ISA-compatibility.patch";
      url = "https://github.com/GZGavinZhao/rocBLAS/commit/89b75ff9cc731f71f370fad90517395e117b03bb.patch";
      hash = "sha256-W/ohOOyNCcYYLOiQlPzsrTlNtCBdJpKVxO8s+4G7sjo=";
    })
  ];

  # First substitution: pass $NIX_BUILD_CORES to Tensile.
  # Second substitution: rewrites the literal 4.42.0 to 4.43.0 in
  # CMakeLists.txt.
  # NOTE(review): presumably this is the Tensile version the build asks for,
  # bumped to match the packaged `tensile` -- confirm against upstream.
  postPatch = ''
    substituteInPlace cmake/build-options.cmake \
      --replace-fail 'Tensile_CPU_THREADS ""' 'Tensile_CPU_THREADS "$ENV{NIX_BUILD_CORES}"'
    substituteInPlace CMakeLists.txt \
      --replace-fail "4.42.0" "4.43.0"
  '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  enableParallelBuilding = true;
  requiredSystemFeatures = [ "big-parallel" ];

  # `lib` attributes are referenced explicitly instead of via `with lib;`, so
  # every name in this set has a visible origin.
  meta = {
    description = "BLAS implementation for ROCm platform";
    homepage = "https://github.com/ROCm/rocBLAS";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})