nixpkgs mirror (for testing)
github.com/NixOS/nixpkgs
nix
{
  # Nixpkgs basics
  lib,
  stdenv,
  fetchFromGitHub,

  # Tools that end up in nativeBuildInputs (python3 is wrapped with extra
  # modules before being added there)
  cmake,
  rocm-cmake,
  pkg-config,
  ninja,
  clr,
  python3,

  # Libraries that end up in buildInputs
  rocblas,
  rocsolver,
  hipblas-common,
  hipblas,
  openmp,
  libffi,
  ncurses,
  xz,
  nlohmann_json,
  rocmlir,
  msgpack,
  libxml2,
  python3Packages,
  zlib,
  zstd,

  # Only added to buildInputs when tests and/or benchmarks are enabled
  gtest,
  lapack-reference,

  # Provides LLVM_SYSPATH and MLIR_DIR
  triton-llvm,

  # Optional components; each enabled one also gets its own output
  buildTests ? false,
  buildBenchmarks ? false,
  buildSamples ? false,

  # aotriton GPU support list:
  # https://github.com/ROCm/aotriton/blob/main/v2python/gpu_targets.py
  gpuTargets ? [
    "gfx90a"
    "gfx942"
    "gfx1100"
    "gfx1101"
  ],
}:
41
42stdenv.mkDerivation (
43 finalAttrs:
44 let
# Python interpreter bundled with the modules needed at build time; it is
# added to nativeBuildInputs and passed to cmake as VIRTUALENV_PYTHON_EXENAME.
py = python3.withPackages (pyPkgs: [
  pyPkgs.pyyaml
  pyPkgs.distutils
  pyPkgs.setuptools
  pyPkgs.packaging
  pyPkgs.numpy
  pyPkgs.wheel
  pyPkgs.filelock
  pyPkgs.iniconfig
  pyPkgs.pluggy
  pyPkgs.pybind11
]);
# Target list joined with ';' for the -DAMDGPU_TARGETS / -DGPU_TARGETS flags.
gpuTargets' = builtins.concatStringsSep ";" gpuTargets;
# Used both for env.CXX and for -DCMAKE_CXX_COMPILER.
compiler = "amdclang++";
# Release flags handed to cmake through cmakeFlagsArray in preConfigure.
cFlags = "-O3 -DNDEBUG";
cxxFlags = cFlags + " -Wno-c++11-narrowing";
# Plain alias; LLVM_SYSPATH and MLIR_DIR are derived from it.
triton-llvm' = triton-llvm;
62 in
63 {
pname = "aotriton";
version = "0.9.2b";

# Fetched at the revision named after `version`, including git submodules.
src = fetchFromGitHub {
  owner = "ROCm";
  repo = "aotriton";
  rev = finalAttrs.version;
  fetchSubmodules = true;
  hash = "sha256-1Cf0olD3zRg9JESD6s/WaGifm3kfD12VUvjTZHpmGAE=";
};

env.CXX = compiler;
env.ROCM_PATH = "${clr}";

requiredSystemFeatures = [ "big-parallel" ];
77
# "out" is always produced; each optional component adds one extra output.
outputs =
  [ "out" ]
  ++ lib.optional buildTests "test"
  ++ lib.optional buildBenchmarks "benchmark"
  ++ lib.optional buildSamples "sample";
90
# Compilation needs a cuda.h to exist. An empty stub is created instead of
# pulling in the unfree CUDA headers.
postPatch = ''
  touch third_party/triton/third_party/nvidia/include/cuda.h
'';

# Neither the check phase nor the install-check phase is run.
doCheck = false;
doInstallCheck = false;
99
nativeBuildInputs = [
  cmake
  rocm-cmake
  pkg-config
  py
  clr
  ninja
];

buildInputs =
  [
    rocblas
    rocsolver
    hipblas-common
    hipblas
    openmp
    libffi
    ncurses
    xz
    nlohmann_json
    rocmlir

    msgpack
    libxml2
    python3Packages.msgpack
    zlib
    zstd
  ]
  # gtest is only pulled in for the test build
  ++ lib.optional buildTests gtest
  # lapack-reference is pulled in for tests as well as benchmarks
  ++ lib.optional (buildTests || buildBenchmarks) lapack-reference;
133
env.TRITON_OFFLINE_BUILD = 1;
env.LLVM_SYSPATH = "${triton-llvm'}";
env.JSON_SYSPATH = nlohmann_json;
env.MLIR_DIR = "${triton-llvm'}/lib/cmake/mlir";

# Fix up header issues in triton: https://github.com/triton-lang/triton/pull/3985/files
#
# CXXFLAGS and HOME are exported from here rather than set as fixed strings:
# their values depend on where the build directory lives, which is only known
# at build time and is not guaranteed to be /build.
preConfigure = ''
  # Stub cuda.h for the nvidia backend; -p keeps this working if the
  # directory already exists.
  mkdir -p third_party/triton/third_party/nvidia/backend/include/
  touch third_party/triton/third_party/nvidia/backend/include/cuda.h
  find third_party/triton -type f -exec sed -i 's|[<]cupti.h[>]|"cupti.h"|g' {} +
  find third_party/triton -type f -exec sed -i 's|[<]cuda.h[>]|"cuda.h"|g' {} +

  # Put the stub header directory on the include path. preConfigure runs from
  # the source root (the relative paths in this script rely on that), so $PWD
  # gives its absolute location. Any CXXFLAGS already set are kept.
  export CXXFLAGS="-I$PWD/third_party/triton/third_party/nvidia/backend/include''${CXXFLAGS:+ $CXXFLAGS}"

  sed -i '2s;^;set(CMAKE_SUPPRESS_DEVELOPER_WARNINGS ON CACHE BOOL "ON")\n;' CMakeLists.txt
  sed -i '2s;^;set(CMAKE_VERBOSE_MAKEFILE ON CACHE BOOL "ON")\n;' CMakeLists.txt
  sed -i '2s;^;set(CMAKE_SUPPRESS_DEVELOPER_WARNINGS ON CACHE BOOL "ON")\n;' third_party/triton/CMakeLists.txt
  sed -i '2s;^;set(CMAKE_VERBOSE_MAKEFILE ON CACHE BOOL "ON")\n;' third_party/triton/CMakeLists.txt
  substituteInPlace third_party/triton/python/setup.py \
    --replace-fail "from distutils.command.clean import clean" "import setuptools;from distutils.command.clean import clean" \
    --replace-fail 'system == "Linux"' 'False'
  # Fix 'ld: error: unable to insert .comment after .comment'
  substituteInPlace v2python/ld_script.py \
    --replace-fail 'INSERT AFTER .comment;' ""

  # The flag values contain spaces, so they go through cmakeFlagsArray.
  cmakeFlagsArray+=(
    '-DCMAKE_C_FLAGS_RELEASE=${cFlags}'
    '-DCMAKE_CXX_FLAGS_RELEASE=${cxxFlags}'
  )
  prependToVar cmakeFlags "-GNinja"

  # Writable throw-away HOME, created under the build's temporary directory.
  export HOME="$(mktemp -d)"
'';
166
# Upstream's README advises against running ninja on its own: because of a
# limitation of the current build system, `ninja install` always runs the
# whole build. The build phase is therefore skipped and everything happens
# during installation.
dontBuild = true;

installPhase = ''
  runHook preInstall
  ninja -v install
  runHook postInstall
'';
177
cmakeFlags =
  [
    "-Wno-dev"
    "-DAOTRITON_NOIMAGE_MODE=ON" # FIXME: Should be able to build with object code but generate_shim is failing
    "-DCMAKE_BUILD_TYPE=Release"
    "-DCMAKE_VERBOSE_MAKEFILE=ON"
    "-DVIRTUALENV_PYTHON_EXENAME=${lib.getExe py}"
    "-DCMAKE_CXX_COMPILER=${compiler}"
    # CMAKE_INSTALL_<DIR> is spelled out by hand,
    # see https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
    # Both spellings of the target list are passed.
    "-DAMDGPU_TARGETS=${gpuTargets'}"
    "-DGPU_TARGETS=${gpuTargets'}"
  ]
  # One extra switch per optional component
  ++ lib.optional buildTests "-DBUILD_CLIENTS_TESTS=ON"
  ++ lib.optional buildBenchmarks "-DBUILD_CLIENTS_BENCHMARKS=ON"
  ++ lib.optional buildSamples "-DBUILD_CLIENTS_SAMPLES=ON";
202
# Moves the optional binaries out of $out/bin into their dedicated outputs,
# then removes the bin directory, which is expected to be empty by then.
# NOTE(review): the hipblas-test / hipblas-bench names look carried over from
# the hipblas package - confirm what this project actually installs.
postInstall = lib.concatStrings [
  (lib.optionalString buildTests ''
    mkdir -p $test/bin
    mv $out/bin/hipblas-test $test/bin
  '')
  (lib.optionalString buildBenchmarks ''
    mkdir -p $benchmark/bin
    mv $out/bin/hipblas-bench $benchmark/bin
  '')
  (lib.optionalString buildSamples ''
    mkdir -p $sample/bin
    mv $out/bin/example-* $sample/bin
  '')
  (lib.optionalString (buildTests || buildBenchmarks || buildSamples) ''
    rmdir $out/bin
  '')
];
# Package metadata. Every name is qualified through `lib` explicitly instead
# of being brought into scope with `with`, so it is clear where each comes from.
meta = {
  description = "ROCm Ahead of Time (AOT) Triton Math Library";
  homepage = "https://github.com/ROCm/aotriton";
  license = [ lib.licenses.mit ];
  teams = [ lib.teams.rocm ];
  platforms = lib.platforms.linux;
};
226 }
227)