# nixpkgs mirror (for testing)
# github.com/NixOS/nixpkgs
# Language: Nix
# Base derivation for ROCm composable_kernel (CK).
#
# This expression is not meant to be built as-is: `preBuild` aborts the build
# and `meta.broken` is set. It exists to be overridden and built in chunks.
#
# Arguments of note:
#   buildTests            - adds a "test" output and moves $out/bin/test_* into it
#   buildExamples         - adds an "example" output and moves $out/bin/example_* into it
#   miOpenReqLibsOnly     - forwarded to CMake as MIOPEN_REQ_LIBS_ONLY
#   withDeprecatedKernels - when false, CK_BUILD_DEPRECATED is patched to 0 in ck.hpp
#   gpuTargets            - list of GPU archs, passed to CMake as GPU_ARCHS when non-empty
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  # NOTE(review): `llvm` is not referenced anywhere in this expression;
  # it is kept so existing callers/overrides that pass it keep working.
  llvm,
  clr,
  rocminfo,
  python3,
  hipify,
  gitMinimal,
  gtest,
  zstd,
  buildTests ? false,
  buildExamples ? false,
  # limits prebuilt kernel selection to those needed for MIOPEN (currently "*conv*")
  # Other kernels can still be used if treating CK as a header only library
  # and building specific instances, as done with ck4inductor/torch
  miOpenReqLibsOnly ? true,
  withDeprecatedKernels ? false,
  # Defaults to clr.localGpuTargets when clr provides it, otherwise the list below
  gpuTargets ? (
    clr.localGpuTargets or [
      "gfx900"
      "gfx906"
      "gfx908"
      "gfx90a"
      "gfx942"
      "gfx950"
      "gfx10-3-generic"
      "gfx11-generic"
      "gfx12-generic"
    ]
  ),
}:

stdenv.mkDerivation (finalAttrs: {
  pname = "composable_kernel_base";
  version = "7.1.1";

  # Fail early and loudly if someone tries to build the base package itself
  preBuild = ''
    echo "This derivation isn't intended to be built directly and only exists to be overridden and built in chunks";
    exit 1
  '';

  outputs = [
    "out"
  ]
  ++ lib.optionals buildTests [
    "test"
  ]
  ++ lib.optionals buildExamples [
    "example"
  ];

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "composable_kernel";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-exdkyTIK03dzlCXHm3j8ehEb9NxLOxPX9QyfMsiCgSs=";
  };

  nativeBuildInputs = [
    # Deliberately not using ninja
    # because we're jankily composing build outputs from multiple drvs
    # ninja won't believe they're up to date
    gitMinimal
    cmake
    rocminfo
    clr
    hipify
    zstd
    python3
  ];

  buildInputs = [
    rocm-cmake
    clr
    zstd
  ];

  strictDeps = true;
  enableParallelBuilding = true;
  env.ROCM_PATH = clr;

  cmakeFlags = [
    (lib.cmakeBool "MIOPEN_REQ_LIBS_ONLY" miOpenReqLibsOnly)
    (lib.cmakeBool "DISABLE_DL_KERNELS" true)
    (lib.cmakeBool "DISABLE_DPP_KERNELS" true)
    (lib.cmakeBool "CK_TIME_KERNEL" false)
    "-DCMAKE_MODULE_PATH=${clr}/hip/cmake"
    "-DCMAKE_POLICY_DEFAULT_CMP0069=NEW"
    "-DDL_KERNELS=OFF"
    # CK_USE_CODEGEN Required for migraphx which uses device_gemm_multiple_d.hpp
    # but migraphx requires an incompatible fork of CK and fails anyway
    # "-DCK_USE_CODEGEN=ON"
    # It might be worth skipping fp64 in future with this:
    # "-DDTYPES=fp32;fp16;fp8;bf16;int8"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
    "-DBUILD_DEV=OFF"
    # BUILD_MHA_LIB is unconditionally ON. This list previously also carried a
    # `!miOpenReqLibsOnly`-dependent definition of the same variable earlier on,
    # but the last -D on the CMake command line wins, so it never had an effect.
    # NOTE(review): if tying this to miOpenReqLibsOnly was the intent, replace
    # this line with (lib.cmakeBool "BUILD_MHA_LIB" (!miOpenReqLibsOnly)).
    "-DBUILD_MHA_LIB=ON"
    "-DROCM_PATH=${clr}"
    "-DENABLE_CLANG_CPP_CHECKS=OFF"
    "-DCMAKE_HIP_COMPILER_ROCM_ROOT=${clr}"

    # FP8 can build for 908/90a but very slow build
    # and produces unusably slow kernels that are huge
    "-DCK_USE_FP8_ON_UNSUPPORTED_ARCH=OFF"
  ]
  ++ lib.optionals (gpuTargets != [ ]) [
    # We intentionally set GPU_ARCHS and not AMD/GPU_TARGETS
    # per readme this is required if archs are dissimilar
    # In rocm-6.3.x not setting any arch flag worked
    # but setting dissimilar arches always failed
    "-DGPU_ARCHS=${lib.concatStringsSep ";" gpuTargets}"
  ]
  ++ lib.optionals buildTests [
    "-DGOOGLETEST_DIR=${gtest.src}" # Custom linker names
  ];

  # No flags to build selectively it seems...
  postPatch =
    # Reduce configure time by preventing thousands of clang-tidy targets being added
    # We will never call them
    # Never build profiler
    # Also strips -Werror from the compiler warning setup
    ''
      substituteInPlace library/src/utility/CMakeLists.txt library/src/tensor_operation_instance/gpu/CMakeLists.txt \
        --replace-fail clang_tidy_check '#clang_tidy_check'
      substituteInPlace CMakeLists.txt \
        --replace-fail "add_subdirectory(profiler)" ""
      substituteInPlace cmake/EnableCompilerWarnings.cmake \
        --replace-fail "-Werror" ""
    ''
    # Turn off deprecated kernels unless explicitly requested
    + lib.optionalString (!withDeprecatedKernels) ''
      substituteInPlace include/ck/ck.hpp \
        --replace-fail "CK_BUILD_DEPRECATED 1" "CK_BUILD_DEPRECATED 0"
    ''
    # Optionally remove tests
    + lib.optionalString (!buildTests) ''
      substituteInPlace CMakeLists.txt \
        --replace-fail "add_subdirectory(test)" ""
      substituteInPlace codegen/CMakeLists.txt \
        --replace-fail "include(ROCMTest)" ""
    ''
    # Optionally remove examples
    + lib.optionalString (!buildExamples) ''
      substituteInPlace CMakeLists.txt \
        --replace-fail "add_subdirectory(example)" ""
    '';

  # Split test and example binaries out of $out into their own outputs
  postInstall =
    lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/test_* $test/bin
    ''
    + lib.optionalString buildExamples ''
      mkdir -p $example/bin
      mv $out/bin/example_* $example/bin
    '';

  passthru = {
    inherit gpuTargets miOpenReqLibsOnly;
    updateScript = rocmUpdateScript {
      name = finalAttrs.pname;
      inherit (finalAttrs.src) owner repo;
    };
    # true when at least one target name starts with "gfx9"
    anyGfx9Target = lib.lists.any (lib.strings.hasPrefix "gfx9") gpuTargets;
    # true when gpuTargets contains at least one of the archs listed below
    anyMfmaTarget =
      (lib.lists.intersectLists gpuTargets [
        "gfx908"
        "gfx90a"
        "gfx942"
        "gfx950"
      ]) != [ ];
  };

  meta = {
    description = "Performance portable programming model for machine learning tensor operators";
    homepage = "https://github.com/ROCm/composable_kernel";
    license = with lib.licenses; [ mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
    broken = true; # this base package shouldn't be built directly
  };
})