1{ lib
2, stdenv
3, fetchFromGitHub
4, unstableGitUpdater
5, runCommand
6, cmake
7, rocm-cmake
8, hip
9, openmp
10, clang-tools-extra
11, gtest
12, buildTests ? false
13, buildExamples ? false
14, gpuTargets ? [ ] # gpuTargets = [ "gfx803" "gfx900" "gfx1030" ... ]
15}:
16
17let
18 # This is now over 3GB, to allow hydra caching we separate it
19 ck = stdenv.mkDerivation (finalAttrs: {
20 pname = "composable_kernel";
21 version = "unstable-2023-01-16";
22
23 outputs = [
24 "out"
25 ] ++ lib.optionals buildTests [
26 "test"
27 ] ++ lib.optionals buildExamples [
28 "example"
29 ];
30
31 # ROCm 5.6 should release composable_kernel as stable with a tag in the future
32 src = fetchFromGitHub {
33 owner = "ROCmSoftwarePlatform";
34 repo = "composable_kernel";
35 rev = "80e05267417f948e4f7e63c0fe807106d9a0c0ef";
36 hash = "sha256-+c0E2UtlG/abweLwCWWjNHDO5ZvSIVKwwwettT9mqR4=";
37 };
38
39 nativeBuildInputs = [
40 cmake
41 rocm-cmake
42 hip
43 clang-tools-extra
44 ];
45
46 buildInputs = [
47 openmp
48 ];
49
50 cmakeFlags = [
51 "-DCMAKE_C_COMPILER=hipcc"
52 "-DCMAKE_CXX_COMPILER=hipcc"
53 ] ++ lib.optionals (gpuTargets != [ ]) [
54 "-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
55 "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
56 ] ++ lib.optionals buildTests [
57 "-DGOOGLETEST_DIR=${gtest.src}" # Custom linker names
58 ];
59
60 # No flags to build selectively it seems...
61 postPatch = lib.optionalString (!buildTests) ''
62 substituteInPlace CMakeLists.txt \
63 --replace "add_subdirectory(test)" ""
64 '' + lib.optionalString (!buildExamples) ''
65 substituteInPlace CMakeLists.txt \
66 --replace "add_subdirectory(example)" ""
67 '';
68
69 postInstall = lib.optionalString buildTests ''
70 mkdir -p $test/bin
71 mv $out/bin/test_* $test/bin
72 '' + lib.optionalString buildExamples ''
73 mkdir -p $example/bin
74 mv $out/bin/example_* $example/bin
75 '';
76
77 passthru.updateScript = unstableGitUpdater { };
78
79 # Times out otherwise
80 requiredSystemFeatures = [ "big-parallel" ];
81
82 meta = with lib; {
83 description = "Performance portable programming model for machine learning tensor operators";
84 homepage = "https://github.com/ROCmSoftwarePlatform/composable_kernel";
85 license = with licenses; [ mit ];
86 maintainers = teams.rocm.members;
87 platforms = platforms.linux;
88 };
89 });
90
91 ckProfiler = runCommand "ckProfiler" { preferLocalBuild = true; } ''
92 cp -a ${ck}/bin/ckProfiler $out
93 '';
in stdenv.mkDerivation {
  # Wrapper derivation: reuses the metadata of `ck` and only assembles its
  # outputs from the already-built `ck` and `ckProfiler` derivations.
  inherit (ck)
    pname
    version
    outputs
    src
    passthru
    requiredSystemFeatures
    meta
    ;

  # Nothing is unpacked, patched, configured or compiled here.
  dontUnpack = true;
  dontPatch = true;
  dontConfigure = true;
  dontBuild = true;

  installPhase = lib.concatStrings [
    # ckProfiler is linked in first (cp -s); the following copy of the ck tree
    # uses -n so that the existing entry is not overwritten.
    ''
      runHook preInstall

      mkdir -p $out/bin
      cp -as ${ckProfiler} $out/bin/ckProfiler
      cp -an ${ck}/* $out
    ''
    # The extra outputs are only copied when they were built.
    (lib.optionalString buildTests ''
      cp -a ${ck.test} $test
    '')
    (lib.optionalString buildExamples ''
      cp -a ${ck.example} $example
    '')
    ''
      runHook postInstall
    ''
  ];

  # Fix paths: replace occurrences of ck's store path in the installed CMake
  # files with this derivation's own $out.
  preFixup = ''
    substituteInPlace $out/lib/cmake/composable_kernel/*.cmake \
      --replace "${ck}" "$out"
  '';
}