nixpkgs mirror (for testing) github.com/NixOS/nixpkgs
nix
at python-updates 192 lines 5.5 kB view raw
# Base derivation for ROCm's composable_kernel (CK).
#
# This derivation is not meant to be built as-is: `preBuild` fails on purpose
# and `meta.broken` is set. It exists so that other derivations can override
# it and build CK in chunks.
#
# Arguments of note:
#   buildTests            - adds a "test" output, passes GOOGLETEST_DIR and
#                           keeps `add_subdirectory(test)` in CMakeLists.txt.
#   buildExamples         - adds an "example" output and keeps
#                           `add_subdirectory(example)` in CMakeLists.txt.
#   miOpenReqLibsOnly     - forwarded as MIOPEN_REQ_LIBS_ONLY (see comment on
#                           the argument below).
#   withDeprecatedKernels - when false, CK_BUILD_DEPRECATED is patched to 0 in
#                           include/ck/ck.hpp.
#   gpuTargets            - list joined with ";" and passed as GPU_ARCHS;
#                           defaults to clr.localGpuTargets when that exists.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  llvm,
  clr,
  rocminfo,
  python3,
  hipify,
  gitMinimal,
  gtest,
  zstd,
  buildTests ? false,
  buildExamples ? false,
  # limits prebuilt kernel selection to those needed for MIOPEN (currently "*conv*")
  # Other kernels can still be used if treating CK as a header only library
  # and building specific instances, as done with ck4inductor/torch
  miOpenReqLibsOnly ? true,
  withDeprecatedKernels ? false,
  gpuTargets ? (
    clr.localGpuTargets or [
      "gfx900"
      "gfx906"
      "gfx908"
      "gfx90a"
      "gfx942"
      "gfx950"
      "gfx10-3-generic"
      "gfx11-generic"
      "gfx12-generic"
    ]
  ),
}:

stdenv.mkDerivation (finalAttrs: {
  # Fail early and loudly if someone tries to build the base package itself.
  preBuild = ''
    echo "This derivation isn't intended to be built directly and only exists to be overridden and built in chunks";
    exit 1
  '';

  pname = "composable_kernel_base";
  version = "7.1.1";

  # "test" / "example" outputs only exist when the matching build option is on;
  # postInstall moves the corresponding binaries into them.
  outputs = [
    "out"
  ]
  ++ lib.optionals buildTests [
    "test"
  ]
  ++ lib.optionals buildExamples [
    "example"
  ];

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "composable_kernel";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-exdkyTIK03dzlCXHm3j8ehEb9NxLOxPX9QyfMsiCgSs=";
  };

  nativeBuildInputs = [
    # Deliberately not using ninja
    # because we're jankily composing build outputs from multiple drvs
    # ninja won't believe they're up to date
    gitMinimal
    cmake
    rocminfo
    clr
    hipify
    zstd
    python3
  ];

  buildInputs = [
    rocm-cmake
    clr
    zstd
  ];

  strictDeps = true;
  enableParallelBuilding = true;
  env.ROCM_PATH = clr;

  cmakeFlags = [
    (lib.cmakeBool "MIOPEN_REQ_LIBS_ONLY" miOpenReqLibsOnly)
    (lib.cmakeBool "BUILD_MHA_LIB" (!miOpenReqLibsOnly))
    (lib.cmakeBool "DISABLE_DL_KERNELS" true)
    (lib.cmakeBool "DISABLE_DPP_KERNELS" true)
    (lib.cmakeBool "CK_TIME_KERNEL" false)
    "-DCMAKE_MODULE_PATH=${clr}/hip/cmake"
    "-DCMAKE_POLICY_DEFAULT_CMP0069=NEW"
    "-DDL_KERNELS=OFF"
    # CK_USE_CODEGEN Required for migraphx which uses device_gemm_multiple_d.hpp
    # but migraphx requires an incompatible fork of CK and fails anyway
    # "-DCK_USE_CODEGEN=ON"
    # It might be worth skipping fp64 in future with this:
    # "-DDTYPES=fp32;fp16;fp8;bf16;int8"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
    "-DBUILD_DEV=OFF"
    # NOTE(review): BUILD_MHA_LIB is also set near the top of this list from
    # miOpenReqLibsOnly. This later literal presumably takes precedence on the
    # cmake command line, which would make the earlier value ineffective.
    # Confirm which of the two is intended and drop the other.
    "-DBUILD_MHA_LIB=ON"
    "-DROCM_PATH=${clr}"
    "-DENABLE_CLANG_CPP_CHECKS=OFF"
    "-DCMAKE_HIP_COMPILER_ROCM_ROOT=${clr}"

    # FP8 can build for 908/90a but very slow build
    # and produces unusably slow kernels that are huge
    "-DCK_USE_FP8_ON_UNSUPPORTED_ARCH=OFF"
  ]
  ++ lib.optionals (gpuTargets != [ ]) [
    # We intentionally set GPU_ARCHS and not AMD/GPU_TARGETS
    # per readme this is required if archs are dissimilar
    # In rocm-6.3.x not setting any arch flag worked
    # but setting dissimilar arches always failed
    "-DGPU_ARCHS=${lib.concatStringsSep ";" gpuTargets}"
  ]
  ++ lib.optionals buildTests [
    # Presumably lets the test build use the gtest sources from nixpkgs
    # rather than obtaining them itself - TODO confirm against CK's CMake.
    "-DGOOGLETEST_DIR=${gtest.src}"
  ];

  # No flags to build selectively it seems...
  postPatch =
    # Reduce configure time by preventing thousands of clang-tidy targets being added
    # We will never call them
    # Never build profiler
    ''
      substituteInPlace library/src/utility/CMakeLists.txt library/src/tensor_operation_instance/gpu/CMakeLists.txt \
        --replace-fail clang_tidy_check '#clang_tidy_check'
      substituteInPlace CMakeLists.txt \
        --replace-fail "add_subdirectory(profiler)" ""
      substituteInPlace cmake/EnableCompilerWarnings.cmake \
        --replace-fail "-Werror" ""
    ''
    # Switch off deprecated kernels unless explicitly requested
    + lib.optionalString (!withDeprecatedKernels) ''
      substituteInPlace include/ck/ck.hpp \
        --replace-fail "CK_BUILD_DEPRECATED 1" "CK_BUILD_DEPRECATED 0"
    ''
    # Optionally remove tests
    + lib.optionalString (!buildTests) ''
      substituteInPlace CMakeLists.txt \
        --replace-fail "add_subdirectory(test)" ""
      substituteInPlace codegen/CMakeLists.txt \
        --replace-fail "include(ROCMTest)" ""
    ''
    # Optionally remove examples
    + lib.optionalString (!buildExamples) ''
      substituteInPlace CMakeLists.txt \
        --replace-fail "add_subdirectory(example)" ""
    '';

  # Move test_* / example_* binaries out of $out into their dedicated outputs.
  postInstall =
    lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/test_* $test/bin
    ''
    + lib.optionalString buildExamples ''
      mkdir -p $example/bin
      mv $out/bin/example_* $example/bin
    '';

  passthru = {
    inherit gpuTargets miOpenReqLibsOnly;
    updateScript = rocmUpdateScript {
      name = finalAttrs.pname;
      inherit (finalAttrs.src) owner repo;
    };
    # True when any requested target name starts with "gfx9".
    anyGfx9Target = lib.lists.any (lib.strings.hasPrefix "gfx9") gpuTargets;
    # True when gpuTargets contains at least one of the targets listed here.
    anyMfmaTarget =
      (lib.lists.intersectLists gpuTargets [
        "gfx908"
        "gfx90a"
        "gfx942"
        "gfx950"
      ]) != [ ];
  };

  meta = {
    description = "Performance portable programming model for machine learning tensor operators";
    homepage = "https://github.com/ROCm/composable_kernel";
    license = with lib.licenses; [ mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
    broken = true; # this base package shouldn't be built directly
  };
})