lol

Merge pull request #298388 from GZGavinZhao/rocm-gfx-compat

rocmPackages: extend ISA compatibility

Authored by Ulrik Strid and committed by GitHub
cd711ad7 8596068d

+110 -124
+14
pkgs/development/rocm-modules/6/clr/default.nix
··· 111 111 url = "https://github.com/ROCm/clr/commit/77c581a3ebd47b5e2908973b70adea66891159ee.patch"; 112 112 hash = "sha256-auBedbd7rghlKav7A9V6l64J7VmtE9GizIdi5gWj+fs="; 113 113 }) 114 + (fetchpatch { 115 + name = "extend-hip-isa-compatibility-check.patch"; 116 + url = "https://salsa.debian.org/rocm-team/rocm-hipamd/-/raw/d6d20142c37e1dff820950b16ff8f0523241d935/debian/patches/0026-extend-hip-isa-compatibility-check.patch"; 117 + hash = "sha256-eG0ALZZQLRzD7zJueJFhi2emontmYy6xx8Rsm346nQI="; 118 + }) 119 + (fetchpatch { 120 + name = "improve-rocclr-isa-compatibility-check.patch"; 121 + url = "https://salsa.debian.org/rocm-team/rocm-hipamd/-/raw/d6d20142c37e1dff820950b16ff8f0523241d935/debian/patches/0025-improve-rocclr-isa-compatibility-check.patch"; 122 + hash = "sha256-8eowuRiOAdd9ucKv4Eg9FPU7c6367H3eP3fRAGfXc6Y="; 123 + }) 114 124 ]; 115 125 116 126 postPatch = '' ··· 124 134 125 135 substituteInPlace hipamd/src/hip_embed_pch.sh \ 126 136 --replace "\''$LLVM_DIR/bin/clang" "${clang}/bin/clang" 137 + 138 + # https://lists.debian.org/debian-ai/2024/02/msg00178.html 139 + substituteInPlace rocclr/utils/flags.hpp \ 140 + --replace-fail "HIP_USE_RUNTIME_UNBUNDLER, false" "HIP_USE_RUNTIME_UNBUNDLER, true" 127 141 ''; 128 142 129 143 postInstall = ''
+1 -1
pkgs/development/rocm-modules/6/default.nix
··· 194 194 }; 195 195 196 196 rocblas = callPackage ./rocblas { 197 - inherit rocblas rocmUpdateScript rocm-cmake clr tensile; 197 + inherit rocmUpdateScript rocm-cmake clr tensile; 198 198 inherit (llvm) openmp; 199 199 stdenv = llvm.rocmClangStdenv; 200 200 };
+5
pkgs/development/rocm-modules/6/miopen/default.nix
··· 116 116 url = "https://github.com/ROCm/MIOpen/commit/3413d2daaeb44b7d6eadcc03033a5954a118491e.patch"; 117 117 hash = "sha256-ST4snUcTmmSI1Ogx815KEX9GdMnmubsavDzXCGJkiKs="; 118 118 }) 119 + (fetchpatch { 120 + name = "Extend-MIOpen-ISA-compatibility.patch"; 121 + url = "https://github.com/GZGavinZhao/MIOpen/commit/416088b534618bd669a765afce59cfc7197064c1.patch"; 122 + hash = "sha256-OwONCA68y8s2GqtQj+OtotXwUXQ5jM8tpeM92iaD4MU="; 123 + }) 119 124 ]; 120 125 121 126 outputs = [
+3 -1
pkgs/development/rocm-modules/6/rccl/default.nix
··· 65 65 66 66 # Really strange behavior, `#!/usr/bin/env perl` should work... 67 67 substituteInPlace CMakeLists.txt \ 68 - --replace "\''$ \''${hipify-perl_executable}" "${perl}/bin/perl ${hipify}/bin/hipify-perl" 68 + --replace "\''$ \''${hipify-perl_executable}" "${perl}/bin/perl ${hipify}/bin/hipify-perl" \ 69 + --replace-warn "-parallel-jobs=12" "-parallel-jobs=1" \ 70 + --replace-warn "-parallel-jobs=16" "-parallel-jobs=1" 69 71 ''; 70 72 71 73 postInstall = lib.optionalString buildTests ''
+51 -119
pkgs/development/rocm-modules/6/rocblas/default.nix
··· 1 - { rocblas 2 - , lib 1 + { lib 3 2 , stdenv 4 3 , fetchFromGitHub 4 + , fetchpatch 5 5 , rocmUpdateScript 6 6 , runCommand 7 7 , cmake ··· 21 21 , buildBenchmarks ? false 22 22 , tensileLogic ? "asm_full" 23 23 , tensileCOVersion ? "default" 24 - , tensileSepArch ? true 25 - , tensileLazyLib ? true 24 + # https://github.com/ROCm/Tensile/issues/1757 25 + # Allows gfx101* users to use rocBLAS normally. 26 + # Turn the below two values to `true` after the fix has been cherry-picked 27 + # into a release. Just backporting that single fix is not enough because it 28 + # depends on some previous commits. 29 + , tensileSepArch ? false 30 + , tensileLazyLib ? false 26 31 , tensileLibFormat ? "msgpack" 27 - , gpuTargets ? [ "all" ] 32 + # `gfx940`, `gfx941` are not present in this list because they are early 33 + # engineering samples, and all final MI300 hardware are `gfx942`: 34 + # https://github.com/NixOS/nixpkgs/pull/298388#issuecomment-2032791130 35 + # 36 + # `gfx1012` is not present in this list because the ISA compatibility patches 37 + # would force all `gfx101*` GPUs to run as `gfx1010`, so `gfx101*` GPUs will 38 + # always try to use `gfx1010` code objects, hence building for `gfx1012` is 39 + # useless: https://github.com/NixOS/nixpkgs/pull/298388#issuecomment-2076327152 40 + , gpuTargets ? 
[ "gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx942;gfx1010;gfx1030;gfx1100;gfx1101;gfx1102" ] 28 41 }: 29 42 30 - let 31 - # NOTE: Update the default GPU targets on every update 32 - gfx80 = (rocblas.override { 33 - gpuTargets = [ 34 - "gfx803" 35 - ]; 36 - }).overrideAttrs { pname = "rocblas-tensile-gfx80"; }; 37 - 38 - gfx90 = (rocblas.override { 39 - gpuTargets = [ 40 - "gfx900" 41 - "gfx906:xnack-" 42 - "gfx908:xnack-" 43 - "gfx90a:xnack+" 44 - "gfx90a:xnack-" 45 - ]; 46 - }).overrideAttrs { pname = "rocblas-tensile-gfx90"; }; 47 - 48 - gfx94 = (rocblas.override { 49 - gpuTargets = [ 50 - "gfx940" 51 - "gfx941" 52 - "gfx942" 53 - ]; 54 - }).overrideAttrs { pname = "rocblas-tensile-gfx94"; }; 55 - 56 - gfx10 = (rocblas.override { 57 - gpuTargets = [ 58 - "gfx1010" 59 - "gfx1012" 60 - "gfx1030" 61 - ]; 62 - }).overrideAttrs { pname = "rocblas-tensile-gfx10"; }; 63 - 64 - gfx11 = (rocblas.override { 65 - gpuTargets = [ 66 - "gfx1100" 67 - "gfx1101" 68 - "gfx1102" 69 - ]; 70 - }).overrideAttrs { pname = "rocblas-tensile-gfx11"; }; 71 - 72 - # Unfortunately, we have to do two full builds, otherwise we get overlapping _fallback.dat files 73 - fallbacks = rocblas.overrideAttrs { pname = "rocblas-tensile-fallbacks"; }; 74 - in stdenv.mkDerivation (finalAttrs: { 43 + stdenv.mkDerivation (finalAttrs: { 75 44 pname = "rocblas"; 76 45 version = "6.0.2"; 77 46 ··· 94 63 cmake 95 64 rocm-cmake 96 65 clr 66 + ] ++ lib.optionals buildTensile [ 67 + tensile 97 68 ]; 98 69 99 70 buildInputs = [ ··· 114 85 ]; 115 86 116 87 cmakeFlags = [ 117 - "-DCMAKE_C_COMPILER=hipcc" 118 - "-DCMAKE_CXX_COMPILER=hipcc" 119 - "-Dpython=python3" 120 - "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" 121 - "-DBUILD_WITH_TENSILE=${if buildTensile then "ON" else "OFF"}" 122 - # Manually define CMAKE_INSTALL_<DIR> 123 - # See: https://github.com/NixOS/nixpkgs/pull/197838 124 - "-DCMAKE_INSTALL_BINDIR=bin" 125 - "-DCMAKE_INSTALL_LIBDIR=lib" 126 - 
"-DCMAKE_INSTALL_INCLUDEDIR=include" 88 + (lib.cmakeFeature "CMAKE_C_COMPILER" "hipcc") 89 + (lib.cmakeFeature "CMAKE_CXX_COMPILER" "hipcc") 90 + (lib.cmakeFeature "python" "python3") 91 + (lib.cmakeFeature "AMDGPU_TARGETS" (lib.concatStringsSep ";" gpuTargets)) 92 + (lib.cmakeBool "BUILD_WITH_TENSILE" buildTensile) 93 + (lib.cmakeBool "ROCM_SYMLINK_LIBS" false) 94 + (lib.cmakeFeature "ROCBLAS_TENSILE_LIBRARY_DIR" "lib/rocblas") 95 + (lib.cmakeBool "BUILD_CLIENTS_TESTS" buildTests) 96 + (lib.cmakeBool "BUILD_CLIENTS_BENCHMARKS" buildBenchmarks) 97 + # rocblas header files are not installed unless we set this 98 + (lib.cmakeFeature "CMAKE_INSTALL_INCLUDEDIR" "include") 127 99 ] ++ lib.optionals buildTensile [ 128 - "-DVIRTUALENV_HOME_DIR=/build/source/tensile" 129 - "-DTensile_TEST_LOCAL_PATH=/build/source/tensile" 130 - "-DTensile_ROOT=/build/source/tensile/${python3.sitePackages}/Tensile" 131 - "-DTensile_LOGIC=${tensileLogic}" 132 - "-DTensile_CODE_OBJECT_VERSION=${tensileCOVersion}" 133 - "-DTensile_SEPARATE_ARCHITECTURES=${if tensileSepArch then "ON" else "OFF"}" 134 - "-DTensile_LAZY_LIBRARY_LOADING=${if tensileLazyLib then "ON" else "OFF"}" 135 - "-DTensile_LIBRARY_FORMAT=${tensileLibFormat}" 136 - ] ++ lib.optionals buildTests [ 137 - "-DBUILD_CLIENTS_TESTS=ON" 138 - ] ++ lib.optionals buildBenchmarks [ 139 - "-DBUILD_CLIENTS_BENCHMARKS=ON" 100 + (lib.cmakeBool "BUILD_WITH_PIP" false) 101 + (lib.cmakeFeature "Tensile_LOGIC" tensileLogic) 102 + (lib.cmakeFeature "Tensile_CODE_OBJECT_VERSION" tensileCOVersion) 103 + (lib.cmakeBool "Tensile_SEPARATE_ARCHITECTURES" tensileSepArch) 104 + (lib.cmakeBool "Tensile_LAZY_LIBRARY_LOADING" tensileLazyLib) 105 + (lib.cmakeFeature "Tensile_LIBRARY_FORMAT" tensileLibFormat) 106 + (lib.cmakeBool "Tensile_PRINT_DEBUG" true) 140 107 ] ++ lib.optionals (buildTests || buildBenchmarks) [ 141 - "-DCMAKE_CXX_FLAGS=-I${amd-blis}/include/blis" 108 + (lib.cmakeFeature "CMAKE_CXX_FLAGS" "-I${amd-blis}/include/blis") 142 109 ]; 143 110 
144 - postPatch = lib.optionalString (finalAttrs.pname != "rocblas") '' 145 - # Return early and install tensile files manually 146 - substituteInPlace library/src/CMakeLists.txt \ 147 - --replace "set_target_properties( TensileHost PROPERTIES OUTPUT_NAME" "return()''\nset_target_properties( TensileHost PROPERTIES OUTPUT_NAME" 148 - '' + lib.optionalString (buildTensile && finalAttrs.pname == "rocblas") '' 149 - # Link the prebuilt Tensile files 150 - mkdir -p build/Tensile/library 111 + patches = [ 112 + (fetchpatch { 113 + name = "Extend-rocBLAS-HIP-ISA-compatibility.patch"; 114 + url = "https://github.com/GZGavinZhao/rocBLAS/commit/89b75ff9cc731f71f370fad90517395e117b03bb.patch"; 115 + hash = "sha256-W/ohOOyNCcYYLOiQlPzsrTlNtCBdJpKVxO8s+4G7sjo="; 116 + }) 117 + ]; 151 118 152 - for path in ${gfx80} ${gfx90} ${gfx94} ${gfx10} ${gfx11} ${fallbacks}; do 153 - ln -s $path/lib/rocblas/library/* build/Tensile/library 154 - done 155 - 156 - unlink build/Tensile/library/TensileManifest.txt 157 - '' + lib.optionalString buildTensile '' 158 - # Tensile REALLY wants to write to the nix directory if we include it normally 159 - cp -a ${tensile} tensile 160 - chmod +w -R tensile 161 - 162 - # Rewrap Tensile 163 - substituteInPlace tensile/bin/{.t*,.T*,*} \ 164 - --replace "${tensile}" "/build/source/tensile" 165 - 166 - substituteInPlace CMakeLists.txt \ 167 - --replace "include(virtualenv)" "" \ 168 - --replace "virtualenv_install(\''${Tensile_TEST_LOCAL_PATH})" "" 169 - ''; 170 - 171 - postInstall = lib.optionalString (finalAttrs.pname == "rocblas") '' 172 - ln -sf ${fallbacks}/lib/rocblas/library/TensileManifest.txt $out/lib/rocblas/library 173 - '' + lib.optionalString (finalAttrs.pname != "rocblas") '' 174 - mkdir -p $out/lib/rocblas/library 175 - rm -rf $out/share 176 - '' + lib.optionalString (finalAttrs.pname != "rocblas" && finalAttrs.pname != "rocblas-tensile-fallbacks") '' 177 - rm Tensile/library/{TensileManifest.txt,*_fallback.dat} 178 - mv Tensile/library/* 
$out/lib/rocblas/library 179 - '' + lib.optionalString (finalAttrs.pname == "rocblas-tensile-fallbacks") '' 180 - mv Tensile/library/{TensileManifest.txt,*_fallback.dat} $out/lib/rocblas/library 181 - '' + lib.optionalString buildTests '' 182 - mkdir -p $test/bin 183 - cp -a $out/bin/* $test/bin 184 - rm $test/bin/*-bench || true 185 - '' + lib.optionalString buildBenchmarks '' 186 - mkdir -p $benchmark/bin 187 - cp -a $out/bin/* $benchmark/bin 188 - rm $benchmark/bin/*-test || true 189 - '' + lib.optionalString (buildTests || buildBenchmarks ) '' 190 - rm -rf $out/bin 119 + # Pass $NIX_BUILD_CORES to Tensile 120 + postPatch = '' 121 + substituteInPlace cmake/build-options.cmake \ 122 + --replace-fail 'Tensile_CPU_THREADS ""' 'Tensile_CPU_THREADS "$ENV{NIX_BUILD_CORES}"' 191 123 ''; 192 124 193 125 passthru.updateScript = rocmUpdateScript {
+10
pkgs/development/rocm-modules/6/rocm-runtime/default.nix
··· 1 1 { lib 2 2 , stdenv 3 3 , fetchFromGitHub 4 + , fetchpatch 4 5 , rocmUpdateScript 5 6 , pkg-config 6 7 , cmake ··· 40 41 numactl 41 42 valgrind 42 43 libxml2 44 + ]; 45 + 46 + patches = [ 47 + (fetchpatch { 48 + name = "extend-isa-compatibility-check.patch"; 49 + url = "https://salsa.debian.org/rocm-team/rocr-runtime/-/raw/076026d43bbee7f816b81fea72f984213a9ff961/debian/patches/0004-extend-isa-compatibility-check.patch"; 50 + hash = "sha256-cC030zVGS4kNXwaztv5cwfXfVwOldpLGV9iYgEfPEnY="; 51 + stripLen = 1; 52 + }) 43 53 ]; 44 54 45 55 postPatch = ''
+9
pkgs/development/rocm-modules/6/rocprim/default.nix
··· 1 1 { lib 2 + , fetchpatch 2 3 , stdenv 3 4 , fetchFromGitHub 4 5 , rocmUpdateScript ··· 30 31 rev = "rocm-${finalAttrs.version}"; 31 32 hash = "sha256-nWvq26qRPZ6Au1rc5cR74TKArcdUFg7O9djFi8SvMeM="; 32 33 }; 34 + 35 + patches = [ 36 + (fetchpatch { 37 + name = "arch-conversion-marco.patch"; 38 + url = "https://salsa.debian.org/rocm-team/rocprim/-/raw/70c8aaee3cf545d92685f4ed9bf8f41e3d4d570c/debian/patches/arch-conversion-macro.patch"; 39 + hash = "sha256-oXdmbCArOB5bKE8ozDFrSh4opbO+c4VI6PNhljeUSms="; 40 + }) 41 + ]; 33 42 34 43 nativeBuildInputs = [ 35 44 cmake
+17 -3
pkgs/development/rocm-modules/6/tensile/default.nix
··· 1 1 { lib 2 2 , stdenv 3 3 , fetchFromGitHub 4 + , fetchpatch 4 5 , rocmUpdateScript 5 6 , buildPythonPackage 6 7 , pytestCheckHook ··· 34 35 joblib 35 36 ]; 36 37 38 + patches = [ 39 + (fetchpatch { 40 + name = "Extend-Tensile-HIP-ISA-compatibility.patch"; 41 + url = "https://github.com/GZGavinZhao/Tensile/commit/855cb15839849addb0816a6dde45772034a3e41f.patch"; 42 + hash = "sha256-d+fVf/vz+sxGqJ96vuxe0jRMgbC5K6j5FQ5SJ1e3Sl8="; 43 + }) 44 + (fetchpatch { 45 + name = "Don-t-copy-file-twice-in-copyStaticFiles.patch"; 46 + url = "https://github.com/GZGavinZhao/Tensile/commit/9e14d5a00a096bddac605910a0e4dfb4c35bb0d5.patch"; 47 + hash = "sha256-gOzjJyD1K056OFQ+hK5nbUeBhxLTIgQLoT+0K12SypI="; 48 + }) 49 + ]; 50 + 37 51 doCheck = false; # Too many errors, not sure how to set this up properly 38 52 39 53 nativeCheckInputs = [ ··· 42 56 rocminfo 43 57 ]; 44 58 45 - preCheck = '' 46 - export ROCM_PATH=${rocminfo} 47 - ''; 59 + env = { 60 + ROCM_PATH = rocminfo; 61 + }; 48 62 49 63 pythonImportsCheck = [ "Tensile" ]; 50 64