nixpkgs mirror (for testing) github.com/NixOS/nixpkgs
nix
at devShellTools-shell 197 lines 5.8 kB view raw
# rocBLAS ("BLAS implementation for ROCm platform") built with CMake.
#
# Tunable arguments:
#   buildTensile     - pass BUILD_WITH_TENSILE and pull in Tensile plus its
#                      build-time dependencies.
#   buildTests       - build the client tests (adds gtest).
#   buildBenchmarks  - build the client benchmarks and samples.
#   tensileSepArch   - value of Tensile_SEPARATE_ARCHITECTURES.
#   tensileLazyLib   - value of Tensile_LAZY_LIBRARY_LOADING.
#   withHipBlasLt    - pass BUILD_WITH_HIPBLASLT and add hipblaslt.
#   gpuTargets       - list of GPU architectures; joined with ";" and handed
#                      to SUPPORTED_TARGETS / AMDGPU_TARGETS / GPU_TARGETS.
{
  lib,
  stdenv,
  fetchFromGitHub,
  fetchpatch,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  clr,
  python3,
  tensile,
  msgpack,
  libxml2,
  gtest,
  gfortran,
  openmp,
  git,
  amd-blis,
  zstd,
  hipblas-common,
  hipblaslt,
  python3Packages,
  rocm-smi,
  buildTensile ? true,
  buildTests ? true,
  buildBenchmarks ? true,
  # https://github.com/ROCm/Tensile/issues/1757
  # These two options allow gfx101* users to use rocBLAS normally.
  # Just backporting the single upstream fix is not enough because it
  # depends on some previous commits.
  # NOTE(review): both values already default to `true`; the earlier
  # instruction to flip them "after the fix has been cherry-picked into a
  # release" appears to be stale -- confirm against the issue above.
  tensileSepArch ? true,
  tensileLazyLib ? true,
  withHipBlasLt ? true,
  # `gfx940`, `gfx941` are not present in this list because they are early
  # engineering samples, and all final MI300 hardware are `gfx942`:
  # https://github.com/NixOS/nixpkgs/pull/298388#issuecomment-2032791130
  #
  # `gfx1012` is not present in this list because the ISA compatibility patches
  # would force all `gfx101*` GPUs to run as `gfx1010`, so `gfx101*` GPUs will
  # always try to use `gfx1010` code objects, hence building for `gfx1012` is
  # useless: https://github.com/NixOS/nixpkgs/pull/298388#issuecomment-2076327152
  gpuTargets ? (
    # Prefer the targets advertised by clr when that attribute exists,
    # otherwise fall back to the explicit list below.
    clr.localGpuTargets or [
      "gfx900"
      "gfx906"
      "gfx908"
      "gfx90a"
      "gfx942"
      "gfx1010"
      "gfx1030"
      "gfx1100"
      "gfx1101"
      "gfx1102"
      "gfx1200"
      "gfx1201"
    ]
  ),
}:

let
  # CMake list syntax: architectures separated by semicolons.
  gpuTargets' = lib.concatStringsSep ";" gpuTargets;
in
stdenv.mkDerivation (finalAttrs: {
  pname = "rocblas${clr.gpuArchSuffix}";
  version = "6.3.3";

  outputs = [
    "out"
  ];

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocBLAS";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-IYcrVcGH4yZDkFZeNOJPfG0qsPS/WiH0fTSUSdo1BH4=";
  };

  nativeBuildInputs = [
    cmake
    # no ninja: it buffers console output and nix times out after long
    # periods of no output
    rocm-cmake
    clr
    git
  ]
  ++ lib.optionals buildTensile [
    tensile
  ];

  buildInputs = [
    python3
    hipblas-common
  ]
  ++ lib.optionals withHipBlasLt [
    hipblaslt
  ]
  ++ lib.optionals buildTensile [
    zstd
    msgpack
    libxml2
    python3Packages.msgpack
    python3Packages.zstandard
  ]
  ++ lib.optionals buildTests [
    gtest
  ]
  ++ lib.optionals (buildTests || buildBenchmarks) [
    gfortran
    openmp
    amd-blis
    rocm-smi
  ]
  ++ lib.optionals (buildTensile || buildTests || buildBenchmarks) [
    python3Packages.pyyaml
  ];

  dontStrip = true;

  env = {
    CXXFLAGS =
      "-O3 -DNDEBUG -I${hipblas-common}/include"
      + lib.optionalString (buildTests || buildBenchmarks) " -I${amd-blis}/include/blis";
    # Fails to link tests if we don't add amd-blis libs
    LDFLAGS = lib.optionalString (
      buildTests || buildBenchmarks
    ) "-Wl,--as-needed -L${amd-blis}/lib -lblis-mt -lcblas";
    TENSILE_ROCM_ASSEMBLER_PATH = "${stdenv.cc}/bin/clang++";
  };

  cmakeFlags = [
    (lib.cmakeFeature "CMAKE_BUILD_TYPE" "Release")
    (lib.cmakeBool "CMAKE_VERBOSE_MAKEFILE" true)
    (lib.cmakeFeature "CMAKE_EXECUTE_PROCESS_COMMAND_ECHO" "STDERR")
    # NOTE(review): these reference gfortran unconditionally, while gfortran is
    # only a build input when tests or benchmarks are enabled -- confirm
    # whether they could be gated the same way.
    (lib.cmakeFeature "CMAKE_Fortran_COMPILER" "${lib.getBin gfortran}/bin/gfortran")
    (lib.cmakeFeature "CMAKE_Fortran_COMPILER_AR" "${lib.getBin gfortran}/bin/ar")
    (lib.cmakeFeature "CMAKE_Fortran_COMPILER_RANLIB" "${lib.getBin gfortran}/bin/ranlib")
    (lib.cmakeFeature "python" "python3")
    (lib.cmakeFeature "SUPPORTED_TARGETS" gpuTargets')
    (lib.cmakeFeature "AMDGPU_TARGETS" gpuTargets')
    (lib.cmakeFeature "GPU_TARGETS" gpuTargets')
    (lib.cmakeBool "BUILD_WITH_TENSILE" buildTensile)
    (lib.cmakeBool "ROCM_SYMLINK_LIBS" false)
    (lib.cmakeFeature "ROCBLAS_TENSILE_LIBRARY_DIR" "lib/rocblas")
    (lib.cmakeBool "BUILD_WITH_HIPBLASLT" withHipBlasLt)
    (lib.cmakeBool "BUILD_CLIENTS_TESTS" buildTests)
    (lib.cmakeBool "BUILD_CLIENTS_BENCHMARKS" buildBenchmarks)
    # Samples follow the benchmarks switch; there is no separate argument.
    (lib.cmakeBool "BUILD_CLIENTS_SAMPLES" buildBenchmarks)
    (lib.cmakeBool "BUILD_OFFLOAD_COMPRESS" true)
    # Temporarily set variables to work around upstream CMakeLists issue
    # Can be removed once https://github.com/ROCm/rocm-cmake/issues/121 is fixed
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
    "-DCMAKE_INSTALL_LIBDIR=lib"
  ]
  ++ lib.optionals buildTensile [
    "-DCPACK_SET_DESTDIR=OFF"
    # NOTE(review): amd-blis is only provided when tests or benchmarks are
    # built, yet LINK_BLIS is tied to buildTensile -- confirm this gating.
    "-DLINK_BLIS=ON"
    "-DTensile_CODE_OBJECT_VERSION=default"
    "-DTensile_LOGIC=asm_full"
    "-DTensile_LIBRARY_FORMAT=msgpack"
    (lib.cmakeBool "BUILD_WITH_PIP" false)
    (lib.cmakeBool "Tensile_SEPARATE_ARCHITECTURES" tensileSepArch)
    (lib.cmakeBool "Tensile_LAZY_LIBRARY_LOADING" tensileLazyLib)
  ];

  patches = [
    (fetchpatch {
      name = "Extend-rocBLAS-HIP-ISA-compatibility.patch";
      url = "https://github.com/GZGavinZhao/rocBLAS/commit/89b75ff9cc731f71f370fad90517395e117b03bb.patch";
      hash = "sha256-W/ohOOyNCcYYLOiQlPzsrTlNtCBdJpKVxO8s+4G7sjo=";
    })
  ];

  # Pass $NIX_BUILD_CORES to Tensile
  # NOTE(review): the second substitution rewrites the literal "4.42.0" to
  # "4.43.0" in CMakeLists.txt; presumably a version requirement bump --
  # confirm what it refers to.
  postPatch = ''
    substituteInPlace cmake/build-options.cmake \
      --replace-fail 'Tensile_CPU_THREADS ""' 'Tensile_CPU_THREADS "$ENV{NIX_BUILD_CORES}"'
    substituteInPlace CMakeLists.txt \
      --replace-fail "4.42.0" "4.43.0"
  '';

  passthru = {
    # Semicolon-joined target string, exposed for consumers of this package.
    amdgpu_targets = gpuTargets';
    updateScript = rocmUpdateScript {
      name = finalAttrs.pname;
      inherit (finalAttrs.src) owner;
      inherit (finalAttrs.src) repo;
    };
  };

  enableParallelBuilding = true;
  requiredSystemFeatures = [ "big-parallel" ];

  meta = {
    description = "BLAS implementation for ROCm platform";
    homepage = "https://github.com/ROCm/rocBLAS";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})