nixpkgs mirror (for testing) github.com/NixOS/nixpkgs
nix
at python-updates 248 lines 7.3 kB view raw
{
  lib,
  stdenv,
  fetchFromGitHub,
  cmake,
  rocm-cmake,
  rocm-smi,
  pkg-config,
  clr,
  gfortran,
  gtest,
  boost,
  llvm,
  msgpack-cxx,
  amd-blis,
  libxml2,
  python3,
  python3Packages,
  openmp,
  hipblas-common,
  lapack-reference,
  ncurses,
  ninja,
  libffi,
  jemalloc,
  zlib,
  zstd,
  rocmUpdateScript,
  buildTests ? false,
  buildSamples ? false,
  # hipblaslt supports only devices with MFMA or WMMA
  gpuTargets ? (clr.localGpuTargets or clr.gpuTargets),
}:

let
  # hipblaslt is extremely particular about what it will build with
  # so intersect with a known supported list and use only those
  supportedTargets = (
    lib.lists.intersectLists gpuTargets [
      "gfx908"
      "gfx90a"
      "gfx942"
      "gfx950"
      "gfx1100"
      "gfx1101"
      "gfx1150"
      "gfx1151"
      "gfx1200"
      "gfx1201"
    ]
  );
  # true when at least one requested GPU target is in the supported list above
  supportsTargetArches = supportedTargets != [ ];
  # python interpreter with the modules made available to the build
  py = python3.withPackages (ps: [
    ps.pyyaml
    ps.setuptools
    ps.packaging
    ps.nanobind
    ps.msgpack
  ]);
  # workaround: build for one working target if no targets are supported
  # a few CXX files are still built for the device
  gpuTargets' =
    if supportsTargetArches then (lib.concatStringsSep ";" supportedTargets) else "gfx1200";
  compiler = "amdclang++";
  # no-switch due to spammy warnings on some cases with fixme messages
  # FIXME(LunNova@): cmake files need to be patched to include this properly or
  # maybe we improve the toolchain to use config files + assemble a sysroot
  # so system wide include assumptions work
  cFlags = "-Wno-switch -fopenmp -I${lib.getDev zstd}/include -I${amd-blis}/include/blis/ -I${lib.getDev msgpack-cxx}/include";
in
stdenv.mkDerivation (finalAttrs: {
  pname = "hipblaslt${clr.gpuArchSuffix}";
  version = "7.1.1";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocm-libraries";
    rev = "a676499add42941ff6af1e8d3f0504416dac7429";
    hash = "sha256-zIYdHFbHyP2V6dkx6Ueb6NBqWu8tJji2hSWF9zWEJa4=";
    # only the hipblaslt project of the rocm-libraries monorepo is checked out
    sparseCheckout = [ "projects/hipblaslt" ];
  };
  sourceRoot = "${finalAttrs.src.name}/projects/hipblaslt";
  env.CXX = compiler;
  env.CFLAGS = cFlags;
  env.CXXFLAGS = cFlags;
  env.ROCM_PATH = "${clr}";
  env.TENSILE_ROCM_ASSEMBLER_PATH = lib.getExe' clr "amdclang++";
  env.TENSILE_GEN_ASSEMBLY_TOOLCHAIN = lib.getExe' clr "amdclang++";
  # NOTE(review): jemalloc is preloaded into every build process; presumably to
  # reduce allocator overhead during kernel generation - confirm
  env.LD_PRELOAD = "${jemalloc}/lib/libjemalloc.so";
  env.MALLOC_CONF = "background_thread:true,metadata_thp:auto,dirty_decay_ms:10000,muzzy_decay_ms:10000";
  requiredSystemFeatures = [ "big-parallel" ];

  __structuredAttrs = true;
  strictDeps = true;

  outputs = [
    "out"
    # benchmarks are non-optional
    "benchmark"
  ]
  ++ lib.optionals buildTests [
    "test"
  ]
  ++ lib.optionals buildSamples [
    "sample"
  ];

  patches = [
    # Upstream issue requesting properly specifying
    # parallel-jobs for these invocations
    # https://github.com/ROCm/rocm-libraries/issues/1242
    ./parallel-buildSourceCodeObjectFile.diff
    # Support loading zstd compressed .dat files, required to keep output under
    # hydra size limit
    ./messagepack-compression-support.patch
    # [hipblaslt] Refactor Parallel.py to drop joblib, massively reduce peak disk space usage
    # https://github.com/ROCm/rocm-libraries/pull/2073
    ./TensileCreateLibrary-refactor.patch
    ./Tensile-interning.patch
  ];

  postPatch = ''
    # git isn't needed and we have no .git
    substituteInPlace cmake/dependencies.cmake \
      --replace-fail "find_package(Git REQUIRED)" ""
    substituteInPlace CMakeLists.txt \
      --replace-fail " LANGUAGES CXX" " LANGUAGES CXX C ASM"
  '';

  doCheck = false;
  doInstallCheck = true;

  nativeBuildInputs = [
    cmake
    rocm-cmake
    py
    clr
    gfortran
    pkg-config
    ninja
    rocm-smi
    # zstd CLI is used in postInstall to compress the .dat files
    zstd
  ];

  buildInputs = [
    llvm.llvm
    clr
    rocm-cmake
    hipblas-common
    amd-blis
    rocm-smi
    openmp
    libffi
    ncurses
    lapack-reference

    # Tensile deps - not optional, building without tensile isn't actually supported
    msgpack-cxx
    libxml2
    python3Packages.msgpack
    zlib
    zstd
  ]
  ++ lib.optionals buildTests [
    gtest
  ];

  cmakeFlags = [
    (lib.cmakeFeature "Boost_INCLUDE_DIR" "${lib.getDev boost}/include") # msgpack FindBoost fails to find boost
    (lib.cmakeFeature "GPU_TARGETS" gpuTargets')
    (lib.cmakeBool "BUILD_TESTING" buildTests)
    (lib.cmakeBool "HIPBLASLT_ENABLE_BLIS" true)
    (lib.cmakeBool "HIPBLASLT_BUILD_TESTING" buildTests)
    (lib.cmakeBool "HIPBLASLT_ENABLE_SAMPLES" buildSamples)
    (lib.cmakeBool "HIPBLASLT_ENABLE_DEVICE" supportsTargetArches)
    # FIXME: Enable for ROCm 7.x
    (lib.cmakeBool "HIPBLASLT_ENABLE_ROCROLLER" false)
    "-DCMAKE_C_COMPILER=amdclang"
    "-DCMAKE_HIP_COMPILER=${compiler}"
    "-DCMAKE_CXX_COMPILER=${compiler}"
    "-DROCM_FOUND=ON" # hipblaslt tries to download rocm-cmake if this isn't set
    "-DBLIS_ROOT=${amd-blis}"
    "-DBLIS_LIB=${amd-blis}/lib/libblis-mt.so"
    "-DBLIS_INCLUDE_DIR=${amd-blis}/include/blis/"
    "-DBLA_PREFER_PKGCONFIG=ON"
    "-DFETCHCONTENT_SOURCE_DIR_NANOBIND=${python3Packages.nanobind.src}"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
    "-DHIPBLASLT_ENABLE_MARKER=Off"
  ];

  postInstall =
    # Compress msgpack .dat files to stay under hydra output size limit
    # Relies on messagepack-compression-support.patch
    #
    # zstd and mv are separate statements on purpose: the left-hand side of an
    # `a && b` list is exempt from errexit, so a failed compression would
    # otherwise be silently skipped.
    ''
      for file in $out/lib/hipblaslt/library/*.dat; do
        # an unmatched glob stays literal (no .dat files installed); skip it
        [ -f "$file" ] || continue
        zstd -19 --long -f "$file" -o "$file.tmp"
        mv "$file.tmp" "$file"
      done
    ''
    # Move binaries to appropriate outputs and delete leftover /bin
    + ''
      mkdir -p $benchmark/bin
      mv $out/bin/hipblaslt-{api-overhead,sequence,bench*} $out/bin/*.yaml $out/bin/*.py $benchmark/bin
    ''
    # NOTE(review): every other installed binary is named hipblaslt-*, so
    # `hipblas-test` looks like it should be `hipblaslt-test` - confirm against
    # a buildTests = true build before changing
    + lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/hipblas-test $test/bin
    ''
    + lib.optionalString buildSamples ''
      mkdir -p $sample/bin
      mv $out/bin/example-* $sample/bin
    ''
    # rmdir (not rm -r) so the build fails if an unexpected file is left in bin
    + ''
      rmdir $out/bin
    '';

  installCheckPhase =
    # Verify compression worked and .dat files aren't huge
    #
    # The matches are collected first and the phase fails explicitly:
    # `find -exec cmd \;` only uses cmd's exit status as the truth value of the
    # predicate, so an `exit 1` inside the command never fails the phase.
    ''
      runHook preInstallCheck
      oversizedDatFiles="$(find "$out" -type f -name "*.dat" -size "+2M")"
      if [ -n "$oversizedDatFiles" ]; then
        echo "ERROR: oversized .dat file, check for issues with install compression:" >&2
        echo "$oversizedDatFiles" >&2
        exit 1
      fi
      echo "Verified .dat files in $out are not huge"
      runHook postInstallCheck
    '';

  # If this is false there are no kernels in the output lib
  # supporting the target device
  # so if it's an optional dep it's best to not depend on it
  # Some packages like torch need hipblaslt to compile
  # and are fine ignoring it at runtime if it's not supported
  # so we have to support building an empty hipblaslt
  passthru.supportsTargetArches = supportsTargetArches;
  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };
  meta = {
    description = "Library that provides general matrix-matrix operations with a flexible API";
    homepage = "https://github.com/ROCm/hipBLASlt";
    license = with lib.licenses; [ mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})