# Package expression for MIOpen, built from the ROCm/MIOpen GitHub repository.
#
# Notable arguments:
#   gpuTargets           - GPU architectures requested by the caller; defaults
#                          to clr.localGpuTargets, falling back to
#                          clr.gpuTargets when that attribute does not exist.
#   buildDocs            - also build documentation (adds the "doc" output).
#   buildTests           - also build tests (adds the "test" output).
#   withComposableKernel - build against composable_kernel.
{
  lib,
  stdenv,
  callPackage,
  fetchFromGitHub,
  fetchpatch,
  rocmUpdateScript,
  runCommand,
  pkg-config,
  cmake,
  rocm-cmake,
  rocblas,
  rocmlir,
  rocrand,
  rocm-runtime,
  hipblas-common,
  hipblas,
  hipblaslt,
  clr,
  composable_kernel,
  frugally-deep,
  rocm-docs-core,
  half,
  boost,
  sqlite,
  bzip2,
  lbzip2,
  nlohmann_json,
  texliveSmall,
  doxygen,
  sphinx,
  zlib,
  gtest,
  rocm-comgr,
  roctracer,
  python3Packages,
  gpuTargets ? clr.localGpuTargets or clr.gpuTargets,
  buildDocs ? false, # Needs internet because of rocm-docs-core
  buildTests ? false,
  withComposableKernel ? true,
}:

let
  # Extra compiler flags, exported below as both CFLAGS and CXXFLAGS.
  # FIXME: cmake files need to be patched to include this properly
  cFlags = "-Wno-documentation-pedantic --offload-compress -I${hipblas-common}/include -I${hipblas}/include -I${roctracer}/include -I${nlohmann_json}/include -I${sqlite.dev}/include -I${rocrand}/include";

  version = "7.1.1";

  # The requested gpuTargets restricted to the architectures listed here.
  # Targets outside this list will get
  # error: use of undeclared identifier 'CK_BUFFER_RESOURCE_3RD_DWORD'
  supportedTargets = lib.intersectLists [
    "gfx900"
    "gfx906"
    "gfx908"
    "gfx90a"
    "gfx942"
    "gfx950"
    "gfx1030"
    "gfx1031"
    "gfx1100"
    "gfx1101"
    "gfx1102"
    "gfx1150"
    "gfx1151"
    "gfx1200"
    "gfx1201"
  ] gpuTargets;

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "MIOpen";
    rev = "rocm-${version}";
    hash = "sha256-g0AEpuUiwKCu/doiRh9mW34W04m4ynHoarSyl6tR/aE=";
    fetchLFS = true;
    fetchSubmodules = true;
    # WORKAROUND: .lfsconfig is incorrectly set to exclude everything upstream
    leaveDotGit = true;
    # The script below re-fetches the release tag, drops .lfsconfig, pulls the
    # LFS objects under src/kernels, and finally removes .git again.
    # FIXME: if someone can reduce the level of awful here that would be really nice
    postFetch = ''
      export HOME=$(mktemp -d)
      cd $out
      git remote add origin $url
      git fetch origin +refs/tags/rocm-${version}:refs/tags/rocm-${version}
      git clean -fdx
      git switch -c rocm-${version} refs/tags/rocm-${version}
      git config lfs.fetchexclude "none"
      rm .lfsconfig
      git lfs install
      git lfs track "*.kdb.bz2"
      git lfs fetch --include="src/kernels/**"
      git lfs pull --include="src/kernels/**"
      git lfs checkout
      rm -rf .git
    '';
  };

  # TeX Live environment for the documentation build; lib.optionalAttrs makes
  # this an empty attrset when buildDocs is false.
  latex = lib.optionalAttrs buildDocs (
    texliveSmall.withPackages (
      ps: with ps; [
        latexmk
        tex-gyre
        fncychap
        wrapfig
        capt-of
        framed
        needspace
        tabulary
        varwidth
        titlesec
      ]
    )
  );

  # Decompress src/kernels/<arch>.kdb.bz2 from the fetched source into a
  # standalone store path named "miopen-<arch>.kdb".
  unpackKdb =
    arch:
    runCommand "miopen-${arch}.kdb" { preferLocalBuild = true; } ''
      ${lbzip2}/bin/lbzip2 -ckd ${src}/src/kernels/${arch}.kdb.bz2 > $out
    '';

  # One unpacked kernel database per architecture that has a .kdb.bz2 here.
  gfx900 = unpackKdb "gfx900";
  gfx906 = unpackKdb "gfx906";
  gfx908 = unpackKdb "gfx908";
  gfx90a = unpackKdb "gfx90a";
  gfx1030 = unpackKdb "gfx1030";
in
stdenv.mkDerivation (finalAttrs: {
  inherit version src;
  pname = "miopen";

  env.CFLAGS = cFlags;
  env.CXXFLAGS = cFlags;

  # Find zstd and add to target. Mainly for torch.
  patches = [
    ./skip-preexisting-dbs.patch
    (fetchpatch {
      url = "https://github.com/ROCm/MIOpen/commit/e608b4325646afeabb5e52846997b926d2019d19.patch";
      hash = "sha256-oxa3qlIC2bzbwGxrQOZXoY/S7CpLsMrnWRB7Og0tk0M=";
    })
    (fetchpatch {
      url = "https://github.com/ROCm/MIOpen/commit/3413d2daaeb44b7d6eadcc03033a5954a118491e.patch";
      hash = "sha256-ST4snUcTmmSI1Ogx815KEX9GdMnmubsavDzXCGJkiKs=";
    })
    # FIXME: We need to rebase or drop this arch compat patch
    # https://github.com/ROCm/MIOpen/issues/3540 suggests that
    # arch compat patching doesn't work correctly for gfx1031
    # (fetchpatch {
    #   name = "Extend-MIOpen-ISA-compatibility.patch";
    #   url = "https://github.com/GZGavinZhao/MIOpen/commit/416088b534618bd669a765afce59cfc7197064c1.patch";
    #   hash = "sha256-OwONCA68y8s2GqtQj+OtotXwUXQ5jM8tpeM92iaD4MU=";
    # })
  ];

  # "doc" and "test" outputs exist only when the matching flag is enabled.
  outputs = [
    "out"
  ]
  ++ lib.optionals buildDocs [
    "doc"
  ]
  ++ lib.optionals buildTests [
    "test"
  ];

  enableParallelBuilding = true;

  env.ROCM_PATH = clr;
  env.LD_LIBRARY_PATH = lib.makeLibraryPath [ rocm-runtime ];
nativeBuildInputs = [ pkg-config cmake rocm-cmake clr ]; buildInputs = [ hipblas hipblas-common rocblas rocmlir half boost sqlite bzip2 nlohmann_json frugally-deep roctracer rocrand hipblaslt ] ++ lib.optionals withComposableKernel [ composable_kernel ] ++ lib.optionals buildDocs [ latex doxygen sphinx rocm-docs-core python3Packages.sphinx-rtd-theme python3Packages.breathe python3Packages.myst-parser ] ++ lib.optionals buildTests [ gtest zlib ]; cmakeFlags = [ "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" supportedTargets}" "-DGPU_TARGETS=${lib.concatStringsSep ";" supportedTargets}" "-DGPU_ARCHS=${lib.concatStringsSep ";" supportedTargets}" "-DMIOPEN_USE_SQLITE_PERFDB=ON" "-DCMAKE_VERBOSE_MAKEFILE=ON" "-DCMAKE_MODULE_PATH=${clr}/hip/cmake" "-DCMAKE_BUILD_TYPE=Release" # needs to stream to stdout so bzcat rather than bunzip2 "-DUNZIPPER=${bzip2}/bin/bzcat" "-DCMAKE_C_COMPILER=amdclang" "-DCMAKE_CXX_COMPILER=amdclang++" "-DROCM_PATH=${clr}" "-DHIP_ROOT_DIR=${clr}" (lib.cmakeBool "MIOPEN_USE_ROCBLAS" true) (lib.cmakeBool "MIOPEN_USE_HIPBLASLT" true) (lib.cmakeBool "MIOPEN_USE_COMPOSABLEKERNEL" withComposableKernel) (lib.cmakeBool "MIOPEN_USE_HIPRTC" true) (lib.cmakeBool "MIOPEN_USE_COMGR" true) "-DCMAKE_HIP_COMPILER_ROCM_ROOT=${clr}" # Manually define CMAKE_INSTALL_