lol

Merge pull request #267209 from Madouura/pr/triton-llvm

openai-triton-llvm: fix aarch64 and cross-compilation

Authored by Weijia Wang and committed by GitHub
4774c536 9415631d

+95 -44
+89 -38
pkgs/by-name/op/openai-triton-llvm/package.nix
··· 1 - { config 2 - , lib 1 + { lib 3 2 , stdenv 4 3 , fetchFromGitHub 4 + , pkgsBuildBuild 5 5 , pkg-config 6 6 , cmake 7 7 , ninja 8 8 , git 9 - , doxygen 10 - , sphinx 11 9 , libxml2 12 10 , libxcrypt 13 11 , libedit 14 12 , libffi 13 + , libpfm 15 14 , mpfr 16 15 , zlib 17 16 , ncurses 17 + , doxygen 18 + , sphinx 19 + , which 20 + , sysctl 18 21 , python3Packages 19 22 , buildDocs ? true 20 23 , buildMan ? true 21 24 , buildTests ? true 25 + , llvmTargetsToBuild ? [ "NATIVE" ] # "NATIVE" resolves into x86 or aarch64 depending on stdenv 26 + , llvmProjectsToBuild ? [ "llvm" "mlir" ] 22 27 }: 23 28 24 - stdenv.mkDerivation (finalAttrs: { 29 + let 30 + llvmNativeTarget = 31 + if stdenv.hostPlatform.isx86_64 then "X86" 32 + else if stdenv.hostPlatform.isAarch64 then "AArch64" 33 + else throw "Currently unsupported LLVM platform '${stdenv.hostPlatform.config}'"; 34 + 35 + inferNativeTarget = t: if t == "NATIVE" then llvmNativeTarget else t; 36 + llvmTargetsToBuild' = [ "AMDGPU" "NVPTX" ] ++ builtins.map inferNativeTarget llvmTargetsToBuild; 37 + 38 + # This LLVM version can't seem to find pygments/pyyaml, 39 + # but a later update will likely fix this (openai-triton-2.1.0) 40 + python = 41 + if buildTests 42 + then python3Packages.python.withPackages (p: with p; [ psutil pygments pyyaml ]) 43 + else python3Packages.python; 44 + 45 + isNative = stdenv.hostPlatform == stdenv.buildPlatform; 46 + in stdenv.mkDerivation (finalAttrs: { 25 47 pname = "openai-triton-llvm"; 26 48 version = "14.0.6-f28c006a5895"; 27 49 ··· 33 55 "man" 34 56 ]; 35 57 36 - # See https://github.com/openai/triton/blob/main/python/setup.py and https://github.com/ptillet/triton-llvm-releases/releases 58 + # See https://github.com/openai/triton/blob/main/python/setup.py 59 + # and https://github.com/ptillet/triton-llvm-releases/releases 37 60 src = fetchFromGitHub { 38 61 owner = "llvm"; 39 62 repo = "llvm-project"; ··· 46 69 cmake 47 70 ninja 48 71 git 49 - python3Packages.python 72 + python 50 
73 ] ++ lib.optionals (buildDocs || buildMan) [ 51 74 doxygen 52 75 sphinx ··· 58 81 libxcrypt 59 82 libedit 60 83 libffi 84 + libpfm 61 85 mpfr 62 86 ]; 63 87 ··· 69 93 sourceRoot = "${finalAttrs.src.name}/llvm"; 70 94 71 95 cmakeFlags = [ 72 - "-DLLVM_TARGETS_TO_BUILD=${ 73 - let 74 - # Targets can be found in 75 - # https://github.com/llvm/llvm-project/tree/f28c006a5895fc0e329fe15fead81e37457cb1d1/clang/lib/Basic/Targets 76 - # NOTE: Unsure of how "host" would function, especially given that we might be cross-compiling. 77 - llvmTargets = [ "AMDGPU" "NVPTX" ] 78 - ++ lib.optionals stdenv.isAarch64 [ "AArch64" ] 79 - ++ lib.optionals stdenv.isx86_64 [ "X86" ]; 80 - in 81 - lib.concatStringsSep ";" llvmTargets 82 - }" 83 - "-DLLVM_ENABLE_PROJECTS=llvm;mlir" 84 - "-DLLVM_INSTALL_UTILS=ON" 85 - ] ++ lib.optionals (buildDocs || buildMan) [ 86 - "-DLLVM_INCLUDE_DOCS=ON" 87 - "-DMLIR_INCLUDE_DOCS=ON" 88 - "-DLLVM_BUILD_DOCS=ON" 89 - # "-DLLVM_ENABLE_DOXYGEN=ON" Way too slow, only uses one core 90 - "-DLLVM_ENABLE_SPHINX=ON" 91 - "-DSPHINX_OUTPUT_HTML=ON" 92 - "-DSPHINX_OUTPUT_MAN=ON" 93 - "-DSPHINX_WARNINGS_AS_ERRORS=OFF" 94 - ] ++ lib.optionals buildTests [ 95 - "-DLLVM_INCLUDE_TESTS=ON" 96 - "-DMLIR_INCLUDE_TESTS=ON" 97 - "-DLLVM_BUILD_TESTS=ON" 98 - ]; 96 + (lib.cmakeFeature "LLVM_TARGETS_TO_BUILD" (lib.concatStringsSep ";" llvmTargetsToBuild')) 97 + (lib.cmakeFeature "LLVM_ENABLE_PROJECTS" (lib.concatStringsSep ";" llvmProjectsToBuild)) 98 + (lib.cmakeFeature "LLVM_HOST_TRIPLE" stdenv.hostPlatform.config) 99 + (lib.cmakeFeature "LLVM_DEFAULT_TARGET_TRIPLE" stdenv.hostPlatform.config) 100 + (lib.cmakeBool "LLVM_INSTALL_UTILS" true) 101 + (lib.cmakeBool "LLVM_INCLUDE_DOCS" (buildDocs || buildMan)) 102 + (lib.cmakeBool "MLIR_INCLUDE_DOCS" (buildDocs || buildMan)) 103 + (lib.cmakeBool "LLVM_BUILD_DOCS" (buildDocs || buildMan)) 104 + # Way too slow, only uses one core 105 + # (lib.cmakeBool "LLVM_ENABLE_DOXYGEN" (buildDocs || buildMan)) 106 + (lib.cmakeBool 
"LLVM_ENABLE_SPHINX" (buildDocs || buildMan)) 107 + (lib.cmakeBool "SPHINX_OUTPUT_HTML" buildDocs) 108 + (lib.cmakeBool "SPHINX_OUTPUT_MAN" buildMan) 109 + (lib.cmakeBool "SPHINX_WARNINGS_AS_ERRORS" false) 110 + (lib.cmakeBool "LLVM_INCLUDE_TESTS" buildTests) 111 + (lib.cmakeBool "MLIR_INCLUDE_TESTS" buildTests) 112 + (lib.cmakeBool "LLVM_BUILD_TESTS" buildTests) 113 + # Cross compilation code taken/modified from LLVM 16 derivation 114 + ] ++ lib.optionals (!isNative) (let 115 + nativeToolchainFlags = let 116 + nativeCC = pkgsBuildBuild.targetPackages.stdenv.cc; 117 + nativeBintools = nativeCC.bintools.bintools; 118 + in [ 119 + (lib.cmakeFeature "CMAKE_C_COMPILER" "${nativeCC}/bin/${nativeCC.targetPrefix}cc") 120 + (lib.cmakeFeature "CMAKE_CXX_COMPILER" "${nativeCC}/bin/${nativeCC.targetPrefix}c++") 121 + (lib.cmakeFeature "CMAKE_AR" "${nativeBintools}/bin/${nativeBintools.targetPrefix}ar") 122 + (lib.cmakeFeature "CMAKE_STRIP" "${nativeBintools}/bin/${nativeBintools.targetPrefix}strip") 123 + (lib.cmakeFeature "CMAKE_RANLIB" "${nativeBintools}/bin/${nativeBintools.targetPrefix}ranlib") 124 + ]; 125 + 126 + # We need to repass the custom GNUInstallDirs values, otherwise CMake 127 + # will choose them for us, leading to wrong results in llvm-config-native 128 + nativeInstallFlags = [ 129 + (lib.cmakeFeature "CMAKE_INSTALL_PREFIX" (placeholder "out")) 130 + (lib.cmakeFeature "CMAKE_INSTALL_BINDIR" "${placeholder "out"}/bin") 131 + (lib.cmakeFeature "CMAKE_INSTALL_INCLUDEDIR" "${placeholder "out"}/include") 132 + (lib.cmakeFeature "CMAKE_INSTALL_LIBDIR" "${placeholder "out"}/lib") 133 + (lib.cmakeFeature "CMAKE_INSTALL_LIBEXECDIR" "${placeholder "out"}/libexec") 134 + ]; 135 + in [ 136 + (lib.cmakeBool "CMAKE_CROSSCOMPILING" true) 137 + (lib.cmakeFeature "CROSS_TOOLCHAIN_FLAGS_NATIVE" (lib.concatStringsSep ";" 138 + (lib.concatLists [ nativeToolchainFlags nativeInstallFlags ]))) 139 + ]); 99 140 100 141 postPatch = '' 101 142 # `CMake Error: cannot write to file 
"/build/source/llvm/build/lib/cmake/mlir/MLIRTargets.cmake": Permission denied` 102 143 chmod +w -R ../mlir 144 + patchShebangs ../mlir/test/mlir-reduce 103 145 104 146 # FileSystem permissions tests fail with various special bits 105 147 rm test/tools/llvm-objcopy/ELF/mirror-permissions-unix.test ··· 107 149 108 150 substituteInPlace unittests/Support/CMakeLists.txt \ 109 151 --replace "Path.cpp" "" 152 + '' + lib.optionalString stdenv.isAarch64 '' 153 + # Not sure why this fails 154 + rm test/tools/llvm-exegesis/AArch64/latency-by-opcode-name.s 155 + ''; 156 + 157 + postInstall = lib.optionalString (!isNative) '' 158 + cp -a NATIVE/bin/llvm-config $out/bin/llvm-config-native 110 159 ''; 111 160 112 161 doCheck = buildTests; 162 + 163 + nativeCheckInputs = [ which ] 164 + ++ lib.optionals stdenv.isDarwin [ sysctl ]; 165 + 166 + checkTarget = "check-all"; 113 167 requiredSystemFeatures = [ "big-parallel" ]; 114 168 115 169 meta = with lib; { ··· 117 171 homepage = "https://github.com/llvm/llvm-project"; 118 172 license = with licenses; [ ncsa ]; 119 173 maintainers = with maintainers; [ SomeoneSerge Madouura ]; 120 - platforms = platforms.linux; 121 - # Consider the derivation broken if we're not building for CUDA or ROCm, or if we're building for aarch64 122 - # and ROCm is enabled. See https://github.com/RadeonOpenCompute/ROCm/issues/1831#issuecomment-1278205344. 123 - broken = stdenv.isAarch64 && !config.cudaSupport; 174 + platforms = with platforms; aarch64 ++ x86; 124 175 }; 125 176 })
+6 -6
pkgs/development/python-modules/torch/default.nix
··· 16 16 filelock, 17 17 jinja2, 18 18 networkx, 19 - openai-triton, 20 19 sympy, 21 20 numpy, pyyaml, cffi, click, typing-extensions, 21 + # ROCm build and `torch.compile` requires `openai-triton` 22 + tritonSupport ? (!stdenv.isDarwin), openai-triton, 22 23 23 24 # Unit tests 24 25 hypothesis, psutil, ··· 303 304 "-Wno-pass-failed" 304 305 ] ++ [ 305 306 "-Wno-unused-command-line-argument" 306 - "-Wno-maybe-uninitialized" 307 307 "-Wno-uninitialized" 308 308 "-Wno-array-bounds" 309 - "-Wno-stringop-overflow" 310 309 "-Wno-free-nonheap-object" 311 310 "-Wno-unused-result" 311 + ] ++ lib.optionals stdenv.cc.isGNU [ 312 + "-Wno-maybe-uninitialized" 313 + "-Wno-stringop-overflow" 312 314 ])); 313 315 314 316 nativeBuildInputs = [ ··· 377 379 # the following are required for tensorboard support 378 380 pillow six future tensorboard protobuf 379 381 380 - # ROCm build and `torch.compile` requires openai-triton 381 - openai-triton 382 - 383 382 # torch/csrc requires `pybind11` at runtime 384 383 pybind11 385 384 ] 385 + ++ lib.optionals tritonSupport [ openai-triton ] 386 386 ++ lib.optionals MPISupport [ mpi ] 387 387 ++ lib.optionals rocmSupport [ rocmtoolkit_joined ]; 388 388