python312Packages.torch: 2.6.0 -> 2.7.0 (#401807)

Authored by Gaétan Lepage and committed by GitHub (78ecdee2, 455782de)

6 files changed, +116 -83
pkgs/development/python-modules/torch/source/default.nix (+27 -10)
```diff
···
   # Build inputs
   apple-sdk_13,
   numactl,
+  llvmPackages,

   # dependencies
   astunparse,
···

   setBool = v: if v then "1" else "0";

-  # https://github.com/pytorch/pytorch/blob/v2.6.0/torch/utils/cpp_extension.py#L2046-L2048
+  # https://github.com/pytorch/pytorch/blob/v2.7.0/torch/utils/cpp_extension.py#L2343-L2345
   supportedTorchCudaCapabilities =
     let
       real = [
···
         "9.0"
         "9.0a"
         "10.0"
+        "10.0"
+        "10.0a"
+        "10.1"
+        "10.1a"
+        "12.0"
+        "12.0a"
       ];
       ptx = lists.map (x: "${x}+PTX") real;
     in
···
     else if cudaSupport then
       gpuArchWarner supportedCudaCapabilities unsupportedCudaCapabilities
     else if rocmSupport then
-      rocmPackages.clr.gpuTargets
+      # Remove RDNA1 gfx101x archs from default ROCm support list to avoid
+      # use of undeclared identifier 'CK_BUFFER_RESOURCE_3RD_DWORD'
+      # TODO: Retest after ROCm 6.4 or torch 2.8
+      lib.lists.subtractLists [
+        "gfx1010"
+        "gfx1012"
+      ] (rocmPackages.clr.localGpuTargets or rocmPackages.clr.gpuTargets)
     else
       throw "No GPU targets specified"
   );
···
       rccl
       miopen
       aotriton
+      composable_kernel
       rocrand
       rocblas
       rocsparse
···
 buildPythonPackage rec {
   pname = "torch";
   # Don't forget to update torch-bin to the same version.
-  version = "2.6.0";
+  version = "2.7.0";
   pyproject = true;

   stdenv = stdenv';
···
   patches =
     [
       ./clang19-template-warning.patch
-      # fix invalid static cast in XNNPACK
-      # https://github.com/google/XNNPACK/issues/7489
-      ./xnnpack-bfloat16.patch
     ]
     ++ lib.optionals cudaSupport [ ./fix-cmake-cuda-toolkit.patch ]
     ++ lib.optionals stdenv.hostPlatform.isLinux [
···

   postPatch =
     ''
+      # Prevent NCCL from being cloned during the configure phase
+      # TODO: remove when updating to the next release as it will not be needed anymore
+      substituteInPlace tools/build_pytorch_libs.py \
+        --replace-fail " checkout_nccl()" " "
+
       substituteInPlace cmake/public/cuda.cmake \
         --replace-fail \
           'message(FATAL_ERROR "Found two conflicting CUDA' \
···

       # Replace hard-coded rocm paths
       substituteInPlace caffe2/CMakeLists.txt \
-        --replace-fail "/opt/rocm" "${rocmtoolkit_joined}" \
         --replace-fail "hcc/include" "hip/include" \
         --replace-fail "rocblas/include" "include/rocblas" \
         --replace-fail "hipsparse/include" "include/hipsparse"

       # Doesn't pick up the environment variable?
       substituteInPlace third_party/kineto/libkineto/CMakeLists.txt \
-        --replace-fail "\''$ENV{ROCM_SOURCE_DIR}" "${rocmtoolkit_joined}" \
-        --replace-fail "/opt/rocm" "${rocmtoolkit_joined}"
+        --replace-fail "\''$ENV{ROCM_SOURCE_DIR}" "${rocmtoolkit_joined}"

       # Strangely, this is never set in cmake
       substituteInPlace cmake/public/LoadHIP.cmake \
···
       blas
       blas.provider
     ]
+    ++ lib.optionals stdenv.cc.isClang [ llvmPackages.openmp ]
     ++ lib.optionals cudaSupport (
       with cudaPackages;
       [
···
         cusparselt
         libcublas
         libcufft
+        libcufile
         libcurand
         libcusolver
         libcusparse
···
       --replace-fail "\''${_IMPORT_PREFIX}/lib64" "$lib/lib"

     substituteInPlace $dev/share/cmake/ATen/ATenConfig.cmake \
-      --replace-fail "/build/source/torch/include" "$dev/include"
+      --replace-fail "/build/${src.name}/torch/include" "$dev/include"
   '';

   postFixup =
···
     homepage = "https://pytorch.org/";
     license = lib.licenses.bsd3;
     maintainers = with lib.maintainers; [
+      GaetanLepage
       teh
       thoughtpolice
       tscholak
```
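Two list manipulations in this diff carry the interesting logic: every "real" CUDA capability also gets a `+PTX` variant, and the ROCm branch filters the advertised targets with `lib.lists.subtractLists`, falling back from `clr.localGpuTargets` to `clr.gpuTargets` via the `or` operator. A minimal standalone sketch of both (evaluable with `nix-instantiate --eval --strict`, assuming `<nixpkgs>` is on `NIX_PATH`; the target values are illustrative, not the real lists):

```nix
let
  lib = import <nixpkgs/lib>;

  # Mirrors supportedTorchCudaCapabilities: each real architecture is
  # doubled up with a "+PTX" variant.
  real = [ "9.0" "10.0" "12.0" ];
  ptx = map (x: "${x}+PTX") real;
  supported = real ++ ptx;
  # => [ "9.0" "10.0" "12.0" "9.0+PTX" "10.0+PTX" "12.0+PTX" ]

  # Mirrors the rocmSupport branch: drop the RDNA1 targets from
  # whatever list the ROCm packages report (illustrative values here).
  advertised = [ "gfx906" "gfx1010" "gfx1012" "gfx1030" ];
  rocmTargets = lib.lists.subtractLists [ "gfx1010" "gfx1012" ] advertised;
  # => [ "gfx906" "gfx1030" ]
in
{ inherit supported rocmTargets; }
```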
pkgs/development/python-modules/torch/source/src.nix (+62 -33)
```diff
···
   fetchFromGitHub,
   runCommand,
 }:
-assert version == "2.6.0";
+assert version == "2.7.0";
 (rec {
   src_asmjit = fetchFromGitHub {
     owner = "asmjit";
···
   src_composable_kernel = fetchFromGitHub {
     owner = "ROCm";
     repo = "composable_kernel";
-    rev = "50ee4267e27b875d149e642f4cebd47be1dc3b57";
-    hash = "sha256-COkyf+FZzX6OdOPCHkP2bXsVvSXK9UR9s7RHWRtIXUE=";
+    rev = "8086bbe3a78d931eb96fe12fdc014082e18d18d3";
+    hash = "sha256-fyL1SzRs5CXW5CWy6kCN1y1xX6cG+ur7iQlbKX2zbCM=";
+  };
+  src_composable_kernel_flash-attention = fetchFromGitHub {
+    owner = "ROCm";
+    repo = "composable_kernel";
+    rev = "888317e698e9803c62bd38568abc9e05d7709f33";
+    hash = "sha256-0FAiGf7AErBzGxhOFcLQ4ceshqp0Esqo4ee7NsjfJUo=";
   };
   src_cpp-httplib = fetchFromGitHub {
     owner = "yhirose";
···
   src_cudnn-frontend = fetchFromGitHub {
     owner = "NVIDIA";
     repo = "cudnn-frontend";
-    rev = "936021bfed8c91dc416af1588b2c4eca631a9e45";
-    hash = "sha256-hKqIWGxVco1qkKxDZjc+pUisIcYJwFjZobJZg1WgDvY=";
+    rev = "91b7532f3386768bba4f444ee7672b497f34da8a";
+    hash = "sha256-Ks07ApADA3xQQ+N5BIfvDG+djCvxYL9btF8Aw7TuMvQ=";
   };
   src_cutlass = fetchFromGitHub {
     owner = "NVIDIA";
     repo = "cutlass";
-    rev = "bbe579a9e3beb6ea6626d9227ec32d0dae119a49";
-    hash = "sha256-81O80F3MMOn22N9UaXLU6/9DTVWenYvKhLTHoxw8EEU=";
+    rev = "afa1772203677c5118fcd82537a9c8fefbcc7008";
+    hash = "sha256-oIzlbKRdOh6gp6nRZ8udLSqleBFoFtgM7liCBlHZLOk=";
   };
   src_cutlass_fbgemm = fetchFromGitHub {
     owner = "NVIDIA";
···
     rev = "fc9ebc645b63f3a6bc80aaefde5c063fb72110d6";
     hash = "sha256-e2SwXNNwjl/1fV64b+mOJvwGDYeO1LFcqZGbNten37U=";
   };
+  src_cutlass_flash-attention = fetchFromGitHub {
+    owner = "NVIDIA";
+    repo = "cutlass";
+    rev = "c506e16788cb08416a4a57e11a9067beeee29420";
+    hash = "sha256-P/BgJ7MdNTzNgsqoTjxrOQwvKfhAS+rmBw3sC/SdO1I=";
+  };
   src_DCGM = fetchFromGitHub {
     owner = "NVIDIA";
     repo = "DCGM";
···
     rev = "7e1e1fe3858c63c251c637ae41a20de425dde96f";
     hash = "sha256-PsgUHtCE3dNR2QdUnRjrXb0ZKZNGwFkA8RWYkZEklEY=";
   };
+  src_flash-attention = fetchFromGitHub {
+    owner = "Dao-AILab";
+    repo = "flash-attention";
+    rev = "979702c87a8713a8e0a5e9fee122b90d2ef13be5";
+    hash = "sha256-7jk8TD+EkkOPrVT5Nzoc+zoZ4jH/C1AtjLcP12hBrp0=";
+  };
   src_flatbuffers = fetchFromGitHub {
     owner = "google";
     repo = "flatbuffers";
···
   src_fmt = fetchFromGitHub {
     owner = "fmtlib";
     repo = "fmt";
-    rev = "0c9fce2ffefecfdce794e1859584e25877b7b592";
-    hash = "sha256-IKNt4xUoVi750zBti5iJJcCk3zivTt7nU12RIf8pM+0=";
+    rev = "123913715afeb8a437e6388b4473fcc4753e1c9a";
+    hash = "sha256-sUbxlYi/Aupaox3JjWFqXIjcaQa0LFjclQAOleT+FRA=";
   };
   src_fmt_dynolog = fetchFromGitHub {
     owner = "fmtlib";
···
   src_ideep = fetchFromGitHub {
     owner = "intel";
     repo = "ideep";
-    rev = "c7ccd5bdbe5434ba156f4e856dcef0601637334b";
-    hash = "sha256-/u3wJQl49tBw/QNVDBXL+eNSRCuvltso7g1+siIhlXM=";
+    rev = "719d8e6cd7f7a0e01b155657526d693acf97c2b3";
+    hash = "sha256-WX1lluqYeoB2IuwJX1vqZ6nr8W8KsWxUDIvKNA3CAxo=";
   };
   src_ittapi = fetchFromGitHub {
     owner = "intel";
···
   src_kineto = fetchFromGitHub {
     owner = "pytorch";
     repo = "kineto";
-    rev = "338140f58a28d599da3434ced4fd2d75dd1a213d";
-    hash = "sha256-ydHUYB2m7dbbqBtLordZUCraewU0Q4m/ohBXNKtqHnw=";
+    rev = "a054a4be0db117c579a21747debf19c863631f26";
+    hash = "sha256-FyxMiDoDrjI8JZZNxK4S18yjULkKLUvqLQsCyxaTLSg=";
+  };
+  src_kleidiai = fetchFromGitHub {
+    owner = "ARM-software";
+    repo = "kleidiai";
+    rev = "ef685a13cfbe8d418aa2ed34350e21e4938358b6";
+    hash = "sha256-5RS2o+163/6Q+0y0FuWz6OBXw3FKxZINDkGOqlmd2N0=";
   };
   src_libnop = fetchFromGitHub {
     owner = "google";
···
   src_mkl-dnn = fetchFromGitHub {
     owner = "intel";
     repo = "mkl-dnn";
-    rev = "66f0cb9eb66affd2da3bf5f8d897376f04aae6af";
-    hash = "sha256-/ERkk6bgGEKoJEVdnBxMFEzB8pii71t3zQZNtyg+TdQ=";
-  };
-  src_nccl = fetchFromGitHub {
-    owner = "NVIDIA";
-    repo = "nccl";
-    rev = "ab2b89c4c339bd7f816fbc114a4b05d386b66290";
-    hash = "sha256-IF2tILwW8XnzSmfn7N1CO7jXL95gUp02guIW5n1eaig=";
+    rev = "8d263e693366ef8db40acc569cc7d8edf644556d";
+    hash = "sha256-+4z5l0mJsw0SOW245GfZh41mdHGZ8u+xED7afm6pQjs=";
   };
   src_NNPACK = fetchFromGitHub {
     owner = "Maratyszcza";
···
     hash = "sha256-lV+VZi2b4SQlRYrhKx9Dxc6HlDEFz3newvcBjTekupo=";
   };
   src_pthreadpool = fetchFromGitHub {
-    owner = "google";
+    owner = "Maratyszcza";
     repo = "pthreadpool";
-    rev = "b92447772365661680f486e39a91dfe6675adafc";
-    hash = "sha256-lRY+5sYv/KOuVngsrODRa4/yj1CDmZBW2HuKEbjsY+0=";
+    rev = "4fe0e1e183925bf8cfa6aae24237e724a96479b8";
+    hash = "sha256-R4YmNzWEELSkAws/ejmNVxqXDTJwcqjLU/o/HvgRn2E=";
   };
   src_pybind11 = fetchFromGitHub {
     owner = "pybind";
···
   src_pytorch = fetchFromGitHub {
     owner = "pytorch";
     repo = "pytorch";
-    rev = "v2.6.0";
-    hash = "sha256-X58OPtnPplEFtzOrQzyYRwdTmaoWEOGWWF5v6y+vaWo=";
+    rev = "v2.7.0";
+    hash = "sha256-ReXyzy+OuYxEQwU+t2WL3+jqd7ItdW6w8MiS0f9t+aY=";
   };
   src_sleef = fetchFromGitHub {
     owner = "shibatch";
     repo = "sleef";
-    rev = "60e76d2bce17d278b439d9da17177c8f957a9e9b";
-    hash = "sha256-JfARLkdt4je8ll+oqPGJqzUCQbsXoJ0bbX3jf0aHd0o=";
+    rev = "56e1f79cb140fb9326d612d0be06b5250565cade";
+    hash = "sha256-5hha7c/Lu6fkbXGlDieoJP6n2bnjY5iPp+hm0f//0ek=";
   };
   src_tensorpipe = fetchFromGitHub {
     owner = "pytorch";
···
   src_XNNPACK = fetchFromGitHub {
     owner = "google";
     repo = "XNNPACK";
-    rev = "4ea82e595b36106653175dcb04b2aa532660d0d8";
-    hash = "sha256-5IubxhCW5E6xzrOLfJzWPrPeftaxXApTx09p6B3NPgo=";
+    rev = "51a0103656eff6fc9bfd39a4597923c4b542c883";
+    hash = "sha256-nhowllqv/hBs7xHdTwbWtiKJ1mvAYsVIyIZ35ZGsmkg=";
   };
   src_asmjit_recursive = src_asmjit;
   src_benchmark_recursive = src_benchmark;
···
   src_civetweb_recursive = src_civetweb;
   src_clang-cindex-python3_recursive = src_clang-cindex-python3;
   src_composable_kernel_recursive = src_composable_kernel;
+  src_composable_kernel_flash-attention_recursive = src_composable_kernel_flash-attention;
   src_cpp-httplib_recursive = src_cpp-httplib;
   src_cpr_recursive = src_cpr;
   src_cpuinfo_recursive = src_cpuinfo;
···
   src_cudnn-frontend_recursive = src_cudnn-frontend;
   src_cutlass_recursive = src_cutlass;
   src_cutlass_fbgemm_recursive = src_cutlass_fbgemm;
+  src_cutlass_flash-attention_recursive = src_cutlass_flash-attention;
   src_DCGM_recursive = src_DCGM;
   src_dynolog_recursive = runCommand "dynolog" { } ''
     cp -r ${src_dynolog} $out
···
     cp -r ${src_hipify_torch_recursive}/* $out/third_party/hipify_torch
   '';
   src_fbjni_recursive = src_fbjni;
+  src_flash-attention_recursive = runCommand "flash-attention" { } ''
+    cp -r ${src_flash-attention} $out
+    chmod u+w $out/csrc/composable_kernel
+    cp -r ${src_composable_kernel_flash-attention_recursive}/* $out/csrc/composable_kernel
+    chmod u+w $out/csrc/cutlass
+    cp -r ${src_cutlass_flash-attention_recursive}/* $out/csrc/cutlass
+  '';
   src_flatbuffers_recursive = src_flatbuffers;
   src_fmt_recursive = src_fmt;
   src_fmt_dynolog_recursive = src_fmt_dynolog;
···
     chmod u+w $out/libkineto/third_party/googletest
     cp -r ${src_googletest_kineto_recursive}/* $out/libkineto/third_party/googletest
   '';
+  src_kleidiai_recursive = src_kleidiai;
   src_libnop_recursive = src_libnop;
   src_libuv_recursive = src_libuv;
   src_mimalloc_recursive = src_mimalloc;
   src_mkl-dnn_recursive = src_mkl-dnn;
-  src_nccl_recursive = src_nccl;
   src_NNPACK_recursive = src_NNPACK;
   src_NVTX_recursive = src_NVTX;
   src_onnx_recursive = runCommand "onnx" { } ''
···
     cp -r ${src_eigen_recursive}/* $out/third_party/eigen
     chmod u+w $out/third_party/fbgemm
     cp -r ${src_fbgemm_recursive}/* $out/third_party/fbgemm
+    chmod u+w $out/third_party/flash-attention
+    cp -r ${src_flash-attention_recursive}/* $out/third_party/flash-attention
     chmod u+w $out/third_party/flatbuffers
     cp -r ${src_flatbuffers_recursive}/* $out/third_party/flatbuffers
     chmod u+w $out/third_party/fmt
···
     cp -r ${src_ittapi_recursive}/* $out/third_party/ittapi
     chmod u+w $out/third_party/kineto
     cp -r ${src_kineto_recursive}/* $out/third_party/kineto
+    chmod u+w $out/third_party/kleidiai
+    cp -r ${src_kleidiai_recursive}/* $out/third_party/kleidiai
     chmod u+w $out/third_party/mimalloc
     cp -r ${src_mimalloc_recursive}/* $out/third_party/mimalloc
-    chmod u+w $out/third_party/nccl/nccl
-    cp -r ${src_nccl_recursive}/* $out/third_party/nccl/nccl
     chmod u+w $out/third_party/nlohmann
     cp -r ${src_json_recursive}/* $out/third_party/nlohmann
     chmod u+w $out/third_party/NNPACK
```
pkgs/development/python-modules/torch/source/xnnpack-bfloat16.patch (deleted, -21)
```diff
-diff --git a/third_party/XNNPACK/src/reference/unary-elementwise.cc b/third_party/XNNPACK/src/reference/unary-elementwise.cc
-index bd95ded6c..da892d8be 100644
---- a/third_party/XNNPACK/src/reference/unary-elementwise.cc
-+++ b/third_party/XNNPACK/src/reference/unary-elementwise.cc
-@@ -127,6 +127,16 @@ struct ConvertOp {
-   }
- };
-
-+#ifdef XNN_HAVE_FLOAT16
-+template <>
-+struct ConvertOp<xnn_bfloat16, _Float16> {
-+  explicit ConvertOp(const xnn_unary_uparams*) {}
-+  _Float16 operator()(xnn_bfloat16 x) const {
-+    return static_cast<_Float16>(static_cast<float>(x));
-+  }
-+};
-+#endif
-+
- template <typename TIn, typename TOut>
- const xnn_unary_elementwise_config* get_convert_config(
-     std::true_type /*input_quantized*/, std::true_type /*output_quantized*/) {
```
pkgs/development/python-modules/torchaudio/default.nix (+8 -4)
```diff
···
   pybind11,
   sox,
   torch,
+  llvmPackages,

   cudaSupport ? torch.cudaSupport,
   cudaPackages,
···
 in
 buildPythonPackage rec {
   pname = "torchaudio";
-  version = "2.6.0";
+  version = "2.7.0";
   pyproject = true;

   stdenv = torch.stdenv;
···
     owner = "pytorch";
     repo = "audio";
     tag = "v${version}";
-    hash = "sha256-WNdDBB2nShbPPW7GU5cMij00u5PUdN+j5pm41yrKnCA=";
+    hash = "sha256-/5XIVj0jLE7+A1LZxA3bFH3mdwNIcrV4XMOa4xznr/w=";
   };

   patches = [
···
     pybind11
     sox
     torch.cxxdev
-  ];
+  ] ++ lib.optionals stdenv.cc.isClang [ llvmPackages.openmp ];

   dependencies = [ torch ];
···
     platforms =
       lib.platforms.linux
       ++ lib.optionals (!cudaSupport && !rocmSupport) lib.platforms.darwin;
-    maintainers = with lib.maintainers; [ junjihashimoto ];
+    maintainers = with lib.maintainers; [
+      GaetanLepage
+      junjihashimoto
+    ];
   };
 }
```
pkgs/development/python-modules/torchvision/default.nix (+12 -12)
```diff
···
   inherit (torch) cudaCapabilities cudaPackages cudaSupport;

   pname = "torchvision";
-  version = "0.21.0";
+  version = "0.22.0";
 in
 buildPythonPackage {
   inherit pname version;
···
     owner = "pytorch";
     repo = "vision";
     tag = "v${version}";
-    hash = "sha256-eDWw1Lt/sUc2Xt6cqOM5xaOfmsm+NEL5lZO+cIJKMtU=";
+    hash = "sha256-+70Rhfma4dM5tRlYNz0cuuTIxRbYf6dsnAhvkw7a5kM=";
   };

   nativeBuildInputs = [
···
     scipy
   ];

-  preConfigure =
-    ''
-      export TORCHVISION_INCLUDE="${libjpeg_turbo.dev}/include/"
-      export TORCHVISION_LIBRARY="${libjpeg_turbo}/lib/"
-    ''
-    + lib.optionalString cudaSupport ''
-      export TORCH_CUDA_ARCH_LIST="${lib.concatStringsSep ";" cudaCapabilities}"
-      export FORCE_CUDA=1
-    '';
+  env =
+    {
+      TORCHVISION_INCLUDE = "${libjpeg_turbo.dev}/include/";
+      TORCHVISION_LIBRARY = "${libjpeg_turbo}/lib/";
+    }
+    // lib.optionalAttrs cudaSupport {
+      TORCH_CUDA_ARCH_LIST = "${lib.concatStringsSep ";" cudaCapabilities}";
+      FORCE_CUDA = 1;
+    };

   # tests download big datasets, models, require internet connection, etc.
   doCheck = false;
···
     changelog = "https://github.com/pytorch/vision/releases/tag/v${version}";
     license = lib.licenses.bsd3;
     platforms = with lib.platforms; linux ++ lib.optionals (!cudaSupport) darwin;
-    maintainers = with lib.maintainers; [ ];
+    maintainers = with lib.maintainers; [ GaetanLepage ];
   };
 }
```
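The torchvision change from `preConfigure` to `env` trades shell `export`s, which only take effect once the configure phase runs, for derivation-level environment variables that are present in every phase and contribute to the derivation's hash. A reduced sketch of the two forms in a stand-in derivation (names illustrative; the commented-out block is the old style):

```nix
{ lib, stdenv, cudaSupport ? false }:

stdenv.mkDerivation {
  pname = "env-demo";
  version = "0";
  dontUnpack = true;

  # Old style: a shell export, visible only from the configure
  # phase onwards.
  # preConfigure = lib.optionalString cudaSupport ''
  #   export FORCE_CUDA=1
  # '';

  # New style: part of the derivation environment itself; the integer
  # 1 is coerced to the string "1" when the environment is built.
  env = lib.optionalAttrs cudaSupport {
    FORCE_CUDA = 1;
  };

  # Print the variable during the build, just to demonstrate it.
  buildPhase = ''
    echo "FORCE_CUDA=''${FORCE_CUDA:-unset}"
  '';
  installPhase = "touch $out";
}
```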
pkgs/development/rocm-modules/6/aotriton/default.nix (+7 -3)
```diff
···
   gpuTargets' = lib.concatStringsSep ";" gpuTargets;
   compiler = "amdclang++";
   cFlags = "-O3 -DNDEBUG";
+  cxxFlags = "${cFlags} -Wno-c++11-narrowing";
   triton-llvm' = triton-llvm;
 in
 {
   pname = "aotriton";
-  version = "0.8.2b";
+  version = "0.9.2b";

   src = fetchFromGitHub {
     owner = "ROCm";
     repo = "aotriton";
     rev = "${finalAttrs.version}";
-    hash = "sha256-gSzGYWfyUNLyzqpu3BM8rjFFL7cRVZ+w9L5pnh9QGz4=";
+    hash = "sha256-1Cf0olD3zRg9JESD6s/WaGifm3kfD12VUvjTZHpmGAE=";
     fetchSubmodules = true;
   };
   env.CXX = compiler;
···
     substituteInPlace third_party/triton/python/setup.py \
       --replace-fail "from distutils.command.clean import clean" "import setuptools;from distutils.command.clean import clean" \
       --replace-fail 'system == "Linux"' 'False'
+    # Fix 'ld: error: unable to insert .comment after .comment'
+    substituteInPlace v2python/ld_script.py \
+      --replace-fail 'INSERT AFTER .comment;' ""

     cmakeFlagsArray+=(
       '-DCMAKE_C_FLAGS_RELEASE=${cFlags}'
-      '-DCMAKE_CXX_FLAGS_RELEASE=${cFlags}'
+      '-DCMAKE_CXX_FLAGS_RELEASE=${cxxFlags}'
     )
     prependToVar cmakeFlags "-GNinja"
     mkdir -p /build/tmp-home
```
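The new `cxxFlags` goes through `cmakeFlagsArray` rather than the flat `cmakeFlags` string because its value contains spaces, and only the bash-array form delivers such a value to cmake as a single argument. A minimal sketch of the distinction in a throwaway derivation (flag values illustrative):

```nix
{ stdenv }:

stdenv.mkDerivation {
  pname = "flags-demo";
  version = "0";
  dontUnpack = true;

  buildPhase = ''
    # Flat string variable: whitespace-separated, so a value like
    # "-O3 -DNDEBUG" would be torn into two unrelated flags here.
    prependToVar cmakeFlags "-DBUILD_TESTING=OFF"
    echo "cmakeFlags: $cmakeFlags"

    # Bash array: each element survives as one argument, embedded
    # spaces included, which is what the release-flags entry needs.
    cmakeFlagsArray+=('-DCMAKE_CXX_FLAGS_RELEASE=-O3 -DNDEBUG -Wno-c++11-narrowing')
    for f in "''${cmakeFlagsArray[@]}"; do echo "array element: $f"; done
  '';

  installPhase = "touch $out";
}
```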