# PyTorch ("torch") Python package for nixpkgs.
#
# NOTE(review): this file was recovered from a garbled "view raw" paste — the
# web viewer's header and the per-line numbers had been fused into the code.
# The expression below is the same content reconstructed as valid Nix.
{ stdenv, lib, fetchFromGitHub, fetchpatch, buildPythonPackage, python,
  config, cudaSupport ? config.cudaSupport, cudaPackages, magma,
  useSystemNccl ? true,
  MPISupport ? false, mpi,
  buildDocs ? false,

  # Native build inputs
  cmake, linkFarm, symlinkJoin, which, pybind11, removeReferencesTo,
  pythonRelaxDepsHook,

  # Build inputs
  numactl,
  Accelerate, CoreServices, libobjc,

  # Propagated build inputs
  filelock,
  jinja2,
  networkx,
  sympy,
  numpy, pyyaml, cffi, click, typing-extensions,
  # ROCm build and `torch.compile` requires `openai-triton`
  tritonSupport ? (!stdenv.isDarwin), openai-triton,

  # Unit tests
  hypothesis, psutil,

  # Disable MKLDNN on aarch64-darwin, it negatively impacts performance,
  # this is also what official pytorch build does
  mklDnnSupport ? !(stdenv.isDarwin && stdenv.isAarch64),

  # virtual pkg that consistently instantiates blas across nixpkgs
  # See https://github.com/NixOS/nixpkgs/pull/83888
  blas,

  # ninja (https://ninja-build.org) must be available to run C++ extensions tests,
  ninja,

  # dependencies for torch.utils.tensorboard
  pillow, six, future, tensorboard, protobuf,

  pythonOlder,

  # ROCm dependencies
  rocmSupport ? config.rocmSupport,
  rocmPackages,
  gpuTargets ? [ ]
}:

let
  inherit (lib) attrsets lists strings trivial;
  inherit (cudaPackages) cudaFlags cudnn;

  # Some packages are not available on all platforms
  nccl = cudaPackages.nccl or null;

  # Render a Nix boolean as the "1"/"0" strings pytorch's setup.py expects.
  setBool = v: if v then "1" else "0";

  # https://github.com/pytorch/pytorch/blob/v2.0.1/torch/utils/cpp_extension.py#L1744
  supportedTorchCudaCapabilities =
    let
      real = ["3.5" "3.7" "5.0" "5.2" "5.3" "6.0" "6.1" "6.2" "7.0" "7.2" "7.5" "8.0" "8.6" "8.7" "8.9" "9.0"];
      ptx = lists.map (x: "${x}+PTX") real;
    in
    real ++ ptx;

  # NOTE: The lists.subtractLists function is perhaps a bit unintuitive. It subtracts the elements
  #   of the first list *from* the second list. That means:
  #   lists.subtractLists a b = b - a

  # For CUDA
  supportedCudaCapabilities = lists.intersectLists cudaFlags.cudaCapabilities supportedTorchCudaCapabilities;
  unsupportedCudaCapabilities = lists.subtractLists supportedCudaCapabilities cudaFlags.cudaCapabilities;

  # Abort evaluation (trivial.throwIf) when no supported GPU targets remain,
  # listing the unsupported targets that were requested.
  gpuArchWarner = supported: unsupported:
    trivial.throwIf (supported == [ ])
      (
        "No supported GPU targets specified. Requested GPU targets: "
        + strings.concatStringsSep ", " unsupported
      )
      supported;

  # Create the gpuTargetString.
  gpuTargetString = strings.concatStringsSep ";" (
    if gpuTargets != [ ] then
      # If gpuTargets is specified, it always takes priority.
      gpuTargets
    else if cudaSupport then
      gpuArchWarner supportedCudaCapabilities unsupportedCudaCapabilities
    else if rocmSupport then
      rocmPackages.clr.gpuTargets
    else
      throw "No GPU targets specified"
  );

  # Merge all required ROCm packages into a single prefix, mimicking the
  # monolithic /opt/rocm layout that pytorch's build system expects.
  rocmtoolkit_joined = symlinkJoin {
    name = "rocm-merged";

    paths = with rocmPackages; [
      rocm-core clr rccl miopen miopengemm rocrand rocblas
      rocsparse hipsparse rocthrust rocprim hipcub roctracer
      rocfft rocsolver hipfft hipsolver hipblas
      rocminfo rocm-thunk rocm-comgr rocm-device-libs
      rocm-runtime clr.icd hipify
    ];

    # Fix `setuptools` not being found
    postBuild = ''
      rm -rf $out/nix-support
    '';
  };

  # Human-readable condition name -> bool; any true entry marks the package broken
  # (see meta.broken and passthru.brokenConditions below).
  brokenConditions = attrsets.filterAttrs (_: cond: cond) {
    "CUDA and ROCm are not mutually exclusive" = cudaSupport && rocmSupport;
    "CUDA is not targeting Linux" = cudaSupport && !stdenv.isLinux;
    "Unsupported CUDA version" = cudaSupport && !(builtins.elem cudaPackages.cudaMajorVersion [ "11" "12" ]);
    "MPI cudatoolkit does not match cudaPackages.cudatoolkit" = MPISupport && cudaSupport && (mpi.cudatoolkit != cudaPackages.cudatoolkit);
    "Magma cudaPackages does not match cudaPackages" = cudaSupport && (magma.cudaPackages != cudaPackages);
  };
in buildPythonPackage rec {
  pname = "torch";
  # Don't forget to update torch-bin to the same version.
  version = "2.0.1";
  format = "setuptools";

  disabled = pythonOlder "3.8.0";

  outputs = [
    "out" # output standard python package
    "dev" # output libtorch headers
    "lib" # output libtorch libraries
  ];

  src = fetchFromGitHub {
    owner = "pytorch";
    repo = "pytorch";
    rev = "refs/tags/v${version}";
    fetchSubmodules = true;
    hash = "sha256-xUj77yKz3IQ3gd/G32pI4OhL3LoN1zS7eFg0/0nZp5I=";
  };

  patches = lib.optionals (stdenv.isDarwin && stdenv.isx86_64) [
    # pthreadpool added support for Grand Central Dispatch in April
    # 2020. However, this relies on functionality (DISPATCH_APPLY_AUTO)
    # that is available starting with macOS 10.13. However, our current
    # base is 10.12. Until we upgrade, we can fall back on the older
    # pthread support.
    ./pthreadpool-disable-gcd.diff
  ] ++ lib.optionals stdenv.isLinux [
    # Propagate CUPTI to Kineto by overriding the search path with environment variables.
    # https://github.com/pytorch/pytorch/pull/108847
    ./pytorch-pr-108847.patch
  ];

  postPatch = lib.optionalString rocmSupport ''
    # https://github.com/facebookincubator/gloo/pull/297
    substituteInPlace third_party/gloo/cmake/Hipify.cmake \
      --replace "\''${HIPIFY_COMMAND}" "python \''${HIPIFY_COMMAND}"

    # Replace hard-coded rocm paths
    substituteInPlace caffe2/CMakeLists.txt \
      --replace "/opt/rocm" "${rocmtoolkit_joined}" \
      --replace "hcc/include" "hip/include" \
      --replace "rocblas/include" "include/rocblas" \
      --replace "hipsparse/include" "include/hipsparse"

    # Doesn't pick up the environment variable?
    substituteInPlace third_party/kineto/libkineto/CMakeLists.txt \
      --replace "\''$ENV{ROCM_SOURCE_DIR}" "${rocmtoolkit_joined}" \
      --replace "/opt/rocm" "${rocmtoolkit_joined}"

    # Strangely, this is never set in cmake
    substituteInPlace cmake/public/LoadHIP.cmake \
      --replace "set(ROCM_PATH \$ENV{ROCM_PATH})" \
        "set(ROCM_PATH \$ENV{ROCM_PATH})''\nset(ROCM_VERSION ${lib.concatStrings (lib.intersperse "0" (lib.splitString "." rocmPackages.clr.version))})"
  ''
  # Detection of NCCL version doesn't work particularly well when using the static binary.
  + lib.optionalString cudaSupport ''
    substituteInPlace cmake/Modules/FindNCCL.cmake \
      --replace \
        'message(FATAL_ERROR "Found NCCL header version and library version' \
        'message(WARNING "Found NCCL header version and library version'
  ''
  # TODO(@connorbaker): Remove this patch after 2.1.0 lands.
  + lib.optionalString cudaSupport ''
    substituteInPlace torch/utils/cpp_extension.py \
      --replace \
        "'8.6', '8.9'" \
        "'8.6', '8.7', '8.9'"
  ''
  # error: no member named 'aligned_alloc' in the global namespace; did you mean simply 'aligned_alloc'
  # This lib overrode aligned_alloc hence the error message. tl;dr: this function is linkable but not in header.
  + lib.optionalString (stdenv.isDarwin && lib.versionOlder stdenv.hostPlatform.darwinSdkVersion "11.0") ''
    substituteInPlace third_party/pocketfft/pocketfft_hdronly.h --replace '#if __cplusplus >= 201703L
    inline void *aligned_alloc(size_t align, size_t size)' '#if __cplusplus >= 201703L && 0
    inline void *aligned_alloc(size_t align, size_t size)'
  '';

  # NOTE(@connorbaker): Though we do not disable Gloo or MPI when building with CUDA support, caution should be taken
  # when using the different backends. Gloo's GPU support isn't great, and MPI and CUDA can't be used at the same time
  # without extreme care to ensure they don't lock each other out of shared resources.
  # For more, see https://github.com/open-mpi/ompi/issues/7733#issuecomment-629806195.
  preConfigure = lib.optionalString cudaSupport ''
    export TORCH_CUDA_ARCH_LIST="${gpuTargetString}"
    export CUDNN_INCLUDE_DIR=${cudnn.dev}/include
    export CUDNN_LIB_DIR=${cudnn.lib}/lib
    export CUPTI_INCLUDE_DIR=${cudaPackages.cuda_cupti.dev}/include
    export CUPTI_LIBRARY_DIR=${cudaPackages.cuda_cupti.lib}/lib
  '' + lib.optionalString rocmSupport ''
    export ROCM_PATH=${rocmtoolkit_joined}
    export ROCM_SOURCE_DIR=${rocmtoolkit_joined}
    export PYTORCH_ROCM_ARCH="${gpuTargetString}"
    export CMAKE_CXX_FLAGS="-I${rocmtoolkit_joined}/include -I${rocmtoolkit_joined}/include/rocblas"
    python tools/amd_build/build_amd.py
  '';

  # Use pytorch's custom configurations
  dontUseCmakeConfigure = true;

  # causes possible redefinition of _FORTIFY_SOURCE
  hardeningDisable = [ "fortify3" ];

  BUILD_NAMEDTENSOR = setBool true;
  BUILD_DOCS = setBool buildDocs;

  # We only do an imports check, so do not build tests either.
  BUILD_TEST = setBool false;

  # Unlike MKL, oneDNN (née MKLDNN) is FOSS, so we enable support for
  # it by default. PyTorch currently uses its own vendored version
  # of oneDNN through Intel iDeep.
  USE_MKLDNN = setBool mklDnnSupport;
  USE_MKLDNN_CBLAS = setBool mklDnnSupport;

  # Avoid using pybind11 from git submodule
  # Also avoids pytorch exporting the headers of pybind11
  USE_SYSTEM_PYBIND11 = true;

  preBuild = ''
    export MAX_JOBS=$NIX_BUILD_CORES
    ${python.pythonOnBuildForHost.interpreter} setup.py build --cmake-only
    ${cmake}/bin/cmake build
  '';

  # Drop the first two rpath entries of every libcaffe2*.so and prepend
  # $ORIGIN so the libraries resolve siblings relative to themselves.
  preFixup = ''
    function join_by { local IFS="$1"; shift; echo "$*"; }
    function strip2 {
      IFS=':'
      read -ra RP <<< $(patchelf --print-rpath $1)
      IFS=' '
      RP_NEW=$(join_by : ''${RP[@]:2})
      patchelf --set-rpath \$ORIGIN:''${RP_NEW} "$1"
    }
    for f in $(find ''${out} -name 'libcaffe2*.so')
    do
      strip2 $f
    done
  '';

  # Override the (weirdly) wrong version set by default. See
  # https://github.com/NixOS/nixpkgs/pull/52437#issuecomment-449718038
  # https://github.com/pytorch/pytorch/blob/v1.0.0/setup.py#L267
  PYTORCH_BUILD_VERSION = version;
  PYTORCH_BUILD_NUMBER = 0;

  USE_NCCL = setBool (nccl != null);
  USE_SYSTEM_NCCL = setBool useSystemNccl; # don't build pytorch's third_party NCCL
  USE_STATIC_NCCL = setBool useSystemNccl;

  # Suppress a weird warning in mkl-dnn, part of ideep in pytorch
  # (upstream seems to have fixed this in the wrong place?)
  # https://github.com/intel/mkl-dnn/commit/8134d346cdb7fe1695a2aa55771071d455fae0bc
  # https://github.com/pytorch/pytorch/issues/22346
  #
  # Also of interest: pytorch ignores CXXFLAGS uses CFLAGS for both C and C++:
  # https://github.com/pytorch/pytorch/blob/v1.11.0/setup.py#L17
  env.NIX_CFLAGS_COMPILE = toString ((lib.optionals (blas.implementation == "mkl") [ "-Wno-error=array-bounds" ]
    # Suppress gcc regression: avx512 math function raises uninitialized variable warning
    # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593
    # See also: Fails to compile with GCC 12.1.0 https://github.com/pytorch/pytorch/issues/77939
    ++ lib.optionals (stdenv.cc.isGNU && lib.versionAtLeast stdenv.cc.version "12.0.0") [
      "-Wno-error=maybe-uninitialized"
      "-Wno-error=uninitialized"
    ]
    # Since pytorch 2.0:
    # gcc-12.2.0/include/c++/12.2.0/bits/new_allocator.h:158:33: error: ‘void operator delete(void*, std::size_t)’
    # ... called on pointer ‘<unknown>’ with nonzero offset [1, 9223372036854775800] [-Werror=free-nonheap-object]
    ++ lib.optionals (stdenv.cc.isGNU && lib.versions.major stdenv.cc.version == "12" ) [
      "-Wno-error=free-nonheap-object"
    ]
    # .../source/torch/csrc/autograd/generated/python_functions_0.cpp:85:3:
    # error: cast from ... to ... converts to incompatible function type [-Werror,-Wcast-function-type-strict]
    ++ lib.optionals (stdenv.cc.isClang && lib.versionAtLeast stdenv.cc.version "16") [
      "-Wno-error=cast-function-type-strict"
    # Suppresses the most spammy warnings.
    # This is mainly to fix https://github.com/NixOS/nixpkgs/issues/266895.
    ] ++ lib.optionals rocmSupport [
      "-Wno-#warnings"
      "-Wno-cpp"
      "-Wno-unknown-warning-option"
      "-Wno-ignored-attributes"
      "-Wno-deprecated-declarations"
      "-Wno-defaulted-function-deleted"
      "-Wno-pass-failed"
    ] ++ [
      "-Wno-unused-command-line-argument"
      "-Wno-uninitialized"
      "-Wno-array-bounds"
      "-Wno-free-nonheap-object"
      "-Wno-unused-result"
    ] ++ lib.optionals stdenv.cc.isGNU [
      "-Wno-maybe-uninitialized"
      "-Wno-stringop-overflow"
    ]));

  nativeBuildInputs = [
    cmake
    which
    ninja
    pybind11
    pythonRelaxDepsHook
    removeReferencesTo
  ] ++ lib.optionals cudaSupport (with cudaPackages; [
    autoAddOpenGLRunpathHook
    cuda_nvcc
  ])
  ++ lib.optionals rocmSupport [ rocmtoolkit_joined ];

  buildInputs = [ blas blas.provider ]
    ++ lib.optionals cudaSupport (with cudaPackages; [
      cuda_cccl.dev # <thrust/*>
      cuda_cudart # cuda_runtime.h and libraries
      cuda_cupti.dev # For kineto
      cuda_cupti.lib # For kineto
      cuda_nvcc.dev # crt/host_config.h; even though we include this in nativeBuildinputs, it's needed here too
      cuda_nvml_dev.dev # <nvml.h>
      cuda_nvrtc.dev
      cuda_nvrtc.lib
      cuda_nvtx.dev
      cuda_nvtx.lib # -llibNVToolsExt
      cudnn.dev
      cudnn.lib
      libcublas.dev
      libcublas.lib
      libcufft.dev
      libcufft.lib
      libcurand.dev
      libcurand.lib
      libcusolver.dev
      libcusolver.lib
      libcusparse.dev
      libcusparse.lib
    ] ++ lists.optionals (nccl != null) [
      # Some platforms do not support NCCL (i.e., Jetson)
      nccl.dev # Provides nccl.h AND a static copy of NCCL!
    ] ++ lists.optionals (strings.versionOlder cudaVersion "11.8") [
      cuda_nvprof.dev # <cuda_profiler_api.h>
    ] ++ lists.optionals (strings.versionAtLeast cudaVersion "11.8") [
      cuda_profiler_api.dev # <cuda_profiler_api.h>
    ])
    ++ lib.optionals rocmSupport [ rocmPackages.llvm.openmp ]
    ++ lib.optionals (cudaSupport || rocmSupport) [ magma ]
    ++ lib.optionals stdenv.isLinux [ numactl ]
    ++ lib.optionals stdenv.isDarwin [ Accelerate CoreServices libobjc ];

  propagatedBuildInputs = [
    cffi
    click
    numpy
    pyyaml

    # From install_requires:
    filelock
    typing-extensions
    sympy
    networkx
    jinja2

    # the following are required for tensorboard support
    pillow six future tensorboard protobuf

    # torch/csrc requires `pybind11` at runtime
    pybind11
  ]
  ++ lib.optionals tritonSupport [ openai-triton ]
  ++ lib.optionals MPISupport [ mpi ]
  ++ lib.optionals rocmSupport [ rocmtoolkit_joined ];

  # Tests take a long time and may be flaky, so just sanity-check imports
  doCheck = false;

  pythonImportsCheck = [
    "torch"
  ];

  nativeCheckInputs = [ hypothesis ninja psutil ];

  # Kept for reference even though doCheck = false above.
  checkPhase = with lib.versions; with lib.strings; concatStringsSep " " [
    "runHook preCheck"
    "${python.interpreter} test/run_test.py"
    "--exclude"
    (concatStringsSep " " [
      "utils" # utils requires git, which is not allowed in the check phase

      # "dataloader" # psutils correctly finds and triggers multiprocessing, but is too sandboxed to run -- resulting in numerous errors
      # ^^^^^^^^^^^^ NOTE: while test_dataloader does return errors, these are acceptable errors and do not interfere with the build

      # tensorboard has acceptable failures for pytorch 1.3.x due to dependencies on tensorboard-plugins
      (optionalString (majorMinor version == "1.3" ) "tensorboard")
    ])
    "runHook postCheck"
  ];

  pythonRemoveDeps = [
    # In our dist-info the name is just "triton"
    "pytorch-triton-rocm"
  ];

  postInstall = ''
    find "$out/${python.sitePackages}/torch/include" "$out/${python.sitePackages}/torch/lib" -type f -exec remove-references-to -t ${stdenv.cc} '{}' +

    mkdir $dev
    cp -r $out/${python.sitePackages}/torch/include $dev/include
    cp -r $out/${python.sitePackages}/torch/share $dev/share

    # Fix up library paths for split outputs
    substituteInPlace \
      $dev/share/cmake/Torch/TorchConfig.cmake \
      --replace \''${TORCH_INSTALL_PREFIX}/lib "$lib/lib"

    substituteInPlace \
      $dev/share/cmake/Caffe2/Caffe2Targets-release.cmake \
      --replace \''${_IMPORT_PREFIX}/lib "$lib/lib"

    mkdir $lib
    mv $out/${python.sitePackages}/torch/lib $lib/lib
    ln -s $lib/lib $out/${python.sitePackages}/torch/lib
  '' + lib.optionalString rocmSupport ''
    substituteInPlace $dev/share/cmake/Tensorpipe/TensorpipeTargets-release.cmake \
      --replace "\''${_IMPORT_PREFIX}/lib64" "$lib/lib"

    substituteInPlace $dev/share/cmake/ATen/ATenConfig.cmake \
      --replace "/build/source/torch/include" "$dev/include"
  '';

  # Rewrite @rpath install names to absolute $lib paths so the dylibs can
  # find each other without relying on rpath entries.
  postFixup = lib.optionalString stdenv.isDarwin ''
    for f in $(ls $lib/lib/*.dylib); do
        install_name_tool -id $lib/lib/$(basename $f) $f || true
    done

    install_name_tool -change @rpath/libshm.dylib $lib/lib/libshm.dylib $lib/lib/libtorch_python.dylib
    install_name_tool -change @rpath/libtorch.dylib $lib/lib/libtorch.dylib $lib/lib/libtorch_python.dylib
    install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libtorch_python.dylib

    install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libtorch.dylib

    install_name_tool -change @rpath/libtorch.dylib $lib/lib/libtorch.dylib $lib/lib/libshm.dylib
    install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libshm.dylib
  '';

  # Builds in 2+h with 2 cores, and ~15m with a big-parallel builder.
  requiredSystemFeatures = [ "big-parallel" ];

  passthru = {
    inherit cudaSupport cudaPackages;
    # At least for 1.10.2 `torch.fft` is unavailable unless BLAS provider is MKL. This attribute allows for easy detection of its availability.
    blasProvider = blas.provider;
    # To help debug when a package is broken due to CUDA support
    inherit brokenConditions;
    cudaCapabilities = if cudaSupport then supportedCudaCapabilities else [ ];
  };

  meta = with lib; {
    changelog = "https://github.com/pytorch/pytorch/releases/tag/v${version}";
    # keep PyTorch in the description so the package can be found under that name on search.nixos.org
    description = "PyTorch: Tensors and Dynamic neural networks in Python with strong GPU acceleration";
    homepage = "https://pytorch.org/";
    license = licenses.bsd3;
    maintainers = with maintainers; [ teh thoughtpolice tscholak ]; # tscholak esp. for darwin-related builds
    platforms = with platforms; linux ++ lib.optionals (!cudaSupport && !rocmSupport) darwin;
    broken = builtins.any trivial.id (builtins.attrValues brokenConditions);
  };
}