{ stdenv, lib, fetchFromGitHub, fetchpatch, buildPythonPackage, python,
  cudaSupport ? false, cudaPackages, magma,
  useSystemNccl ? true,
  MPISupport ? false, mpi,
  buildDocs ? false,

  # Native build inputs
  cmake, util-linux, linkFarm, symlinkJoin, which, pybind11, removeReferencesTo,
  pythonRelaxDepsHook,

  # Build inputs
  numactl,
  Accelerate, CoreServices, libobjc,

  # Propagated build inputs
  filelock,
  jinja2,
  networkx,
  openai-triton,
  sympy,
  numpy, pyyaml, cffi, click, typing-extensions,

  # Unit tests
  hypothesis, psutil,

  # Disable MKLDNN on aarch64-darwin; it negatively impacts performance.
  # This is also what the official PyTorch build does.
  mklDnnSupport ? !(stdenv.isDarwin && stdenv.isAarch64),

  # virtual pkg that consistently instantiates blas across nixpkgs
  # See https://github.com/NixOS/nixpkgs/pull/83888
  blas,

  # ninja (https://ninja-build.org) must be available to run C++ extension tests.
  ninja,

  linuxHeaders_5_19,

  # dependencies for torch.utils.tensorboard
  pillow, six, future, tensorboard, protobuf,

  isPy3k, pythonOlder,

  # ROCm dependencies
  rocmSupport ? false,
  gpuTargets ? [ ],
  openmp, rocm-core, hip, rccl, miopen, miopengemm, rocrand, rocblas,
  rocfft, rocsparse, hipsparse, rocthrust, rocprim, hipcub, roctracer,
  rocsolver, hipfft, hipsolver, hipblas, rocminfo, rocm-thunk, rocm-comgr,
  rocm-device-libs, rocm-runtime, rocm-opencl-runtime, hipify
}:

let
  inherit (lib) lists strings trivial;
  inherit (cudaPackages) cudatoolkit cudaFlags cudnn nccl;
in

assert cudaSupport -> stdenv.isLinux;
assert cudaSupport -> (cudaPackages.cudaMajorVersion == "11");

# confirm that cudatoolkits are sync'd across dependencies
assert !(MPISupport && cudaSupport) || mpi.cudatoolkit == cudatoolkit;
assert !cudaSupport || magma.cudaPackages.cudatoolkit == cudatoolkit;

let
  setBool = v: if v then "1" else "0";
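  # For example, setBool true evaluates to "1" and setBool false to "0", the
  # string form used by the BUILD_*/USE_* environment variables set below.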

  # https://github.com/pytorch/pytorch/blob/v1.13.1/torch/utils/cpp_extension.py#L1751
  supportedTorchCudaCapabilities =
    let
      real = ["3.5" "3.7" "5.0" "5.2" "5.3" "6.0" "6.1" "6.2" "7.0" "7.2" "7.5" "8.0" "8.6"];
      ptx = lists.map (x: "${x}+PTX") real;
    in
    real ++ ptx;

  # NOTE: The lists.subtractLists function is perhaps a bit unintuitive. It subtracts the elements
  # of the first list *from* the second list. That means:
  #   lists.subtractLists a b = b - a
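  # A concrete example (values illustrative):
  #   lists.subtractLists [ "8.6" ] [ "8.0" "8.6" ] == [ "8.0" ]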

  # For CUDA
  supportedCudaCapabilities = lists.intersectLists cudaFlags.cudaCapabilities supportedTorchCudaCapabilities;
  unsupportedCudaCapabilities = lists.subtractLists supportedCudaCapabilities cudaFlags.cudaCapabilities;

  # Use trivial.throwIf to abort the evaluation if no supported GPU targets are
  # left after filtering against the capabilities pytorch supports.
  gpuArchWarner = supported: unsupported:
    trivial.throwIf (supported == [ ])
      (
        "No supported GPU targets specified. Requested GPU targets: "
        + strings.concatStringsSep ", " unsupported
      )
      supported;
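  # For example (illustrative): gpuArchWarner [ "8.6" ] [ ] returns [ "8.6" ],
  # while gpuArchWarner [ ] [ "9.0" ] aborts with
  # "No supported GPU targets specified. Requested GPU targets: 9.0".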

  # Create the gpuTargetString.
  gpuTargetString = strings.concatStringsSep ";" (
    if gpuTargets != [ ] then
      # If gpuTargets is specified, it always takes priority.
      gpuTargets
    else if cudaSupport then
      gpuArchWarner supportedCudaCapabilities unsupportedCudaCapabilities
    else if rocmSupport then
      hip.gpuTargets
    else
      throw "No GPU targets specified"
  );
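  # The result is a semicolon-separated string such as "8.0;8.6+PTX"
  # (illustrative), the format exported below via TORCH_CUDA_ARCH_LIST and
  # PYTORCH_ROCM_ARCH.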

  cudatoolkit_joined = symlinkJoin {
    name = "${cudatoolkit.name}-unsplit";
    # nccl is here purely for semantic grouping; it could be moved to nativeBuildInputs
    paths = [ cudatoolkit.out cudatoolkit.lib nccl.dev nccl.out ];
  };

  # Normally libcuda.so.1 is provided at runtime by nvidia-x11 via
  # LD_LIBRARY_PATH=/run/opengl-driver/lib. We only use the stub
  # libcuda.so from cudatoolkit for running tests, so that we don’t have
  # to recompile pytorch on every update to nvidia-x11 or the kernel.
  cudaStub = linkFarm "cuda-stub" [{
    name = "libcuda.so.1";
    path = "${cudatoolkit}/lib/stubs/libcuda.so";
  }];
  cudaStubEnv = lib.optionalString cudaSupport
    "LD_LIBRARY_PATH=${cudaStub}\${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH ";

  rocmtoolkit_joined = symlinkJoin {
    name = "rocm-merged";

    paths = [
      rocm-core hip rccl miopen miopengemm rocrand rocblas
      rocfft rocsparse hipsparse rocthrust rocprim hipcub
      roctracer rocsolver hipfft hipsolver hipblas
      rocminfo rocm-thunk rocm-comgr rocm-device-libs
      rocm-runtime rocm-opencl-runtime hipify
    ];
  };
in buildPythonPackage rec {
  pname = "torch";
  # Don't forget to update torch-bin to the same version.
  version = "2.0.1";
  format = "setuptools";

  disabled = pythonOlder "3.8.0";

  outputs = [
    "out" # output standard python package
    "dev" # output libtorch headers
    "lib" # output libtorch libraries
  ];

  src = fetchFromGitHub {
    owner = "pytorch";
    repo = "pytorch";
    rev = "refs/tags/v${version}";
    fetchSubmodules = true;
    hash = "sha256-xUj77yKz3IQ3gd/G32pI4OhL3LoN1zS7eFg0/0nZp5I=";
  };

  patches = lib.optionals (stdenv.isDarwin && stdenv.isx86_64) [
    # pthreadpool added support for Grand Central Dispatch in April
    # 2020. However, this relies on functionality (DISPATCH_APPLY_AUTO)
    # that is only available starting with macOS 10.13, while our current
    # base is 10.12. Until we upgrade, we can fall back on the older
    # pthread support.
    ./pthreadpool-disable-gcd.diff
  ];

  postPatch = lib.optionalString rocmSupport ''
    # https://github.com/facebookincubator/gloo/pull/297
    substituteInPlace third_party/gloo/cmake/Hipify.cmake \
      --replace "\''${HIPIFY_COMMAND}" "python \''${HIPIFY_COMMAND}"

    # Replace hard-coded rocm paths
    substituteInPlace caffe2/CMakeLists.txt \
      --replace "/opt/rocm" "${rocmtoolkit_joined}" \
      --replace "hcc/include" "hip/include" \
      --replace "rocblas/include" "include/rocblas" \
      --replace "hipsparse/include" "include/hipsparse"

    # Doesn't pick up the environment variable?
    substituteInPlace third_party/kineto/libkineto/CMakeLists.txt \
      --replace "\''$ENV{ROCM_SOURCE_DIR}" "${rocmtoolkit_joined}" \
      --replace "/opt/rocm" "${rocmtoolkit_joined}"

    # Strangely, this is never set in cmake
    substituteInPlace cmake/public/LoadHIP.cmake \
      --replace "set(ROCM_PATH \$ENV{ROCM_PATH})" \
        "set(ROCM_PATH \$ENV{ROCM_PATH})''\nset(ROCM_VERSION ${lib.concatStrings (lib.intersperse "0" (lib.splitString "." hip.version))})"
  ''
  # error: no member named 'aligned_alloc' in the global namespace; did you mean simply 'aligned_alloc'
  # This lib overrides aligned_alloc, hence the error message. TL;DR: the function is linkable but not declared in the header.
  + lib.optionalString (stdenv.isDarwin && lib.versionOlder stdenv.targetPlatform.darwinSdkVersion "11.0") ''
    substituteInPlace third_party/pocketfft/pocketfft_hdronly.h --replace '#if __cplusplus >= 201703L
    inline void *aligned_alloc(size_t align, size_t size)' '#if __cplusplus >= 201703L && 0
    inline void *aligned_alloc(size_t align, size_t size)'
  '';

  preConfigure = lib.optionalString cudaSupport ''
    export TORCH_CUDA_ARCH_LIST="${gpuTargetString}"
    export CC=${cudatoolkit.cc}/bin/gcc CXX=${cudatoolkit.cc}/bin/g++
  '' + lib.optionalString (cudaSupport && cudnn != null) ''
    export CUDNN_INCLUDE_DIR=${cudnn}/include
  '' + lib.optionalString rocmSupport ''
    export ROCM_PATH=${rocmtoolkit_joined}
    export ROCM_SOURCE_DIR=${rocmtoolkit_joined}
    export PYTORCH_ROCM_ARCH="${gpuTargetString}"
    export CMAKE_CXX_FLAGS="-I${rocmtoolkit_joined}/include -I${rocmtoolkit_joined}/include/rocblas"
    python tools/amd_build/build_amd.py
  '';

  # Use pytorch's custom configurations
  dontUseCmakeConfigure = true;

  BUILD_NAMEDTENSOR = setBool true;
  BUILD_DOCS = setBool buildDocs;

  # We only do an import check, so do not build tests either.
  BUILD_TEST = setBool false;

  # Unlike MKL, oneDNN (née MKLDNN) is FOSS, so we enable support for
  # it by default. PyTorch currently uses its own vendored version
  # of oneDNN through Intel iDeep.
  USE_MKLDNN = setBool mklDnnSupport;
  USE_MKLDNN_CBLAS = setBool mklDnnSupport;

  # Avoid using pybind11 from the git submodule.
  # Also avoids pytorch exporting the headers of pybind11.
  USE_SYSTEM_PYBIND11 = true;

  preBuild = ''
    export MAX_JOBS=$NIX_BUILD_CORES
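    # --cmake-only makes setup.py generate the cmake build directory without
    # compiling; the explicit cmake call below then re-runs the configuration on it.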
    ${python.pythonForBuild.interpreter} setup.py build --cmake-only
    ${cmake}/bin/cmake build
  '';

  preFixup = ''
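    # strip2 rewrites a library's RUNPATH: it drops the first two entries and
    # prepends $ORIGIN, so the remaining references resolve relative to the
    # installed library itself.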
    function join_by { local IFS="$1"; shift; echo "$*"; }
    function strip2 {
      IFS=':'
      read -ra RP <<< "$(patchelf --print-rpath "$1")"
      IFS=' '
      RP_NEW=$(join_by : ''${RP[@]:2})
      patchelf --set-rpath \$ORIGIN:''${RP_NEW} "$1"
    }
    for f in $(find ''${out} -name 'libcaffe2*.so')
    do
      strip2 "$f"
    done
  '';

  # Override the (weirdly) wrong version set by default. See
  # https://github.com/NixOS/nixpkgs/pull/52437#issuecomment-449718038
  # https://github.com/pytorch/pytorch/blob/v1.0.0/setup.py#L267
  PYTORCH_BUILD_VERSION = version;
  PYTORCH_BUILD_NUMBER = 0;

  USE_SYSTEM_NCCL = setBool useSystemNccl; # don't build pytorch's third_party NCCL

  # Suppress a weird warning in mkl-dnn, part of ideep in pytorch
  # (upstream seems to have fixed this in the wrong place?)
  # https://github.com/intel/mkl-dnn/commit/8134d346cdb7fe1695a2aa55771071d455fae0bc
  # https://github.com/pytorch/pytorch/issues/22346
  #
  # Also of interest: pytorch ignores CXXFLAGS and uses CFLAGS for both C and C++:
  # https://github.com/pytorch/pytorch/blob/v1.11.0/setup.py#L17
  env.NIX_CFLAGS_COMPILE = toString ((lib.optionals (blas.implementation == "mkl") [ "-Wno-error=array-bounds" ]
    # Suppress gcc regression: avx512 math function raises uninitialized variable warning
    # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593
    # See also: Fails to compile with GCC 12.1.0 https://github.com/pytorch/pytorch/issues/77939
    ++ lib.optionals (stdenv.cc.isGNU && lib.versionAtLeast stdenv.cc.version "12.0.0") [
      "-Wno-error=maybe-uninitialized"
      "-Wno-error=uninitialized"
    ]
    # Since pytorch 2.0:
    # gcc-12.2.0/include/c++/12.2.0/bits/new_allocator.h:158:33: error: ‘void operator delete(void*, std::size_t)’
    # ... called on pointer ‘<unknown>’ with nonzero offset [1, 9223372036854775800] [-Werror=free-nonheap-object]
    ++ lib.optionals (stdenv.cc.isGNU && lib.versions.major stdenv.cc.version == "12") [
      "-Wno-error=free-nonheap-object"
    ]));

  nativeBuildInputs = [
    cmake
    util-linux
    which
    ninja
    pybind11
    pythonRelaxDepsHook
    removeReferencesTo
  ] ++ lib.optionals cudaSupport [ cudatoolkit_joined ]
    ++ lib.optionals rocmSupport [ rocmtoolkit_joined ];

  buildInputs = [ blas blas.provider pybind11 ]
    ++ lib.optionals stdenv.isLinux [ linuxHeaders_5_19 ] # TMP: avoid "flexible array member" errors for now
    ++ lib.optionals cudaSupport [ cudnn nccl ]
    ++ lib.optionals rocmSupport [ openmp ]
    ++ lib.optionals (cudaSupport || rocmSupport) [ magma ]
    ++ lib.optionals stdenv.isLinux [ numactl ]
    ++ lib.optionals stdenv.isDarwin [ Accelerate CoreServices libobjc ];

  propagatedBuildInputs = [
    cffi
    click
    numpy
    pyyaml

    # From install_requires:
    filelock
    typing-extensions
    sympy
    networkx
    jinja2

    # the following are required for tensorboard support
    pillow six future tensorboard protobuf
  ]
  ++ lib.optionals MPISupport [ mpi ]
  ++ lib.optionals rocmSupport [ rocmtoolkit_joined ]
  # The rocm build requires openai-triton, but openai-triton currently requires
  # cuda_nvcc, so we leave it out of the cpu-only build. torch.compile relies on
  # openai-triton, so we also include it for the cuda build.
  ++ lib.optionals (rocmSupport || cudaSupport) [
    openai-triton
  ];

  # Tests take a long time and may be flaky, so just sanity-check imports
  doCheck = false;

  pythonImportsCheck = [
    "torch"
  ];

  nativeCheckInputs = [ hypothesis ninja psutil ];

  # Join the outer pieces with newlines so the runHook invocations and the test
  # command each run as separate shell commands.
  checkPhase = with lib.versions; with lib.strings; concatStringsSep "\n" [
    "runHook preCheck"
    (concatStringsSep " " [
      cudaStubEnv
      "${python.interpreter} test/run_test.py"
      "--exclude"
      (concatStringsSep " " [
        "utils" # utils requires git, which is not allowed in the check phase

        # "dataloader" # psutil correctly finds and triggers multiprocessing, but is too sandboxed to run -- resulting in numerous errors
        # ^^^^^^^^^^^^ NOTE: while test_dataloader does return errors, these are acceptable errors and do not interfere with the build

        # tensorboard has acceptable failures for pytorch 1.3.x due to dependencies on tensorboard-plugins
        (optionalString (majorMinor version == "1.3") "tensorboard")
      ])
    ])
    "runHook postCheck"
  ];

  pythonRemoveDeps = [
    # In our dist-info the name is just "triton"
    "pytorch-triton-rocm"
  ];

  postInstall = ''
    find "$out/${python.sitePackages}/torch/include" "$out/${python.sitePackages}/torch/lib" -type f -exec remove-references-to -t ${stdenv.cc} '{}' +

    mkdir $dev
    cp -r $out/${python.sitePackages}/torch/include $dev/include
    cp -r $out/${python.sitePackages}/torch/share $dev/share

    # Fix up library paths for split outputs
    substituteInPlace \
      $dev/share/cmake/Torch/TorchConfig.cmake \
      --replace \''${TORCH_INSTALL_PREFIX}/lib "$lib/lib"

    substituteInPlace \
      $dev/share/cmake/Caffe2/Caffe2Targets-release.cmake \
      --replace \''${_IMPORT_PREFIX}/lib "$lib/lib"

    mkdir $lib
    mv $out/${python.sitePackages}/torch/lib $lib/lib
    ln -s $lib/lib $out/${python.sitePackages}/torch/lib
  '' + lib.optionalString rocmSupport ''
    substituteInPlace $dev/share/cmake/Tensorpipe/TensorpipeTargets-release.cmake \
      --replace "\''${_IMPORT_PREFIX}/lib64" "$lib/lib"

    substituteInPlace $dev/share/cmake/ATen/ATenConfig.cmake \
      --replace "/build/source/torch/include" "$dev/include"
  '';

  postFixup = lib.optionalString stdenv.isDarwin ''
    for f in $lib/lib/*.dylib; do
      install_name_tool -id $lib/lib/$(basename $f) $f || true
    done

    install_name_tool -change @rpath/libshm.dylib $lib/lib/libshm.dylib $lib/lib/libtorch_python.dylib
    install_name_tool -change @rpath/libtorch.dylib $lib/lib/libtorch.dylib $lib/lib/libtorch_python.dylib
    install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libtorch_python.dylib

    install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libtorch.dylib

    install_name_tool -change @rpath/libtorch.dylib $lib/lib/libtorch.dylib $lib/lib/libshm.dylib
    install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libshm.dylib
  '';

  # Builds in 2+h with 2 cores, and ~15m with a big-parallel builder.
  requiredSystemFeatures = [ "big-parallel" ];

  passthru = {
    inherit cudaSupport cudaPackages;
    # At least for 1.10.2, `torch.fft` is unavailable unless the BLAS provider is MKL.
    # This attribute allows for easy detection of its availability.
    blasProvider = blas.provider;
  } // lib.optionalAttrs cudaSupport {
    # NOTE: supportedCudaCapabilities isn't computed unless cudaSupport is true, so we can't use
    # it in the passthru set above because a downstream package might try to access it even
    # when cudaSupport is false. Better to have it missing than null or an empty list by default.
    cudaCapabilities = supportedCudaCapabilities;
  };

  meta = with lib; {
    changelog = "https://github.com/pytorch/pytorch/releases/tag/v${version}";
    # keep PyTorch in the description so the package can be found under that name on search.nixos.org
    description = "PyTorch: Tensors and Dynamic neural networks in Python with strong GPU acceleration";
    homepage = "https://pytorch.org/";
    license = licenses.bsd3;
    maintainers = with maintainers; [ teh thoughtpolice tscholak ]; # tscholak esp. for darwin-related builds
    platforms = with platforms; linux ++ lib.optionals (!cudaSupport && !rocmSupport) darwin;
    broken = rocmSupport && cudaSupport; # CUDA and ROCm are mutually exclusive
  };
}