Clone of https://github.com/NixOS/nixpkgs.git (to stress-test knotserver)
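# Nix expression for PyTorch 2.0.1. A minimal usage sketch, assuming this file
# is wired into python-packages.nix as `torch` (the values are illustrative):
#
#   python3Packages.torch                                    # CPU-only build
#   python3Packages.torch.override { cudaSupport = true; }   # CUDA build
#   python3Packages.torch.override {
#     rocmSupport = true;
#     gpuTargets = [ "gfx906" ];    # restrict the ROCm architectures built
#   }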
{ stdenv, lib, fetchFromGitHub, buildPythonPackage, python,
  cudaSupport ? false, cudaPackages, magma,
  useSystemNccl ? true,
  MPISupport ? false, mpi,
  buildDocs ? false,

  # Native build inputs
  cmake, linkFarm, symlinkJoin, which, pybind11, removeReferencesTo,
  pythonRelaxDepsHook,

  # Build inputs
  numactl,
  Accelerate, CoreServices, libobjc,

  # Propagated build inputs
  filelock,
  jinja2,
  networkx,
  openai-triton,
  sympy,
  numpy, pyyaml, cffi, click, typing-extensions,

  # Unit tests
  hypothesis, psutil,

  # Disable MKLDNN on aarch64-darwin; it negatively impacts performance,
  # and this is also what the official pytorch build does.
  mklDnnSupport ? !(stdenv.isDarwin && stdenv.isAarch64),

  # virtual pkg that consistently instantiates blas across nixpkgs
  # See https://github.com/NixOS/nixpkgs/pull/83888
  blas,

  # ninja (https://ninja-build.org) must be available to run C++ extension tests.
  ninja,

  linuxHeaders_5_19,

  # dependencies for torch.utils.tensorboard
  pillow, six, future, tensorboard, protobuf,

  pythonOlder,

  # ROCm dependencies
  rocmSupport ? false,
  gpuTargets ? [ ],
  openmp, rocm-core, hip, rccl, miopen, miopengemm, rocrand, rocblas,
  rocfft, rocsparse, hipsparse, rocthrust, rocprim, hipcub, roctracer,
  rocsolver, hipfft, hipsolver, hipblas, rocminfo, rocm-thunk, rocm-comgr,
  rocm-device-libs, rocm-runtime, rocm-opencl-runtime, hipify
}:

let
  inherit (lib) lists strings trivial;
  inherit (cudaPackages) cudatoolkit cudaFlags cudnn nccl;
in

assert cudaSupport -> stdenv.isLinux;
assert cudaSupport -> (cudaPackages.cudaMajorVersion == "11");

# confirm that the cudatoolkits are sync'd across dependencies
assert !(MPISupport && cudaSupport) || mpi.cudatoolkit == cudatoolkit;
assert !cudaSupport || magma.cudaPackages.cudatoolkit == cudatoolkit;

let
  setBool = v: if v then "1" else "0";

  # https://github.com/pytorch/pytorch/blob/v2.0.1/torch/utils/cpp_extension.py#L1744
  supportedTorchCudaCapabilities =
    let
      real = ["3.5" "3.7" "5.0" "5.2" "5.3" "6.0" "6.1" "6.2" "7.0" "7.2" "7.5" "8.0" "8.6" "8.9" "9.0"];
      ptx = lists.map (x: "${x}+PTX") real;
    in
    real ++ ptx;

  # NOTE: The lists.subtractLists function is perhaps a bit unintuitive. It subtracts the
  #   elements of the first list *from* the second list. That means:
  #   lists.subtractLists a b = b - a

  # For CUDA
  supportedCudaCapabilities = lists.intersectLists cudaFlags.cudaCapabilities supportedTorchCudaCapabilities;
  unsupportedCudaCapabilities = lists.subtractLists supportedCudaCapabilities cudaFlags.cudaCapabilities;

  # Use trivial.throwIf to abort the build if, after filtering, no supported GPU
  # targets remain; the unsupported targets are listed in the error message.
  gpuArchWarner = supported: unsupported:
    trivial.throwIf (supported == [ ])
      (
        "No supported GPU targets specified. Requested GPU targets: "
        + strings.concatStringsSep ", " unsupported
      )
      supported;

  # Create the gpuTargetString.
  gpuTargetString = strings.concatStringsSep ";" (
    if gpuTargets != [ ] then
      # If gpuTargets is specified, it always takes priority.
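      # (illustrative values: [ "8.6" "8.6+PTX" ] for CUDA, or [ "gfx906" ] for
      # ROCm; the entries are joined with ";" and exported below as
      # TORCH_CUDA_ARCH_LIST / PYTORCH_ROCM_ARCH)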
      gpuTargets
    else if cudaSupport then
      gpuArchWarner supportedCudaCapabilities unsupportedCudaCapabilities
    else if rocmSupport then
      hip.gpuTargets
    else
      throw "No GPU targets specified"
  );

  cudatoolkit_joined = symlinkJoin {
    name = "${cudatoolkit.name}-unsplit";
    # nccl is here purely for semantic grouping; it could be moved to nativeBuildInputs
    paths = [ cudatoolkit.out cudatoolkit.lib nccl.dev nccl.out ];
  };

  # Normally libcuda.so.1 is provided at runtime by nvidia-x11 via
  # LD_LIBRARY_PATH=/run/opengl-driver/lib. We only use the stub
  # libcuda.so from cudatoolkit for running tests, so that we don’t have
  # to recompile pytorch on every update to nvidia-x11 or the kernel.
  cudaStub = linkFarm "cuda-stub" [{
    name = "libcuda.so.1";
    path = "${cudatoolkit}/lib/stubs/libcuda.so";
  }];
  cudaStubEnv = lib.optionalString cudaSupport
    "LD_LIBRARY_PATH=${cudaStub}\${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH ";

  rocmtoolkit_joined = symlinkJoin {
    name = "rocm-merged";

    paths = [
      rocm-core hip rccl miopen miopengemm rocrand rocblas
      rocfft rocsparse hipsparse rocthrust rocprim hipcub
      roctracer rocsolver hipfft hipsolver hipblas
      rocminfo rocm-thunk rocm-comgr rocm-device-libs
      rocm-runtime rocm-opencl-runtime hipify
    ];
  };
in buildPythonPackage rec {
  pname = "torch";
  # Don't forget to update torch-bin to the same version.
  version = "2.0.1";
  format = "setuptools";

  disabled = pythonOlder "3.8.0";

  outputs = [
    "out" # output standard python package
    "dev" # output libtorch headers
    "lib" # output libtorch libraries
  ];

  src = fetchFromGitHub {
    owner = "pytorch";
    repo = "pytorch";
    rev = "refs/tags/v${version}";
    fetchSubmodules = true;
    hash = "sha256-xUj77yKz3IQ3gd/G32pI4OhL3LoN1zS7eFg0/0nZp5I=";
  };

  patches = lib.optionals (stdenv.isDarwin && stdenv.isx86_64) [
    # pthreadpool added support for Grand Central Dispatch in April 2020;
    # however, this relies on functionality (DISPATCH_APPLY_AUTO) that is only
    # available starting with macOS 10.13, while our current base is 10.12.
    # Until we upgrade, we fall back on the older pthread support.
    ./pthreadpool-disable-gcd.diff
  ];

  postPatch = lib.optionalString rocmSupport ''
    # https://github.com/facebookincubator/gloo/pull/297
    substituteInPlace third_party/gloo/cmake/Hipify.cmake \
      --replace "\''${HIPIFY_COMMAND}" "python \''${HIPIFY_COMMAND}"

    # Replace hard-coded rocm paths
    substituteInPlace caffe2/CMakeLists.txt \
      --replace "/opt/rocm" "${rocmtoolkit_joined}" \
      --replace "hcc/include" "hip/include" \
      --replace "rocblas/include" "include/rocblas" \
      --replace "hipsparse/include" "include/hipsparse"

    # Doesn't pick up the environment variable?
    substituteInPlace third_party/kineto/libkineto/CMakeLists.txt \
      --replace "\''$ENV{ROCM_SOURCE_DIR}" "${rocmtoolkit_joined}" \
      --replace "/opt/rocm" "${rocmtoolkit_joined}"

    # Strangely, this is never set in cmake
    substituteInPlace cmake/public/LoadHIP.cmake \
      --replace "set(ROCM_PATH \$ENV{ROCM_PATH})" \
        "set(ROCM_PATH \$ENV{ROCM_PATH})''\nset(ROCM_VERSION ${lib.concatStrings (lib.intersperse "0" (lib.splitString "." hip.version))})"
  ''
  # error: no member named 'aligned_alloc' in the global namespace; did you mean simply 'aligned_alloc'
  # This lib overrides aligned_alloc, hence the error message. Tl;dr: the function is linkable but not declared in the header.
  + lib.optionalString (stdenv.isDarwin && lib.versionOlder stdenv.targetPlatform.darwinSdkVersion "11.0") ''
    substituteInPlace third_party/pocketfft/pocketfft_hdronly.h --replace '#if __cplusplus >= 201703L
    inline void *aligned_alloc(size_t align, size_t size)' '#if __cplusplus >= 201703L && 0
    inline void *aligned_alloc(size_t align, size_t size)'
  '';

  preConfigure = lib.optionalString cudaSupport ''
    export TORCH_CUDA_ARCH_LIST="${gpuTargetString}"
    export CC=${cudatoolkit.cc}/bin/gcc CXX=${cudatoolkit.cc}/bin/g++
  '' + lib.optionalString (cudaSupport && cudnn != null) ''
    export CUDNN_INCLUDE_DIR=${cudnn}/include
  '' + lib.optionalString rocmSupport ''
    export ROCM_PATH=${rocmtoolkit_joined}
    export ROCM_SOURCE_DIR=${rocmtoolkit_joined}
    export PYTORCH_ROCM_ARCH="${gpuTargetString}"
    export CMAKE_CXX_FLAGS="-I${rocmtoolkit_joined}/include -I${rocmtoolkit_joined}/include/rocblas"
    python tools/amd_build/build_amd.py
  '';

  # Use pytorch's custom configurations
  dontUseCmakeConfigure = true;

  BUILD_NAMEDTENSOR = setBool true;
  BUILD_DOCS = setBool buildDocs;

  # We only do an imports check, so do not build tests either.
  BUILD_TEST = setBool false;

  # Unlike MKL, oneDNN (née MKLDNN) is FOSS, so we enable support for
  # it by default. PyTorch currently uses its own vendored version
  # of oneDNN through Intel iDeep.
  USE_MKLDNN = setBool mklDnnSupport;
  USE_MKLDNN_CBLAS = setBool mklDnnSupport;

  # Avoid using pybind11 from the git submodule.
  # This also avoids pytorch exporting the headers of pybind11.
  USE_SYSTEM_BIND11 = true;

  preBuild = ''
    export MAX_JOBS=$NIX_BUILD_CORES
    ${python.pythonForBuild.interpreter} setup.py build --cmake-only
    ${cmake}/bin/cmake build
  '';

  preFixup = ''
    function join_by { local IFS="$1"; shift; echo "$*"; }
    function strip2 {
      IFS=':'
      read -ra RP <<< $(patchelf --print-rpath $1)
      IFS=' '
      RP_NEW=$(join_by : ''${RP[@]:2})
      patchelf --set-rpath \$ORIGIN:''${RP_NEW} "$1"
    }
    for f in $(find ''${out} -name 'libcaffe2*.so')
    do
      strip2 $f
    done
  '';

  # Override the (weirdly) wrong version set by default. See
  # https://github.com/NixOS/nixpkgs/pull/52437#issuecomment-449718038
  # https://github.com/pytorch/pytorch/blob/v1.0.0/setup.py#L267
  PYTORCH_BUILD_VERSION = version;
  PYTORCH_BUILD_NUMBER = 0;

  USE_SYSTEM_NCCL = setBool useSystemNccl; # don't build pytorch's third_party NCCL
  # Suppress a weird warning in mkl-dnn, part of ideep in pytorch
  # (upstream seems to have fixed this in the wrong place?)
  # https://github.com/intel/mkl-dnn/commit/8134d346cdb7fe1695a2aa55771071d455fae0bc
  # https://github.com/pytorch/pytorch/issues/22346
  #
  # Also of interest: pytorch ignores CXXFLAGS and uses CFLAGS for both C and C++:
  # https://github.com/pytorch/pytorch/blob/v1.11.0/setup.py#L17
  env.NIX_CFLAGS_COMPILE = toString ((lib.optionals (blas.implementation == "mkl") [ "-Wno-error=array-bounds" ]
    # Suppress a gcc regression: avx512 math functions raise an uninitialized variable warning
    # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593
    # See also: Fails to compile with GCC 12.1.0 https://github.com/pytorch/pytorch/issues/77939
    ++ lib.optionals (stdenv.cc.isGNU && lib.versionAtLeast stdenv.cc.version "12.0.0") [
      "-Wno-error=maybe-uninitialized"
      "-Wno-error=uninitialized"
    ]
    # Since pytorch 2.0:
    # gcc-12.2.0/include/c++/12.2.0/bits/new_allocator.h:158:33: error: ‘void operator delete(void*, std::size_t)’
    # ... called on pointer ‘<unknown>’ with nonzero offset [1, 9223372036854775800] [-Werror=free-nonheap-object]
    ++ lib.optionals (stdenv.cc.isGNU && lib.versions.major stdenv.cc.version == "12") [
      "-Wno-error=free-nonheap-object"
    ]));

  nativeBuildInputs = [
    cmake
    which
    ninja
    pybind11
    pythonRelaxDepsHook
    removeReferencesTo
  ] ++ lib.optionals cudaSupport [ cudatoolkit_joined ]
    ++ lib.optionals rocmSupport [ rocmtoolkit_joined ];

  buildInputs = [ blas blas.provider pybind11 ]
    ++ lib.optionals stdenv.isLinux [ linuxHeaders_5_19 ] # TMP: avoid "flexible array member" errors for now
    ++ lib.optionals cudaSupport [ cudnn nccl ]
    ++ lib.optionals rocmSupport [ openmp ]
    ++ lib.optionals (cudaSupport || rocmSupport) [ magma ]
    ++ lib.optionals stdenv.isLinux [ numactl ]
    ++ lib.optionals stdenv.isDarwin [ Accelerate CoreServices libobjc ];

  propagatedBuildInputs = [
    cffi
    click
    numpy
    pyyaml

    # From install_requires:
    filelock
    typing-extensions
    sympy
    networkx
    jinja2

    # the following are required for tensorboard support
    pillow six future tensorboard protobuf
  ]
  ++ lib.optionals MPISupport [ mpi ]
  ++ lib.optionals rocmSupport [ rocmtoolkit_joined ]
  # the rocm build requires openai-triton;
  # openai-triton currently requires cuda_nvcc,
  # so it is not included in the cpu-only build;
  # torch.compile relies on openai-triton,
  # so we include it for the cuda build as well
  ++ lib.optionals (rocmSupport || cudaSupport) [
    openai-triton
  ];

  # Tests take a long time and may be flaky, so just sanity-check imports
  doCheck = false;

  pythonImportsCheck = [
    "torch"
  ];

  nativeCheckInputs = [ hypothesis ninja psutil ];

  checkPhase = with lib.versions; with lib.strings; concatStringsSep " " [
    "runHook preCheck"
    cudaStubEnv
    "${python.interpreter} test/run_test.py"
    "--exclude"
    (concatStringsSep " " [
      "utils" # utils requires git, which is not allowed in the check phase

      # "dataloader" # psutil correctly finds and triggers multiprocessing, but is too sandboxed to run -- resulting in numerous errors
      # ^^^^^^^^^^^^ NOTE: while test_dataloader does return errors, these are acceptable errors and do not interfere with the build

      # tensorboard has acceptable failures for pytorch 1.3.x due to dependencies on tensorboard-plugins
      (optionalString (majorMinor version == "1.3") "tensorboard")
    ])
    "runHook postCheck"
  ];
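  # A sketch for downstream users who do want the (slow, partially flaky)
  # upstream test suite, assuming this package is exposed as `torch`:
  #   torch.overridePythonAttrs (_: { doCheck = true; })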
  pythonRemoveDeps = [
    # In our dist-info the name is just "triton"
    "pytorch-triton-rocm"
  ];

  postInstall = ''
    find "$out/${python.sitePackages}/torch/include" "$out/${python.sitePackages}/torch/lib" -type f -exec remove-references-to -t ${stdenv.cc} '{}' +

    mkdir $dev
    cp -r $out/${python.sitePackages}/torch/include $dev/include
    cp -r $out/${python.sitePackages}/torch/share $dev/share

    # Fix up library paths for split outputs
    substituteInPlace \
      $dev/share/cmake/Torch/TorchConfig.cmake \
      --replace \''${TORCH_INSTALL_PREFIX}/lib "$lib/lib"

    substituteInPlace \
      $dev/share/cmake/Caffe2/Caffe2Targets-release.cmake \
      --replace \''${_IMPORT_PREFIX}/lib "$lib/lib"

    mkdir $lib
    mv $out/${python.sitePackages}/torch/lib $lib/lib
    ln -s $lib/lib $out/${python.sitePackages}/torch/lib
  '' + lib.optionalString rocmSupport ''
    substituteInPlace $dev/share/cmake/Tensorpipe/TensorpipeTargets-release.cmake \
      --replace "\''${_IMPORT_PREFIX}/lib64" "$lib/lib"

    substituteInPlace $dev/share/cmake/ATen/ATenConfig.cmake \
      --replace "/build/source/torch/include" "$dev/include"
  '';

  postFixup = lib.optionalString stdenv.isDarwin ''
    for f in $(ls $lib/lib/*.dylib); do
      install_name_tool -id $lib/lib/$(basename $f) $f || true
    done

    install_name_tool -change @rpath/libshm.dylib $lib/lib/libshm.dylib $lib/lib/libtorch_python.dylib
    install_name_tool -change @rpath/libtorch.dylib $lib/lib/libtorch.dylib $lib/lib/libtorch_python.dylib
    install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libtorch_python.dylib

    install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libtorch.dylib

    install_name_tool -change @rpath/libtorch.dylib $lib/lib/libtorch.dylib $lib/lib/libshm.dylib
    install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libshm.dylib
  '';

  # Builds in 2+h with 2 cores, and ~15m with a big-parallel builder.
  requiredSystemFeatures = [ "big-parallel" ];

  passthru = {
    inherit cudaSupport cudaPackages;
    # At least for 1.10.2 `torch.fft` is unavailable unless the BLAS provider is MKL.
    # This attribute allows for easy detection of its availability.
    blasProvider = blas.provider;
  } // lib.optionalAttrs cudaSupport {
    # NOTE: supportedCudaCapabilities isn't computed unless cudaSupport is true, so we can't use
    #   it in the passthru set above because a downstream package might try to access it even
    #   when cudaSupport is false. Better to have it missing than null or an empty list by default.
    cudaCapabilities = supportedCudaCapabilities;
  };

  meta = with lib; {
    changelog = "https://github.com/pytorch/pytorch/releases/tag/v${version}";
    # keep PyTorch in the description so the package can be found under that name on search.nixos.org
    description = "PyTorch: Tensors and Dynamic neural networks in Python with strong GPU acceleration";
    homepage = "https://pytorch.org/";
    license = licenses.bsd3;
    maintainers = with maintainers; [ teh thoughtpolice tscholak ]; # tscholak esp. for darwin-related builds
    platforms = with platforms; linux ++ lib.optionals (!cudaSupport && !rocmSupport) darwin;
    broken = rocmSupport && cudaSupport; # CUDA and ROCm are mutually exclusive
  };
}
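# An illustrative smoke test for a CUDA-enabled build; the attribute path
# assumes this file is exposed as python3Packages.torch, while
# torch.cuda.is_available() is upstream PyTorch API:
#   nix-shell -p "python3.withPackages (ps: [ (ps.torch.override { cudaSupport = true; }) ])" \
#     --run 'python -c "import torch; print(torch.cuda.is_available())"'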