pkgs/development/python-modules/pytorch/default.nix at 20.03 · tjh.dev/nixpkgs

tjh.dev / nixpkgs
Clone of https://github.com/NixOS/nixpkgs.git (to stress-test knotserver)
nixpkgs / pkgs / development / python-modules / pytorch / default.nix
at 20.03 238 lines 9.7 kB view raw
# Arguments of the pytorch package function. Optional backends are switched
# by the *Support flags; their libraries default to null and are validated by
# the asserts that follow this header.
# NOTE(review): fetchurl, fetchgit and buildBinaries are accepted here but are
# not referenced anywhere else in this file.
{ stdenv, fetchurl, fetchgit, buildPythonPackage, python, pythonOlder,
  # CUDA backend (off by default) and the libraries it pulls in when enabled.
  cudaSupport ? false, cudatoolkit ? null, cudnn ? null, nccl ? null, magma ? null,
  # MKL backend; when enabled, numpy must itself be built against the same mkl.
  mklSupport ? false, mkl ? null,
  # When enabled, openmpi is added to propagatedBuildInputs.
  openMPISupport ? false, openmpi ? null,
  # Forwarded to the build as BUILD_NAMEDTENSOR.
  buildNamedTensor ? false,
  buildBinaries ? false,
  # Explicit TORCH_CUDA_ARCH_LIST entries; null selects a list based on the
  # major version of cudatoolkit.
  cudaArchList ? null,
  fetchFromGitHub, lib, numpy, pyyaml, cffi, click, typing, cmake, hypothesis, numactl,
  linkFarm, symlinkJoin,

  # ninja (https://ninja-build.org) must be available to run C++ extensions tests,
  ninja,

  # dependencies for torch.utils.tensorboard
  tensorboardSupport ? true, pillow, six, future, tensorflow-tensorboard,

  utillinux, which, isPy3k }:
 18
 19assert !openMPISupport || openmpi != null;
 20assert !tensorboardSupport || tensorflow-tensorboard != null;
 21
 22# assert that everything needed for cuda is present and that the correct cuda versions are used
 23assert !cudaSupport || cudatoolkit != null;
 24assert cudnn == null || cudatoolkit != null;
 25assert !cudaSupport || (let majorIs = lib.versions.major cudatoolkit.version;
 26                        in majorIs == "9" || majorIs == "10");
 27
 28let
 29  hasDependency = dep: pkg: lib.lists.any (inp: inp == dep) pkg.buildInputs;
 30  matchesCudatoolkit = hasDependency cudatoolkit;
 31  matchesMkl = hasDependency mkl;
 32in
 33# confirm that cudatoolkits are sync'd across dependencies
 34assert !(openMPISupport && cudaSupport) || matchesCudatoolkit openmpi;
 35assert !cudaSupport || matchesCudatoolkit magma;
 36
 37# confirm that mkl is sync'd across dependencies
 38assert !mklSupport || mkl != null;
 39assert !(mklSupport && cudaSupport) || matchesMkl magma;
 40assert !mklSupport || (numpy.blasImplementation == "mkl" && numpy.blas == mkl);
 41
 42let
 43  cudatoolkit_joined = symlinkJoin {
 44    name = "${cudatoolkit.name}-unsplit";
 45    # nccl is here purely for semantic grouping it could be moved to nativeBuildInputs
 46    paths = [ cudatoolkit.out cudatoolkit.lib nccl.dev nccl.out ];
 47  };
 48
 49  # Give an explicit list of supported architectures for the build, See:
 50  # - pytorch bug report: https://github.com/pytorch/pytorch/issues/23573
  # - pytorch-1.2.0 build on nixpkgs: https://github.com/NixOS/nixpkgs/pull/65041
 52  #
 53  # This list was selected by omitting the TORCH_CUDA_ARCH_LIST parameter,
 54  # observing the fallback option (which selected all architectures known
  # from cudatoolkit_10_0, pytorch-1.2, and python-3.6), and doing a binary
  # search to find offending architectures.
 57  #
 58  # NOTE: Because of sandboxing, this derivation can't auto-detect the hardware's
 59  # cuda architecture, so there is also now a problem around new architectures
 60  # not being supported until explicitly added to this derivation.
 61  #
  # FIXME: CMake is throwing the following warning on pytorch-1.2:
 63  #
 64  # ```
 65  # CMake Warning at cmake/public/utils.cmake:172 (message):
 66  #   In the future we will require one to explicitly pass TORCH_CUDA_ARCH_LIST
 67  #   to cmake instead of implicitly setting it as an env variable.  This will
 68  #   become a FATAL_ERROR in future version of pytorch.
 69  # ```
 70  # If this is causing problems for your build, this derivation may have to strip
 71  # away the standard `buildPythonPackage` and use the
 72  # [*Adjust Build Options*](https://github.com/pytorch/pytorch/tree/v1.2.0#adjust-build-options-optional)
 73  # instructions. This will also add more flexibility around configurations
 74  # (allowing FBGEMM to be built in pytorch-1.1), and may future proof this
 75  # derivation.
 76  brokenArchs = [ "3.0" ]; # this variable is only used as documentation.
 77  cuda9ArchList = [
 78    "3.5"
 79    "5.0"
 80    "5.2"
 81    "6.0"
 82    "6.1"
 83    "7.0"
 84    "7.0+PTX"  # I am getting a "undefined architecture compute_75" on cuda 9
 85               # which leads me to believe this is the final cuda-9-compatible architecture.
 86  ];
 87  cuda10ArchList = cuda9ArchList ++ [
 88    "7.5"
 89    "7.5+PTX"  # < most recent architecture as of cudatoolkit_10_0 and pytorch-1.2.0
 90  ];
 91  final_cudaArchList =
 92    if !cudaSupport || cudaArchList != null
 93    then cudaArchList
 94    else
 95      if lib.versions.major cudatoolkit.version == "9"
 96      then cuda9ArchList
 97      else cuda10ArchList; # the assert above removes any ambiguity here.
 98
 99  # Normally libcuda.so.1 is provided at runtime by nvidia-x11 via
100  # LD_LIBRARY_PATH=/run/opengl-driver/lib.  We only use the stub
101  # libcuda.so from cudatoolkit for running tests, so that we don’t have
102  # to recompile pytorch on every update to nvidia-x11 or the kernel.
103  cudaStub = linkFarm "cuda-stub" [{
104    name = "libcuda.so.1";
105    path = "${cudatoolkit}/lib/stubs/libcuda.so";
106  }];
107  cudaStubEnv = lib.optionalString cudaSupport
108    "LD_LIBRARY_PATH=${cudaStub}\${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH ";
109
110in buildPythonPackage rec {
  version = "1.2.0";
  pname = "pytorch";
  # The package is disabled unless isPy3k is true.
  disabled = !isPy3k;

  # "dev" is filled in by postInstall from the torch/lib and torch/include
  # directories of the installed python package.
  outputs = [
    "out"   # output standard python package
    "dev"   # output libtorch only
  ];

  # The fetched tag is derived from `version`, so a version bump must be
  # accompanied by a new sha256.
  src = fetchFromGitHub {
    owner  = "pytorch";
    repo   = "pytorch";
    rev    = "v${version}";
    fetchSubmodules = true;
    sha256 = "1biyq2p48chakf2xw7hazzqmr5ps1nx475ql8vkmxjg5zaa071cz";
  };
127
128  dontUseCmakeConfigure = true;
129
130  preConfigure = lib.optionalString cudaSupport ''
131    export TORCH_CUDA_ARCH_LIST="${lib.strings.concatStringsSep ";" final_cudaArchList}"
132    export CC=${cudatoolkit.cc}/bin/gcc CXX=${cudatoolkit.cc}/bin/g++
133  '' + lib.optionalString (cudaSupport && cudnn != null) ''
134    export CUDNN_INCLUDE_DIR=${cudnn}/include
135  '';
136
137  preFixup = ''
138    function join_by { local IFS="$1"; shift; echo "$*"; }
139    function strip2 {
140      IFS=':'
141      read -ra RP <<< $(patchelf --print-rpath $1)
142      IFS=' '
143      RP_NEW=$(join_by : ''${RP[@]:2})
144      patchelf --set-rpath \$ORIGIN:''${RP_NEW} "$1"
145    }
146    for f in $(find ''${out} -name 'libcaffe2*.so')
147    do
148      strip2 $f
149    done
150  '';
151
152  # Override the (weirdly) wrong version set by default. See
153  # https://github.com/NixOS/nixpkgs/pull/52437#issuecomment-449718038
154  # https://github.com/pytorch/pytorch/blob/v1.0.0/setup.py#L267
155  PYTORCH_BUILD_VERSION = version;
156  PYTORCH_BUILD_NUMBER = 0;
157
158  BUILD_NAMEDTENSOR = buildNamedTensor;  # experimental feature
159  USE_SYSTEM_NCCL=true;                  # don't build pytorch's third_party NCCL
160
161  # Suppress a weird warning in mkl-dnn, part of ideep in pytorch
162  # (upstream seems to have fixed this in the wrong place?)
163  # https://github.com/intel/mkl-dnn/commit/8134d346cdb7fe1695a2aa55771071d455fae0bc
164  # https://github.com/pytorch/pytorch/issues/22346
165  #
166  # Also of interest: pytorch ignores CXXFLAGS uses CFLAGS for both C and C++:
167  # https://github.com/pytorch/pytorch/blob/v1.2.0/setup.py#L17
168  NIX_CFLAGS_COMPILE = lib.optionals (numpy.blas == mkl) [ "-Wno-error=array-bounds" ];
169
170  nativeBuildInputs = [
171    cmake
172    utillinux
173    which
174    ninja
175  ] ++ lib.optionals cudaSupport [ cudatoolkit_joined ];
176
177  buildInputs = [
178    numpy.blas
179  ] ++ lib.optionals cudaSupport [ cudnn magma nccl ]
180    ++ lib.optionals stdenv.isLinux [ numactl ];
181
182  propagatedBuildInputs = [
183    cffi
184    click
185    numpy
186    pyyaml
187  ] ++ lib.optionals openMPISupport [ openmpi ]
188    ++ lib.optional (pythonOlder "3.5") typing
189    ++ lib.optionals tensorboardSupport [pillow six future tensorflow-tensorboard];
190
191  checkInputs = [ hypothesis ninja ];
192
193  doCheck = false; # tests take a long time for channel release, so doCheck should be overridden only when developing
194  checkPhase = "${cudaStubEnv}python test/run_test.py"
195    + " --exclude utils" # utils requires git, which is not allowed in the check phase
196
197    # Other tests which have been disabled in previous nix derivations of pytorch.
198    # --exclude dataloader sparse torch utils thd_distributed distributed cpp_extensions
199    ;
  # Populate the "dev" output with copies of the lib and include directories
  # of the torch python package installed under $out.
  postInstall = ''
    mkdir $dev
    cp -r $out/${python.sitePackages}/torch/lib     $dev/lib
    cp -r $out/${python.sitePackages}/torch/include $dev/include
  '';
205
206  postFixup = stdenv.lib.optionalString stdenv.isDarwin ''
207    for f in $(ls $dev/lib/*.dylib); do
208        install_name_tool -id $dev/lib/$(basename $f) $f || true
209    done
210
211    install_name_tool -change @rpath/libshm.dylib $dev/lib/libshm.dylib $dev/lib/libtorch_python.dylib
212    install_name_tool -change @rpath/libtorch.dylib $dev/lib/libtorch.dylib $dev/lib/libtorch_python.dylib
213    install_name_tool -change @rpath/libc10.dylib $dev/lib/libc10.dylib $dev/lib/libtorch_python.dylib
214
215    install_name_tool -change @rpath/libc10.dylib $dev/lib/libc10.dylib $dev/lib/libtorch.dylib
216
217    install_name_tool -change @rpath/libtorch.dylib $dev/lib/libtorch.dylib $dev/lib/libcaffe2_observers.dylib
218    install_name_tool -change @rpath/libc10.dylib $dev/lib/libc10.dylib $dev/lib/libcaffe2_observers.dylib
219
220    install_name_tool -change @rpath/libtorch.dylib $dev/lib/libtorch.dylib $dev/lib/libcaffe2_module_test_dynamic.dylib
221    install_name_tool -change @rpath/libc10.dylib $dev/lib/libc10.dylib $dev/lib/libcaffe2_module_test_dynamic.dylib
222
223    install_name_tool -change @rpath/libtorch.dylib $dev/lib/libtorch.dylib $dev/lib/libcaffe2_detectron_ops.dylib
224    install_name_tool -change @rpath/libc10.dylib $dev/lib/libc10.dylib $dev/lib/libcaffe2_detectron_ops.dylib
225
226    install_name_tool -change @rpath/libtorch.dylib $dev/lib/libtorch.dylib $dev/lib/libshm.dylib
227    install_name_tool -change @rpath/libc10.dylib $dev/lib/libc10.dylib $dev/lib/libshm.dylib
228  '';
229
230
231  meta = {
232    description = "Open source, prototype-to-production deep learning platform";
233    homepage    = https://pytorch.org/;
234    license     = lib.licenses.bsd3;
235    platforms   = with lib.platforms; linux ++ lib.optionals (!cudaSupport) darwin;
236    maintainers = with lib.maintainers; [ teh thoughtpolice stites tscholak ]; # tscholak esp. for darwin-related builds
237  };
238}