nixpkgs mirror (for testing)
github.com/NixOS/nixpkgs
nix
1{ stdenv, lib, fetchFromGitHub, fetchpatch, buildPythonPackage, python,
2 cudaSupport ? false, cudaPackages, magma,
3 mklDnnSupport ? true, useSystemNccl ? true,
4 MPISupport ? false, mpi,
5 buildDocs ? false,
6 cudaArchList ? null,
7
8 # Native build inputs
9 cmake, util-linux, linkFarm, symlinkJoin, which, pybind11, removeReferencesTo,
10
11 # Build inputs
12 numactl,
13
14 # Propagated build inputs
15 numpy, pyyaml, cffi, click, typing-extensions,
16
17 # Unit tests
18 hypothesis, psutil,
19
20 # virtual pkg that consistently instantiates blas across nixpkgs
21 # See https://github.com/NixOS/nixpkgs/pull/83888
22 blas,
23
24 # ninja (https://ninja-build.org) must be available to run C++ extensions tests,
25 ninja,
26
27 # dependencies for torch.utils.tensorboard
28 pillow, six, future, tensorboard, protobuf,
29
30 isPy3k, pythonOlder }:
31
32let
33 inherit (cudaPackages) cudatoolkit cudnn nccl;
34in
35
36# assert that everything needed for cuda is present and that the correct cuda versions are used
37assert !cudaSupport || (let majorIs = lib.versions.major cudatoolkit.version;
38 in majorIs == "9" || majorIs == "10" || majorIs == "11");
39
40# confirm that cudatoolkits are sync'd across dependencies
41assert !(MPISupport && cudaSupport) || mpi.cudatoolkit == cudatoolkit;
42assert !cudaSupport || magma.cudatoolkit == cudatoolkit;
43
44let
45 setBool = v: if v then "1" else "0";
46 cudatoolkit_joined = symlinkJoin {
47 name = "${cudatoolkit.name}-unsplit";
48 # nccl is here purely for semantic grouping; it could be moved to nativeBuildInputs
49 paths = [ cudatoolkit.out cudatoolkit.lib nccl.dev nccl.out ];
50 };
51
52 # Give an explicit list of supported architectures for the build, See:
53 # - pytorch bug report: https://github.com/pytorch/pytorch/issues/23573
54 # - pytorch-1.2.0 build on nixpkgs: https://github.com/NixOS/nixpkgs/pull/65041
55 #
56 # This list was selected by omitting the TORCH_CUDA_ARCH_LIST parameter,
57 # observing the fallback option (which selected all architectures known
58 # from cudatoolkit_10_0, pytorch-1.2, and python-3.6), and doing a binary
59 # search to find offending architectures.
60 #
61 # NOTE: Because of sandboxing, this derivation can't auto-detect the hardware's
62 # cuda architecture, so there is also now a problem around new architectures
63 # not being supported until explicitly added to this derivation.
64 #
65 # FIXME: CMake is throwing the following warning on pytorch-1.2:
66 #
67 # ```
68 # CMake Warning at cmake/public/utils.cmake:172 (message):
69 # In the future we will require one to explicitly pass TORCH_CUDA_ARCH_LIST
70 # to cmake instead of implicitly setting it as an env variable. This will
71 # become a FATAL_ERROR in future version of pytorch.
72 # ```
73 # If this is causing problems for your build, this derivation may have to strip
74 # away the standard `buildPythonPackage` and use the
75 # [*Adjust Build Options*](https://github.com/pytorch/pytorch/tree/v1.2.0#adjust-build-options-optional)
76 # instructions. This will also add more flexibility around configurations
77 # (allowing FBGEMM to be built in pytorch-1.1), and may future proof this
78 # derivation.
79 brokenArchs = [ "3.0" ]; # this variable is only used as documentation.
80
81 cudaCapabilities = rec {
82 cuda9 = [
83 "3.5"
84 "5.0"
85 "5.2"
86 "6.0"
87 "6.1"
88 "7.0"
89 "7.0+PTX" # I am getting a "undefined architecture compute_75" on cuda 9
90 # which leads me to believe this is the final cuda-9-compatible architecture.
91 ];
92
93 cuda10 = cuda9 ++ [
94 "7.5"
95 "7.5+PTX" # < most recent architecture as of cudatoolkit_10_0 and pytorch-1.2.0
96 ];
97
98 cuda11 = cuda10 ++ [
99 "8.0"
100 "8.0+PTX" # < CUDA toolkit 11.0
101 "8.6"
102 "8.6+PTX" # < CUDA toolkit 11.1
103 ];
104 };
# Architecture list handed to TORCH_CUDA_ARCH_LIST and re-exported via passthru.
# A caller-supplied `cudaArchList` always wins. Only a CUDA build without an
# explicit list falls back to the table entry matching the toolkit's major
# version. Without CUDA the argument is passed through as-is (possibly null).
final_cudaArchList =
  if cudaSupport && cudaArchList == null
  then cudaCapabilities."cuda${lib.versions.major cudatoolkit.version}"
  else cudaArchList;
109
110 # Normally libcuda.so.1 is provided at runtime by nvidia-x11 via
111 # LD_LIBRARY_PATH=/run/opengl-driver/lib. We only use the stub
112 # libcuda.so from cudatoolkit for running tests, so that we don’t have
113 # to recompile pytorch on every update to nvidia-x11 or the kernel.
114 cudaStub = linkFarm "cuda-stub" [{
115 name = "libcuda.so.1";
116 path = "${cudatoolkit}/lib/stubs/libcuda.so";
117 }];
118 cudaStubEnv = lib.optionalString cudaSupport
119 "LD_LIBRARY_PATH=${cudaStub}\${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH ";
120
121in buildPythonPackage rec {
122 pname = "pytorch";
123 # Don't forget to update pytorch-bin to the same version.
124 version = "1.11.0";
125 format = "setuptools";
126
127 disabled = pythonOlder "3.7.0";
128
129 outputs = [
130 "out" # output standard python package
131 "dev" # output libtorch headers
132 "lib" # output libtorch libraries
133 ];
134
135 src = fetchFromGitHub {
136 owner = "pytorch";
137 repo = "pytorch";
138 rev = "v${version}";
139 fetchSubmodules = true;
140 sha256 = "sha256-CEu63tdRBAF8CTchO3Qu8gUNObQylX6U08yDTI4/c/0=";
141 };
142
143 patches = [
144 # Fix for a breakpad incompatibility with glibc>2.33
145 # https://github.com/pytorch/pytorch/issues/70297
146 # https://github.com/google/breakpad/commit/605c51ed96ad44b34c457bbca320e74e194c317e
147 ./breakpad-sigstksz.patch
148 ] ++ lib.optionals stdenv.isDarwin [
149 # pthreadpool added support for Grand Central Dispatch in April
150 # 2020. However, this relies on functionality (DISPATCH_APPLY_AUTO)
151 # that is available starting with macOS 10.13. However, our current
152 # base is 10.12. Until we upgrade, we can fall back on the older
153 # pthread support.
154 ./pthreadpool-disable-gcd.diff
155 ];
156
157 preConfigure = lib.optionalString cudaSupport ''
158 export TORCH_CUDA_ARCH_LIST="${lib.strings.concatStringsSep ";" final_cudaArchList}"
159 export CC=${cudatoolkit.cc}/bin/gcc CXX=${cudatoolkit.cc}/bin/g++
160 '' + lib.optionalString (cudaSupport && cudnn != null) ''
161 export CUDNN_INCLUDE_DIR=${cudnn}/include
162 '';
163
164 # Use pytorch's custom configurations
165 dontUseCmakeConfigure = true;
166
167 BUILD_NAMEDTENSOR = setBool true;
168 BUILD_DOCS = setBool buildDocs;
169
170 # We only do an imports check, so do not build tests either.
171 BUILD_TEST = setBool false;
172
173 # Unlike MKL, oneDNN (née MKLDNN) is FOSS, so we enable support for
174 # it by default. PyTorch currently uses its own vendored version
175 # of oneDNN through Intel iDeep.
176 USE_MKLDNN = setBool mklDnnSupport;
177 USE_MKLDNN_CBLAS = setBool mklDnnSupport;
178
179 preBuild = ''
180 export MAX_JOBS=$NIX_BUILD_CORES
181 ${python.interpreter} setup.py build --cmake-only
182 ${cmake}/bin/cmake build
183 '';
184
# Rewrite the RPATH of every libcaffe2*.so under $out: the first two RPATH
# entries are dropped and $ORIGIN is put in front of the remaining ones.
#
# IFS is scoped to the single `read` call instead of being assigned globally,
# so the shell-wide IFS is no longer left as a single space for the fixup hooks
# that run afterwards in the same shell. Expansions are quoted and the helper's
# variables are local.
preFixup = ''
  # join_by SEP ARGS...: print ARGS joined by the single-character separator SEP.
  function join_by { local IFS="$1"; shift; echo "$*"; }

  # strip2 FILE: drop the first two RPATH entries of FILE and prepend $ORIGIN.
  function strip2 {
    local -a RP
    local RP_NEW
    IFS=':' read -ra RP <<< "$(patchelf --print-rpath "$1")"
    RP_NEW=$(join_by : "''${RP[@]:2}")
    patchelf --set-rpath "\$ORIGIN:''${RP_NEW}" "$1"
  }

  # NUL-delimited iteration keeps strip2 in the main shell (so a failure still
  # aborts the build) and does not depend on word splitting of find's output.
  while IFS= read -r -d "" f
  do
    strip2 "$f"
  done < <(find "''${out}" -name 'libcaffe2*.so' -print0)
'';
199
200 # Override the (weirdly) wrong version set by default. See
201 # https://github.com/NixOS/nixpkgs/pull/52437#issuecomment-449718038
202 # https://github.com/pytorch/pytorch/blob/v1.0.0/setup.py#L267
203 PYTORCH_BUILD_VERSION = version;
204 PYTORCH_BUILD_NUMBER = 0;
205
206 USE_SYSTEM_NCCL=setBool useSystemNccl; # don't build pytorch's third_party NCCL
207
208 # Suppress a weird warning in mkl-dnn, part of ideep in pytorch
209 # (upstream seems to have fixed this in the wrong place?)
210 # https://github.com/intel/mkl-dnn/commit/8134d346cdb7fe1695a2aa55771071d455fae0bc
211 # https://github.com/pytorch/pytorch/issues/22346
212 #
213 # Also of interest: pytorch ignores CXXFLAGS and uses CFLAGS for both C and C++:
214 # https://github.com/pytorch/pytorch/blob/v1.11.0/setup.py#L17
215 NIX_CFLAGS_COMPILE = lib.optionals (blas.implementation == "mkl") [ "-Wno-error=array-bounds" ];
216
217 nativeBuildInputs = [
218 cmake
219 util-linux
220 which
221 ninja
222 pybind11
223 removeReferencesTo
224 ] ++ lib.optionals cudaSupport [ cudatoolkit_joined ];
225
226 buildInputs = [ blas blas.provider ]
227 ++ lib.optionals cudaSupport [ cudnn magma nccl ]
228 ++ lib.optionals stdenv.isLinux [ numactl ];
229
230 propagatedBuildInputs = [
231 cffi
232 click
233 numpy
234 pyyaml
235 typing-extensions
236 # the following are required for tensorboard support
237 pillow six future tensorboard protobuf
238 ] ++ lib.optionals MPISupport [ mpi ];
239
240 checkInputs = [ hypothesis ninja psutil ];
241
242 # Tests take a long time and may be flaky, so just sanity-check imports
243 doCheck = false;
244 pythonImportsCheck = [
245 "torch"
246 ];
247
248 checkPhase = with lib.versions; with lib.strings; concatStringsSep " " [
249 cudaStubEnv
250 "${python.interpreter} test/run_test.py"
251 "--exclude"
252 (concatStringsSep " " [
253 "utils" # utils requires git, which is not allowed in the check phase
254
255 # "dataloader" # psutils correctly finds and triggers multiprocessing, but is too sandboxed to run -- resulting in numerous errors
256 # ^^^^^^^^^^^^ NOTE: while test_dataloader does return errors, these are acceptable errors and do not interfere with the build
257
258 # tensorboard has acceptable failures for pytorch 1.3.x due to dependencies on tensorboard-plugins
259 (optionalString (majorMinor version == "1.3" ) "tensorboard")
260 ])
261 ];
262 postInstall = ''
263 find "$out/${python.sitePackages}/torch/include" "$out/${python.sitePackages}/torch/lib" -type f -exec remove-references-to -t ${stdenv.cc} '{}' +
264
265 mkdir $dev
266 cp -r $out/${python.sitePackages}/torch/include $dev/include
267 cp -r $out/${python.sitePackages}/torch/share $dev/share
268
269 # Fix up library paths for split outputs
270 substituteInPlace \
271 $dev/share/cmake/Torch/TorchConfig.cmake \
272 --replace \''${TORCH_INSTALL_PREFIX}/lib "$lib/lib"
273
274 substituteInPlace \
275 $dev/share/cmake/Caffe2/Caffe2Targets-release.cmake \
276 --replace \''${_IMPORT_PREFIX}/lib "$lib/lib"
277
278 mkdir $lib
279 mv $out/${python.sitePackages}/torch/lib $lib/lib
280 ln -s $lib/lib $out/${python.sitePackages}/torch/lib
281 '';
282
# Darwin only: give every dylib in $lib/lib an absolute install name, then
# rewrite the @rpath references between the torch libraries to absolute paths
# inside $lib/lib.
#
# The dylibs are iterated with a glob rather than by parsing `ls` output, and
# the repeated `install_name_tool -change` calls are folded into one helper.
# The helper issues the same commands in the same order as the explicit list
# it replaces.
postFixup = lib.optionalString stdenv.isDarwin ''
  for f in "$lib"/lib/*.dylib; do
    install_name_tool -id "$lib/lib/$(basename "$f")" "$f" || true
  done

  # relinkTorchDeps TARGET DEP...: in $lib/lib/TARGET, replace each
  # @rpath/DEP reference with the absolute path $lib/lib/DEP.
  relinkTorchDeps() {
    local target="$lib/lib/$1"
    shift
    local dep
    for dep in "$@"; do
      install_name_tool -change "@rpath/$dep" "$lib/lib/$dep" "$target"
    done
  }

  relinkTorchDeps libtorch_python.dylib libshm.dylib libtorch.dylib libc10.dylib
  relinkTorchDeps libtorch.dylib libc10.dylib
  relinkTorchDeps libcaffe2_observers.dylib libtorch.dylib libc10.dylib
  relinkTorchDeps libcaffe2_module_test_dynamic.dylib libtorch.dylib libc10.dylib
  relinkTorchDeps libcaffe2_detectron_ops.dylib libtorch.dylib libc10.dylib
  relinkTorchDeps libshm.dylib libtorch.dylib libc10.dylib
'';
306
307 # Builds in 2+h with 2 cores, and ~15m with a big-parallel builder.
308 requiredSystemFeatures = [ "big-parallel" ];
309
310 passthru = {
311 inherit cudaSupport cudaPackages;
312 cudaArchList = final_cudaArchList;
313 # At least for 1.10.2 `torch.fft` is unavailable unless BLAS provider is MKL. This attribute allows for easy detection of its availability.
314 blasProvider = blas.provider;
315 };
316
317 meta = with lib; {
318 # darwin: error: use of undeclared identifier 'noU'; did you mean 'no'?
319 broken = (stdenv.isLinux && stdenv.isAarch64) || stdenv.isDarwin;
320 description = "Open source, prototype-to-production deep learning platform";
321 homepage = "https://pytorch.org/";
322 license = licenses.bsd3;
323 platforms = with platforms; linux ++ lib.optionals (!cudaSupport) darwin;
324 maintainers = with maintainers; [ teh thoughtpolice tscholak ]; # tscholak esp. for darwin-related builds
325 };
326}