{ stdenv, lib, fetchFromGitHub, fetchpatch, buildPythonPackage, python,
  cudaSupport ? false, cudaPackages, magma,
  useSystemNccl ? true,
  MPISupport ? false, mpi,
  buildDocs ? false,

  # Native build inputs
  cmake, util-linux, linkFarm, symlinkJoin, which, pybind11, removeReferencesTo,
  pythonRelaxDepsHook,

  # Build inputs
  numactl,
  Accelerate, CoreServices, libobjc,

  # Propagated build inputs
  filelock,
  jinja2,
  networkx,
  openai-triton,
  sympy,
  numpy, pyyaml, cffi, click, typing-extensions,

  # Unit tests
  hypothesis, psutil,

  # Disable MKLDNN on aarch64-darwin; it negatively impacts performance.
  # This is also what the official PyTorch build does.
  mklDnnSupport ? !(stdenv.isDarwin && stdenv.isAarch64),

  # virtual pkg that consistently instantiates blas across nixpkgs
  # See https://github.com/NixOS/nixpkgs/pull/83888
  blas,

  # ninja (https://ninja-build.org) must be available to run C++ extension tests.
  ninja,

  linuxHeaders_5_19,

  # dependencies for torch.utils.tensorboard
  pillow, six, future, tensorboard, protobuf,

  isPy3k, pythonOlder,

  # ROCm dependencies
  rocmSupport ? false,
  gpuTargets ? [ ],
  openmp, rocm-core, hip, rccl, miopen, miopengemm, rocrand, rocblas,
  rocfft, rocsparse, hipsparse, rocthrust, rocprim, hipcub, roctracer,
  rocsolver, hipfft, hipsolver, hipblas, rocminfo, rocm-thunk, rocm-comgr,
  rocm-device-libs, rocm-runtime, rocm-opencl-runtime, hipify
}:

let
  inherit (lib) lists strings trivial;
  inherit (cudaPackages) cudatoolkit cudaFlags cudnn nccl;
in

assert cudaSupport -> stdenv.isLinux;
assert cudaSupport -> (cudaPackages.cudaMajorVersion == "11");

# confirm that cudatoolkits are sync'd across dependencies
assert !(MPISupport && cudaSupport) || mpi.cudatoolkit == cudatoolkit;
assert !cudaSupport || magma.cudaPackages.cudatoolkit == cudatoolkit;

let
  setBool = v: if v then "1" else "0";
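  # For example, setBool true evaluates to "1" and setBool false to "0", the
  # string form used by the BUILD_*/USE_* environment variables set below.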

  # https://github.com/pytorch/pytorch/blob/v1.13.1/torch/utils/cpp_extension.py#L1751
  supportedTorchCudaCapabilities =
    let
      real = ["3.5" "3.7" "5.0" "5.2" "5.3" "6.0" "6.1" "6.2" "7.0" "7.2" "7.5" "8.0" "8.6"];
      ptx = lists.map (x: "${x}+PTX") real;
    in
    real ++ ptx;

  # NOTE: The lists.subtractLists function is perhaps a bit unintuitive. It subtracts the elements
  # of the first list *from* the second list. That means:
  #   lists.subtractLists a b = b - a
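  # A concrete example (values illustrative):
  #   lists.subtractLists [ "8.6" ] [ "8.0" "8.6" ] == [ "8.0" ]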

  # For CUDA
  supportedCudaCapabilities = lists.intersectLists cudaFlags.cudaCapabilities supportedTorchCudaCapabilities;
  unsupportedCudaCapabilities = lists.subtractLists supportedCudaCapabilities cudaFlags.cudaCapabilities;

  # Use trivial.throwIf to abort the evaluation if no supported GPU targets are
  # left after filtering against the capabilities pytorch supports.
  gpuArchWarner = supported: unsupported:
    trivial.throwIf (supported == [ ])
      (
        "No supported GPU targets specified. Requested GPU targets: "
        + strings.concatStringsSep ", " unsupported
      )
      supported;
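  # For example (illustrative): gpuArchWarner [ "8.6" ] [ ] returns [ "8.6" ],
  # while gpuArchWarner [ ] [ "9.0" ] aborts with
  # "No supported GPU targets specified. Requested GPU targets: 9.0".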

  # Create the gpuTargetString.
  gpuTargetString = strings.concatStringsSep ";" (
    if gpuTargets != [ ] then
      # If gpuTargets is specified, it always takes priority.
      gpuTargets
    else if cudaSupport then
      gpuArchWarner supportedCudaCapabilities unsupportedCudaCapabilities
    else if rocmSupport then
      hip.gpuTargets
    else
      throw "No GPU targets specified"
  );
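  # The result is a semicolon-separated string such as "8.0;8.6+PTX"
  # (illustrative), the format exported below via TORCH_CUDA_ARCH_LIST and
  # PYTORCH_ROCM_ARCH.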

  cudatoolkit_joined = symlinkJoin {
    name = "${cudatoolkit.name}-unsplit";
    # nccl is here purely for semantic grouping; it could be moved to nativeBuildInputs
    paths = [ cudatoolkit.out cudatoolkit.lib nccl.dev nccl.out ];
  };

  # Normally libcuda.so.1 is provided at runtime by nvidia-x11 via
  # LD_LIBRARY_PATH=/run/opengl-driver/lib. We only use the stub
  # libcuda.so from cudatoolkit for running tests, so that we don’t have
  # to recompile pytorch on every update to nvidia-x11 or the kernel.
  cudaStub = linkFarm "cuda-stub" [{
    name = "libcuda.so.1";
    path = "${cudatoolkit}/lib/stubs/libcuda.so";
  }];
  cudaStubEnv = lib.optionalString cudaSupport
    "LD_LIBRARY_PATH=${cudaStub}\${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH ";

  rocmtoolkit_joined = symlinkJoin {
    name = "rocm-merged";

    paths = [
      rocm-core hip rccl miopen miopengemm rocrand rocblas
      rocfft rocsparse hipsparse rocthrust rocprim hipcub
      roctracer rocsolver hipfft hipsolver hipblas
      rocminfo rocm-thunk rocm-comgr rocm-device-libs
      rocm-runtime rocm-opencl-runtime hipify
    ];
  };
in buildPythonPackage rec {
  pname = "torch";
  # Don't forget to update torch-bin to the same version.
  version = "2.0.1";
  format = "setuptools";

  disabled = pythonOlder "3.8.0";

  outputs = [
    "out" # output standard python package
    "dev" # output libtorch headers
    "lib" # output libtorch libraries
  ];

  src = fetchFromGitHub {
    owner = "pytorch";
    repo = "pytorch";
    rev = "refs/tags/v${version}";
    fetchSubmodules = true;
    hash = "sha256-xUj77yKz3IQ3gd/G32pI4OhL3LoN1zS7eFg0/0nZp5I=";
  };

  patches = lib.optionals (stdenv.isDarwin && stdenv.isx86_64) [
    # pthreadpool added support for Grand Central Dispatch in April
    # 2020. However, this relies on functionality (DISPATCH_APPLY_AUTO)
    # that is only available starting with macOS 10.13, while our current
    # base is 10.12. Until we upgrade, we can fall back on the older
    # pthread support.
    ./pthreadpool-disable-gcd.diff
  ];

  postPatch = lib.optionalString rocmSupport ''
    # https://github.com/facebookincubator/gloo/pull/297
    substituteInPlace third_party/gloo/cmake/Hipify.cmake \
      --replace "\''${HIPIFY_COMMAND}" "python \''${HIPIFY_COMMAND}"

    # Replace hard-coded rocm paths
    substituteInPlace caffe2/CMakeLists.txt \
      --replace "/opt/rocm" "${rocmtoolkit_joined}" \
      --replace "hcc/include" "hip/include" \
      --replace "rocblas/include" "include/rocblas" \
      --replace "hipsparse/include" "include/hipsparse"

    # Doesn't pick up the environment variable?
    substituteInPlace third_party/kineto/libkineto/CMakeLists.txt \
      --replace "\''$ENV{ROCM_SOURCE_DIR}" "${rocmtoolkit_joined}" \
      --replace "/opt/rocm" "${rocmtoolkit_joined}"

    # Strangely, this is never set in cmake
    substituteInPlace cmake/public/LoadHIP.cmake \
      --replace "set(ROCM_PATH \$ENV{ROCM_PATH})" \
        "set(ROCM_PATH \$ENV{ROCM_PATH})''\nset(ROCM_VERSION ${lib.concatStrings (lib.intersperse "0" (lib.splitString "." hip.version))})"
  ''
  # error: no member named 'aligned_alloc' in the global namespace; did you mean simply 'aligned_alloc'
  # This lib overrides aligned_alloc, hence the error message. TL;DR: the function is linkable but not declared in the header.
  + lib.optionalString (stdenv.isDarwin && lib.versionOlder stdenv.targetPlatform.darwinSdkVersion "11.0") ''
    substituteInPlace third_party/pocketfft/pocketfft_hdronly.h --replace '#if __cplusplus >= 201703L
    inline void *aligned_alloc(size_t align, size_t size)' '#if __cplusplus >= 201703L && 0
    inline void *aligned_alloc(size_t align, size_t size)'
  '';

  preConfigure = lib.optionalString cudaSupport ''
    export TORCH_CUDA_ARCH_LIST="${gpuTargetString}"
    export CC=${cudatoolkit.cc}/bin/gcc CXX=${cudatoolkit.cc}/bin/g++
  '' + lib.optionalString (cudaSupport && cudnn != null) ''
    export CUDNN_INCLUDE_DIR=${cudnn}/include
  '' + lib.optionalString rocmSupport ''
    export ROCM_PATH=${rocmtoolkit_joined}
    export ROCM_SOURCE_DIR=${rocmtoolkit_joined}
    export PYTORCH_ROCM_ARCH="${gpuTargetString}"
    export CMAKE_CXX_FLAGS="-I${rocmtoolkit_joined}/include -I${rocmtoolkit_joined}/include/rocblas"
    python tools/amd_build/build_amd.py
  '';

  # Use pytorch's custom configurations
  dontUseCmakeConfigure = true;

  BUILD_NAMEDTENSOR = setBool true;
  BUILD_DOCS = setBool buildDocs;

  # We only do an import check, so do not build tests either.
  BUILD_TEST = setBool false;

  # Unlike MKL, oneDNN (née MKLDNN) is FOSS, so we enable support for
  # it by default. PyTorch currently uses its own vendored version
  # of oneDNN through Intel iDeep.
  USE_MKLDNN = setBool mklDnnSupport;
  USE_MKLDNN_CBLAS = setBool mklDnnSupport;

  # Avoid using pybind11 from the git submodule.
  # Also avoids pytorch exporting the headers of pybind11.
  USE_SYSTEM_PYBIND11 = true;

  preBuild = ''
    export MAX_JOBS=$NIX_BUILD_CORES
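    # --cmake-only makes setup.py generate the cmake build directory without
    # compiling; the explicit cmake call below then re-runs the configuration on it.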
    ${python.pythonForBuild.interpreter} setup.py build --cmake-only
    ${cmake}/bin/cmake build
  '';

  preFixup = ''
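    # strip2 rewrites a library's RUNPATH: it drops the first two entries and
    # prepends $ORIGIN, so the remaining references resolve relative to the
    # installed library itself.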
    function join_by { local IFS="$1"; shift; echo "$*"; }
    function strip2 {
      IFS=':'
      read -ra RP <<< "$(patchelf --print-rpath "$1")"
      IFS=' '
      RP_NEW=$(join_by : ''${RP[@]:2})
      patchelf --set-rpath \$ORIGIN:''${RP_NEW} "$1"
    }
    for f in $(find ''${out} -name 'libcaffe2*.so')
    do
      strip2 "$f"
    done
  '';

  # Override the (weirdly) wrong version set by default. See
  # https://github.com/NixOS/nixpkgs/pull/52437#issuecomment-449718038
  # https://github.com/pytorch/pytorch/blob/v1.0.0/setup.py#L267
  PYTORCH_BUILD_VERSION = version;
  PYTORCH_BUILD_NUMBER = 0;

  USE_SYSTEM_NCCL = setBool useSystemNccl; # don't build pytorch's third_party NCCL

  # Suppress a weird warning in mkl-dnn, part of ideep in pytorch
  # (upstream seems to have fixed this in the wrong place?)
  # https://github.com/intel/mkl-dnn/commit/8134d346cdb7fe1695a2aa55771071d455fae0bc
  # https://github.com/pytorch/pytorch/issues/22346
  #
  # Also of interest: pytorch ignores CXXFLAGS and uses CFLAGS for both C and C++:
  # https://github.com/pytorch/pytorch/blob/v1.11.0/setup.py#L17
  env.NIX_CFLAGS_COMPILE = toString ((lib.optionals (blas.implementation == "mkl") [ "-Wno-error=array-bounds" ]
    # Suppress gcc regression: avx512 math function raises uninitialized variable warning
    # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593
    # See also: Fails to compile with GCC 12.1.0 https://github.com/pytorch/pytorch/issues/77939
    ++ lib.optionals (stdenv.cc.isGNU && lib.versionAtLeast stdenv.cc.version "12.0.0") [
      "-Wno-error=maybe-uninitialized"
      "-Wno-error=uninitialized"
    ]
    # Since pytorch 2.0:
    # gcc-12.2.0/include/c++/12.2.0/bits/new_allocator.h:158:33: error: ‘void operator delete(void*, std::size_t)’
    # ... called on pointer ‘<unknown>’ with nonzero offset [1, 9223372036854775800] [-Werror=free-nonheap-object]
    ++ lib.optionals (stdenv.cc.isGNU && lib.versions.major stdenv.cc.version == "12") [
      "-Wno-error=free-nonheap-object"
    ]));

  nativeBuildInputs = [
    cmake
    util-linux
    which
    ninja
    pybind11
    pythonRelaxDepsHook
    removeReferencesTo
  ] ++ lib.optionals cudaSupport [ cudatoolkit_joined ]
    ++ lib.optionals rocmSupport [ rocmtoolkit_joined ];

  buildInputs = [ blas blas.provider pybind11 ]
    ++ lib.optionals stdenv.isLinux [ linuxHeaders_5_19 ] # TMP: avoid "flexible array member" errors for now
    ++ lib.optionals cudaSupport [ cudnn nccl ]
    ++ lib.optionals rocmSupport [ openmp ]
    ++ lib.optionals (cudaSupport || rocmSupport) [ magma ]
    ++ lib.optionals stdenv.isLinux [ numactl ]
    ++ lib.optionals stdenv.isDarwin [ Accelerate CoreServices libobjc ];

  propagatedBuildInputs = [
    cffi
    click
    numpy
    pyyaml

    # From install_requires:
    filelock
    typing-extensions
    sympy
    networkx
    jinja2

    # the following are required for tensorboard support
    pillow six future tensorboard protobuf
  ]
  ++ lib.optionals MPISupport [ mpi ]
  ++ lib.optionals rocmSupport [ rocmtoolkit_joined ]
  # The rocm build requires openai-triton, but openai-triton currently requires
  # cuda_nvcc, so we leave it out of the cpu-only build. torch.compile relies on
  # openai-triton, so we also include it for the cuda build.
  ++ lib.optionals (rocmSupport || cudaSupport) [
    openai-triton
  ];

  # Tests take a long time and may be flaky, so just sanity-check imports
  doCheck = false;

  pythonImportsCheck = [
    "torch"
  ];

  nativeCheckInputs = [ hypothesis ninja psutil ];

  # Join the outer pieces with newlines so the runHook invocations and the test
  # command each run as separate shell commands.
  checkPhase = with lib.versions; with lib.strings; concatStringsSep "\n" [
    "runHook preCheck"
    (concatStringsSep " " [
      cudaStubEnv
      "${python.interpreter} test/run_test.py"
      "--exclude"
      (concatStringsSep " " [
        "utils" # utils requires git, which is not allowed in the check phase

        # "dataloader" # psutil correctly finds and triggers multiprocessing, but is too sandboxed to run -- resulting in numerous errors
        # ^^^^^^^^^^^^ NOTE: while test_dataloader does return errors, these are acceptable errors and do not interfere with the build

        # tensorboard has acceptable failures for pytorch 1.3.x due to dependencies on tensorboard-plugins
        (optionalString (majorMinor version == "1.3") "tensorboard")
      ])
    ])
    "runHook postCheck"
  ];

  pythonRemoveDeps = [
    # In our dist-info the name is just "triton"
    "pytorch-triton-rocm"
  ];

  postInstall = ''
    find "$out/${python.sitePackages}/torch/include" "$out/${python.sitePackages}/torch/lib" -type f -exec remove-references-to -t ${stdenv.cc} '{}' +

    mkdir $dev
    cp -r $out/${python.sitePackages}/torch/include $dev/include
    cp -r $out/${python.sitePackages}/torch/share $dev/share

    # Fix up library paths for split outputs
    substituteInPlace \
      $dev/share/cmake/Torch/TorchConfig.cmake \
      --replace \''${TORCH_INSTALL_PREFIX}/lib "$lib/lib"

    substituteInPlace \
      $dev/share/cmake/Caffe2/Caffe2Targets-release.cmake \
      --replace \''${_IMPORT_PREFIX}/lib "$lib/lib"

    mkdir $lib
    mv $out/${python.sitePackages}/torch/lib $lib/lib
    ln -s $lib/lib $out/${python.sitePackages}/torch/lib
  '' + lib.optionalString rocmSupport ''
    substituteInPlace $dev/share/cmake/Tensorpipe/TensorpipeTargets-release.cmake \
      --replace "\''${_IMPORT_PREFIX}/lib64" "$lib/lib"

    substituteInPlace $dev/share/cmake/ATen/ATenConfig.cmake \
      --replace "/build/source/torch/include" "$dev/include"
  '';

  postFixup = lib.optionalString stdenv.isDarwin ''
    for f in $lib/lib/*.dylib; do
      install_name_tool -id $lib/lib/$(basename $f) $f || true
    done

    install_name_tool -change @rpath/libshm.dylib $lib/lib/libshm.dylib $lib/lib/libtorch_python.dylib
    install_name_tool -change @rpath/libtorch.dylib $lib/lib/libtorch.dylib $lib/lib/libtorch_python.dylib
    install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libtorch_python.dylib

    install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libtorch.dylib

    install_name_tool -change @rpath/libtorch.dylib $lib/lib/libtorch.dylib $lib/lib/libshm.dylib
    install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libshm.dylib
  '';

  # Builds in 2+h with 2 cores, and ~15m with a big-parallel builder.
  requiredSystemFeatures = [ "big-parallel" ];

  passthru = {
    inherit cudaSupport cudaPackages;
    # At least for 1.10.2, `torch.fft` is unavailable unless the BLAS provider is MKL.
    # This attribute allows for easy detection of its availability.
    blasProvider = blas.provider;
  } // lib.optionalAttrs cudaSupport {
    # NOTE: supportedCudaCapabilities isn't computed unless cudaSupport is true, so we can't use
    # it in the passthru set above because a downstream package might try to access it even
    # when cudaSupport is false. Better to have it missing than null or an empty list by default.
    cudaCapabilities = supportedCudaCapabilities;
  };

  meta = with lib; {
    changelog = "https://github.com/pytorch/pytorch/releases/tag/v${version}";
    # keep PyTorch in the description so the package can be found under that name on search.nixos.org
    description = "PyTorch: Tensors and Dynamic neural networks in Python with strong GPU acceleration";
    homepage = "https://pytorch.org/";
    license = licenses.bsd3;
    maintainers = with maintainers; [ teh thoughtpolice tscholak ]; # tscholak esp. for darwin-related builds
    platforms = with platforms; linux ++ lib.optionals (!cudaSupport && !rocmSupport) darwin;
    broken = rocmSupport && cudaSupport; # CUDA and ROCm are mutually exclusive
  };
}