{ stdenv, lib, fetchFromGitHub, buildPythonPackage, python,
  cudaSupport ? false, cudaPackages, magma,
  useSystemNccl ? true,
  MPISupport ? false, mpi,
  buildDocs ? false,

  # Native build inputs
  cmake, linkFarm, symlinkJoin, which, pybind11, removeReferencesTo,
  pythonRelaxDepsHook,

  # Build inputs
  numactl,
  Accelerate, CoreServices, libobjc,

  # Propagated build inputs
  filelock,
  jinja2,
  networkx,
  openai-triton,
  sympy,
  numpy, pyyaml, cffi, click, typing-extensions,

  # Unit tests
  hypothesis, psutil,

  # Disable MKLDNN on aarch64-darwin: it negatively impacts performance.
  # This is also what the official pytorch build does.
  mklDnnSupport ? !(stdenv.isDarwin && stdenv.isAarch64),

  # Virtual package that consistently instantiates BLAS across nixpkgs.
  # See https://github.com/NixOS/nixpkgs/pull/83888
  blas,

  # ninja (https://ninja-build.org) must be available to run C++ extension tests.
  ninja,

  linuxHeaders_5_19,

  # dependencies for torch.utils.tensorboard
  pillow, six, future, tensorboard, protobuf,

  pythonOlder,

  # ROCm dependencies
  rocmSupport ? false,
  gpuTargets ? [ ],
  openmp, rocm-core, hip, rccl, miopen, miopengemm, rocrand, rocblas,
  rocfft, rocsparse, hipsparse, rocthrust, rocprim, hipcub, roctracer,
  rocsolver, hipfft, hipsolver, hipblas, rocminfo, rocm-thunk, rocm-comgr,
  rocm-device-libs, rocm-runtime, rocm-opencl-runtime, hipify
}:
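
# The feature flags above can be flipped by consumers through the standard
# override mechanism, e.g. (hypothetical invocation):
#   python3Packages.torch.override { cudaSupport = true; }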

let
  inherit (lib) lists strings trivial;
  inherit (cudaPackages) cudatoolkit cudaFlags cudnn nccl;
in

assert cudaSupport -> stdenv.isLinux;
assert cudaSupport -> (cudaPackages.cudaMajorVersion == "11");

# confirm that cudatoolkits are sync'd across dependencies
assert !(MPISupport && cudaSupport) || mpi.cudatoolkit == cudatoolkit;
assert !cudaSupport || magma.cudaPackages.cudatoolkit == cudatoolkit;

let
  setBool = v: if v then "1" else "0";
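  # For reference, setBool true == "1" and setBool false == "0"; pytorch's
  # setup.py reads its USE_*/BUILD_* toggles from the environment as strings.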

  # https://github.com/pytorch/pytorch/blob/v2.0.1/torch/utils/cpp_extension.py#L1744
  supportedTorchCudaCapabilities =
    let
      real = ["3.5" "3.7" "5.0" "5.2" "5.3" "6.0" "6.1" "6.2" "7.0" "7.2" "7.5" "8.0" "8.6" "8.9" "9.0"];
      ptx = lists.map (x: "${x}+PTX") real;
    in
    real ++ ptx;

  # NOTE: The lists.subtractLists function is perhaps a bit unintuitive. It subtracts the elements
  # of the first list *from* the second list. That means:
  # lists.subtractLists a b = b - a
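  # A quick worked example of that argument order:
  #   lists.subtractLists [ "3.5" ] [ "3.5" "8.6" ] == [ "8.6" ]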

  # For CUDA
  supportedCudaCapabilities = lists.intersectLists cudaFlags.cudaCapabilities supportedTorchCudaCapabilities;
  unsupportedCudaCapabilities = lists.subtractLists supportedCudaCapabilities cudaFlags.cudaCapabilities;
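  # For illustration, with hypothetical cudaFlags.cudaCapabilities = [ "3.0" "7.5" "8.6" ]:
  # supportedCudaCapabilities == [ "7.5" "8.6" ] and unsupportedCudaCapabilities == [ "3.0" ],
  # since "3.0" is not in supportedTorchCudaCapabilities.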

  # Throw if none of the requested GPU targets is supported; otherwise return the
  # supported ones. (Despite the name, gpuArchWarner aborts via trivial.throwIf
  # rather than merely printing a warning.)
  gpuArchWarner = supported: unsupported:
    trivial.throwIf (supported == [ ])
      (
        "No supported GPU targets specified. Requested GPU targets: "
        + strings.concatStringsSep ", " unsupported
      )
      supported;
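  # Behaviour sketch: gpuArchWarner [ "8.6" ] [ "3.0" ] evaluates to [ "8.6" ],
  # while gpuArchWarner [ ] [ "3.0" ] aborts evaluation with the message above.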

  # Create the gpuTargetString.
  gpuTargetString = strings.concatStringsSep ";" (
    if gpuTargets != [ ] then
      # If gpuTargets is specified, it always takes priority.
      gpuTargets
    else if cudaSupport then
      gpuArchWarner supportedCudaCapabilities unsupportedCudaCapabilities
    else if rocmSupport then
      hip.gpuTargets
    else
      throw "No GPU targets specified"
  );
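  # For example, with cudaSupport and supportedCudaCapabilities == [ "7.5" "8.6" ],
  # gpuTargetString is "7.5;8.6" -- the semicolon-separated form exported below
  # via TORCH_CUDA_ARCH_LIST (or PYTORCH_ROCM_ARCH for ROCm).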

  cudatoolkit_joined = symlinkJoin {
    name = "${cudatoolkit.name}-unsplit";
    # NCCL is here purely for semantic grouping; it could be moved to nativeBuildInputs.
    paths = [ cudatoolkit.out cudatoolkit.lib nccl.dev nccl.out ];
  };

  # Normally libcuda.so.1 is provided at runtime by nvidia-x11 via
  # LD_LIBRARY_PATH=/run/opengl-driver/lib. We only use the stub
  # libcuda.so from cudatoolkit for running tests, so that we don’t have
  # to recompile pytorch on every update to nvidia-x11 or the kernel.
  cudaStub = linkFarm "cuda-stub" [{
    name = "libcuda.so.1";
    path = "${cudatoolkit}/lib/stubs/libcuda.so";
  }];
  cudaStubEnv = lib.optionalString cudaSupport
    "LD_LIBRARY_PATH=${cudaStub}\${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH ";
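  # With cudaSupport enabled this expands to roughly
  #   LD_LIBRARY_PATH=/nix/store/...-cuda-stub${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH
  # prepending the stub directory; the trailing space separates this prefix from
  # the test runner command it precedes in checkPhase.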

  rocmtoolkit_joined = symlinkJoin {
    name = "rocm-merged";

    paths = [
      rocm-core hip rccl miopen miopengemm rocrand rocblas
      rocfft rocsparse hipsparse rocthrust rocprim hipcub
      roctracer rocsolver hipfft hipsolver hipblas
      rocminfo rocm-thunk rocm-comgr rocm-device-libs
      rocm-runtime rocm-opencl-runtime hipify
    ];
  };
in buildPythonPackage rec {
  pname = "torch";
  # Don't forget to update torch-bin to the same version.
  version = "2.0.1";
  format = "setuptools";

  disabled = pythonOlder "3.8.0";

  outputs = [
    "out" # output standard python package
    "dev" # output libtorch headers
    "lib" # output libtorch libraries
  ];

  src = fetchFromGitHub {
    owner = "pytorch";
    repo = "pytorch";
    rev = "refs/tags/v${version}";
    fetchSubmodules = true;
    hash = "sha256-xUj77yKz3IQ3gd/G32pI4OhL3LoN1zS7eFg0/0nZp5I=";
  };

  patches = lib.optionals (stdenv.isDarwin && stdenv.isx86_64) [
    # pthreadpool added support for Grand Central Dispatch in April
    # 2020. However, this relies on functionality (DISPATCH_APPLY_AUTO)
    # that is only available starting with macOS 10.13, while our current
    # base is 10.12. Until we upgrade, we can fall back on the older
    # pthread support.
    ./pthreadpool-disable-gcd.diff
  ];

  postPatch = lib.optionalString rocmSupport ''
    # https://github.com/facebookincubator/gloo/pull/297
    substituteInPlace third_party/gloo/cmake/Hipify.cmake \
      --replace "\''${HIPIFY_COMMAND}" "python \''${HIPIFY_COMMAND}"

    # Replace hard-coded rocm paths
    substituteInPlace caffe2/CMakeLists.txt \
      --replace "/opt/rocm" "${rocmtoolkit_joined}" \
      --replace "hcc/include" "hip/include" \
      --replace "rocblas/include" "include/rocblas" \
      --replace "hipsparse/include" "include/hipsparse"

    # Doesn't pick up the environment variable?
    substituteInPlace third_party/kineto/libkineto/CMakeLists.txt \
      --replace "\''$ENV{ROCM_SOURCE_DIR}" "${rocmtoolkit_joined}" \
      --replace "/opt/rocm" "${rocmtoolkit_joined}"

    # Strangely, this is never set in cmake
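    # The --replace below appends a line computing ROCM_VERSION from hip.version:
    # splitString "." splits it into components, intersperse "0" zero-joins them,
    # and concatStrings yields e.g. "50402" for hip.version = "5.4.2".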
    substituteInPlace cmake/public/LoadHIP.cmake \
      --replace "set(ROCM_PATH \$ENV{ROCM_PATH})" \
        "set(ROCM_PATH \$ENV{ROCM_PATH})''\nset(ROCM_VERSION ${lib.concatStrings (lib.intersperse "0" (lib.splitString "." hip.version))})"
  ''
  # error: no member named 'aligned_alloc' in the global namespace; did you mean simply 'aligned_alloc'
  # This lib overrides aligned_alloc, hence the error message. TL;DR: the function is linkable but not declared in the header.
  + lib.optionalString (stdenv.isDarwin && lib.versionOlder stdenv.targetPlatform.darwinSdkVersion "11.0") ''
    substituteInPlace third_party/pocketfft/pocketfft_hdronly.h --replace '#if __cplusplus >= 201703L
    inline void *aligned_alloc(size_t align, size_t size)' '#if __cplusplus >= 201703L && 0
    inline void *aligned_alloc(size_t align, size_t size)'
  '';

  preConfigure = lib.optionalString cudaSupport ''
    export TORCH_CUDA_ARCH_LIST="${gpuTargetString}"
    export CC=${cudatoolkit.cc}/bin/gcc CXX=${cudatoolkit.cc}/bin/g++
  '' + lib.optionalString (cudaSupport && cudnn != null) ''
    export CUDNN_INCLUDE_DIR=${cudnn}/include
  '' + lib.optionalString rocmSupport ''
    export ROCM_PATH=${rocmtoolkit_joined}
    export ROCM_SOURCE_DIR=${rocmtoolkit_joined}
    export PYTORCH_ROCM_ARCH="${gpuTargetString}"
    export CMAKE_CXX_FLAGS="-I${rocmtoolkit_joined}/include -I${rocmtoolkit_joined}/include/rocblas"
    python tools/amd_build/build_amd.py
  '';

  # Use pytorch's custom configurations
  dontUseCmakeConfigure = true;

  BUILD_NAMEDTENSOR = setBool true;
  BUILD_DOCS = setBool buildDocs;

  # We only do an imports check, so do not build tests either.
  BUILD_TEST = setBool false;

  # Unlike MKL, oneDNN (née MKLDNN) is FOSS, so we enable support for
  # it by default. PyTorch currently uses its own vendored version
  # of oneDNN through Intel iDeep.
  USE_MKLDNN = setBool mklDnnSupport;
  USE_MKLDNN_CBLAS = setBool mklDnnSupport;

  # Avoid using pybind11 from the git submodule; this also avoids
  # pytorch exporting the headers of pybind11.
  USE_SYSTEM_BIND11 = true;

  preBuild = ''
    export MAX_JOBS=$NIX_BUILD_CORES
    ${python.pythonForBuild.interpreter} setup.py build --cmake-only
    ${cmake}/bin/cmake build
  '';

  preFixup = ''
    function join_by { local IFS="$1"; shift; echo "$*"; }
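    # strip2 drops the first two RPATH entries of the given ELF file and prepends
    # $ORIGIN, e.g. an RPATH of a:b:c:d becomes $ORIGIN:c:d.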
    function strip2 {
      IFS=':'
      read -ra RP <<< $(patchelf --print-rpath $1)
      IFS=' '
      RP_NEW=$(join_by : ''${RP[@]:2})
      patchelf --set-rpath \$ORIGIN:''${RP_NEW} "$1"
    }
    for f in $(find ''${out} -name 'libcaffe2*.so')
    do
      strip2 $f
    done
  '';

  # Override the (weirdly) wrong version set by default. See
  # https://github.com/NixOS/nixpkgs/pull/52437#issuecomment-449718038
  # https://github.com/pytorch/pytorch/blob/v1.0.0/setup.py#L267
  PYTORCH_BUILD_VERSION = version;
  PYTORCH_BUILD_NUMBER = 0;

  USE_SYSTEM_NCCL = setBool useSystemNccl; # don't build pytorch's third_party NCCL

  # Suppress a weird warning in mkl-dnn, part of ideep in pytorch
  # (upstream seems to have fixed this in the wrong place?)
  # https://github.com/intel/mkl-dnn/commit/8134d346cdb7fe1695a2aa55771071d455fae0bc
  # https://github.com/pytorch/pytorch/issues/22346
  #
  # Also of interest: pytorch ignores CXXFLAGS and uses CFLAGS for both C and C++:
  # https://github.com/pytorch/pytorch/blob/v1.11.0/setup.py#L17
  env.NIX_CFLAGS_COMPILE = toString ((lib.optionals (blas.implementation == "mkl") [ "-Wno-error=array-bounds" ]
    # Suppress gcc regression: avx512 math function raises uninitialized variable warning
    # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593
    # See also: Fails to compile with GCC 12.1.0 https://github.com/pytorch/pytorch/issues/77939
    ++ lib.optionals (stdenv.cc.isGNU && lib.versionAtLeast stdenv.cc.version "12.0.0") [
      "-Wno-error=maybe-uninitialized"
      "-Wno-error=uninitialized"
    ]
    # Since pytorch 2.0:
    # gcc-12.2.0/include/c++/12.2.0/bits/new_allocator.h:158:33: error: ‘void operator delete(void*, std::size_t)’
    # ... called on pointer ‘<unknown>’ with nonzero offset [1, 9223372036854775800] [-Werror=free-nonheap-object]
    ++ lib.optionals (stdenv.cc.isGNU && lib.versions.major stdenv.cc.version == "12") [
      "-Wno-error=free-nonheap-object"
    ]));

  nativeBuildInputs = [
    cmake
    which
    ninja
    pybind11
    pythonRelaxDepsHook
    removeReferencesTo
  ] ++ lib.optionals cudaSupport [ cudatoolkit_joined ]
    ++ lib.optionals rocmSupport [ rocmtoolkit_joined ];

  buildInputs = [ blas blas.provider pybind11 ]
    ++ lib.optionals stdenv.isLinux [ linuxHeaders_5_19 ] # TMP: avoid "flexible array member" errors for now
    ++ lib.optionals cudaSupport [ cudnn nccl ]
    ++ lib.optionals rocmSupport [ openmp ]
    ++ lib.optionals (cudaSupport || rocmSupport) [ magma ]
    ++ lib.optionals stdenv.isLinux [ numactl ]
    ++ lib.optionals stdenv.isDarwin [ Accelerate CoreServices libobjc ];

  propagatedBuildInputs = [
    cffi
    click
    numpy
    pyyaml

    # From install_requires:
    filelock
    typing-extensions
    sympy
    networkx
    jinja2

    # the following are required for tensorboard support
    pillow six future tensorboard protobuf
  ]
  ++ lib.optionals MPISupport [ mpi ]
  ++ lib.optionals rocmSupport [ rocmtoolkit_joined ]
  # The ROCm build requires openai-triton, but openai-triton currently requires
  # cuda_nvcc, so it is not included in the CPU-only build. torch.compile relies
  # on openai-triton, so we include it for the CUDA build as well.
  ++ lib.optionals (rocmSupport || cudaSupport) [
    openai-triton
  ];

  # Tests take a long time and may be flaky, so just sanity-check imports
  doCheck = false;

  pythonImportsCheck = [
    "torch"
  ];

  nativeCheckInputs = [ hypothesis ninja psutil ];

  checkPhase = with lib.versions; with lib.strings; concatStringsSep " " [
    "runHook preCheck"
    cudaStubEnv
    "${python.interpreter} test/run_test.py"
    "--exclude"
    (concatStringsSep " " [
      "utils" # utils requires git, which is not allowed in the check phase

      # "dataloader" # psutil correctly finds and triggers multiprocessing, but is too sandboxed to run -- resulting in numerous errors
      # ^^^^^^^^^^^^ NOTE: while test_dataloader does return errors, these are acceptable errors and do not interfere with the build

      # tensorboard has acceptable failures for pytorch 1.3.x due to dependencies on tensorboard-plugins
      (optionalString (majorMinor version == "1.3") "tensorboard")
    ])
    "runHook postCheck"
  ];

  pythonRemoveDeps = [
    # In our dist-info the name is just "triton"
    "pytorch-triton-rocm"
  ];

  postInstall = ''
    find "$out/${python.sitePackages}/torch/include" "$out/${python.sitePackages}/torch/lib" -type f -exec remove-references-to -t ${stdenv.cc} '{}' +

    mkdir $dev
    cp -r $out/${python.sitePackages}/torch/include $dev/include
    cp -r $out/${python.sitePackages}/torch/share $dev/share

    # Fix up library paths for split outputs
    substituteInPlace \
      $dev/share/cmake/Torch/TorchConfig.cmake \
      --replace \''${TORCH_INSTALL_PREFIX}/lib "$lib/lib"

    substituteInPlace \
      $dev/share/cmake/Caffe2/Caffe2Targets-release.cmake \
      --replace \''${_IMPORT_PREFIX}/lib "$lib/lib"

    mkdir $lib
    mv $out/${python.sitePackages}/torch/lib $lib/lib
    ln -s $lib/lib $out/${python.sitePackages}/torch/lib
  '' + lib.optionalString rocmSupport ''
    substituteInPlace $dev/share/cmake/Tensorpipe/TensorpipeTargets-release.cmake \
      --replace "\''${_IMPORT_PREFIX}/lib64" "$lib/lib"

    substituteInPlace $dev/share/cmake/ATen/ATenConfig.cmake \
      --replace "/build/source/torch/include" "$dev/include"
  '';

  postFixup = lib.optionalString stdenv.isDarwin ''
    for f in $(ls $lib/lib/*.dylib); do
      install_name_tool -id $lib/lib/$(basename $f) $f || true
    done

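    # Rewrite @rpath/ install names to absolute paths into $lib so the split
    # libraries can locate one another without relying on rpath search.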
    install_name_tool -change @rpath/libshm.dylib $lib/lib/libshm.dylib $lib/lib/libtorch_python.dylib
    install_name_tool -change @rpath/libtorch.dylib $lib/lib/libtorch.dylib $lib/lib/libtorch_python.dylib
    install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libtorch_python.dylib

    install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libtorch.dylib

    install_name_tool -change @rpath/libtorch.dylib $lib/lib/libtorch.dylib $lib/lib/libshm.dylib
    install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libshm.dylib
  '';

  # Builds in 2+h with 2 cores, and ~15m with a big-parallel builder.
  requiredSystemFeatures = [ "big-parallel" ];

  passthru = {
    inherit cudaSupport cudaPackages;
    # At least for 1.10.2, `torch.fft` is unavailable unless the BLAS provider is MKL.
    # This attribute allows for easy detection of its availability.
    blasProvider = blas.provider;
  } // lib.optionalAttrs cudaSupport {
    # NOTE: supportedCudaCapabilities isn't computed unless cudaSupport is true, so we can't use
    # it in the passthru set above because a downstream package might try to access it even
    # when cudaSupport is false. Better to have it missing than null or an empty list by default.
    cudaCapabilities = supportedCudaCapabilities;
  };

  meta = with lib; {
    changelog = "https://github.com/pytorch/pytorch/releases/tag/v${version}";
    # keep PyTorch in the description so the package can be found under that name on search.nixos.org
    description = "PyTorch: Tensors and Dynamic neural networks in Python with strong GPU acceleration";
    homepage = "https://pytorch.org/";
    license = licenses.bsd3;
    maintainers = with maintainers; [ teh thoughtpolice tscholak ]; # tscholak esp. for darwin-related builds
    platforms = with platforms; linux ++ lib.optionals (!cudaSupport && !rocmSupport) darwin;
    broken = rocmSupport && cudaSupport; # CUDA and ROCm are mutually exclusive
  };
}