nixpkgs mirror (for testing) github.com/NixOS/nixpkgs
nix
at 22.05 326 lines 12 kB view raw
# Builds PyTorch from source as a Python package with three outputs
# (`out`, `dev`, `lib`).  CUDA, MPI, oneDNN and documentation support are
# selected through the boolean arguments below.
{ stdenv, lib, fetchFromGitHub, fetchpatch, buildPythonPackage, python,
  cudaSupport ? false, cudaPackages, magma,
  mklDnnSupport ? true, useSystemNccl ? true,
  MPISupport ? false, mpi,
  buildDocs ? false,
  cudaArchList ? null,

  # Native build inputs
  cmake, util-linux, linkFarm, symlinkJoin, which, pybind11, removeReferencesTo,

  # Build inputs
  numactl,

  # Propagated build inputs
  numpy, pyyaml, cffi, click, typing-extensions,

  # Unit tests
  hypothesis, psutil,

  # virtual pkg that consistently instantiates blas across nixpkgs
  # See https://github.com/NixOS/nixpkgs/pull/83888
  blas,

  # ninja (https://ninja-build.org) must be available to run C++ extensions tests,
  ninja,

  # dependencies for torch.utils.tensorboard
  pillow, six, future, tensorboard, protobuf,

  isPy3k, pythonOlder }:

let
  inherit (cudaPackages) cudatoolkit cudnn nccl;
in

# assert that everything needed for cuda is present and that the correct cuda versions are used
assert !cudaSupport || (let majorIs = lib.versions.major cudatoolkit.version;
                        in majorIs == "9" || majorIs == "10" || majorIs == "11");

# confirm that cudatoolkits are sync'd across dependencies
assert !(MPISupport && cudaSupport) || mpi.cudatoolkit == cudatoolkit;
assert !cudaSupport || magma.cudatoolkit == cudatoolkit;

let
  # Render a Nix boolean as the "1"/"0" strings used for the build flags below.
  setBool = v: if v then "1" else "0";

  cudatoolkit_joined = symlinkJoin {
    name = "${cudatoolkit.name}-unsplit";
    # nccl is here purely for semantic grouping it could be moved to nativeBuildInputs
    paths = [ cudatoolkit.out cudatoolkit.lib nccl.dev nccl.out ];
  };

  # Give an explicit list of supported architectures for the build, See:
  #   - pytorch bug report: https://github.com/pytorch/pytorch/issues/23573
  #   - pytorch-1.2.0 build on nixpkgs: https://github.com/NixOS/nixpkgs/pull/65041
  #
  # This list was selected by omitting the TORCH_CUDA_ARCH_LIST parameter,
  # observing the fallback option (which selected all architectures known
  # from cudatoolkit_10_0, pytorch-1.2, and python-3.6), and doing a binary
  # search to find offending architectures.
  #
  # NOTE: Because of sandboxing, this derivation can't auto-detect the hardware's
  # cuda architecture, so there is also now a problem around new architectures
  # not being supported until explicitly added to this derivation.
  #
  # FIXME: CMake is throwing the following warning on pytorch-1.2:
  #
  # ```
  # CMake Warning at cmake/public/utils.cmake:172 (message):
  #   In the future we will require one to explicitly pass TORCH_CUDA_ARCH_LIST
  #   to cmake instead of implicitly setting it as an env variable.  This will
  #   become a FATAL_ERROR in future version of pytorch.
  # ```
  # If this is causing problems for your build, this derivation may have to strip
  # away the standard `buildPythonPackage` and use the
  # [*Adjust Build Options*](https://github.com/pytorch/pytorch/tree/v1.2.0#adjust-build-options-optional)
  # instructions. This will also add more flexibility around configurations
  # (allowing FBGEMM to be built in pytorch-1.1), and may future proof this
  # derivation.
  brokenArchs = [ "3.0" ]; # this variable is only used as documentation.

  cudaCapabilities = rec {
    cuda9 = [
      "3.5"
      "5.0"
      "5.2"
      "6.0"
      "6.1"
      "7.0"
      "7.0+PTX"  # I am getting a "undefined architecture compute_75" on cuda 9
                 # which leads me to believe this is the final cuda-9-compatible architecture.
    ];

    cuda10 = cuda9 ++ [
      "7.5"
      "7.5+PTX"  # < most recent architecture as of cudatoolkit_10_0 and pytorch-1.2.0
    ];

    cuda11 = cuda10 ++ [
      "8.0"
      "8.0+PTX"  # < CUDA toolkit 11.0
      "8.6"
      "8.6+PTX"  # < CUDA toolkit 11.1
    ];
  };

  # An explicitly passed cudaArchList always wins.  Without CUDA support the
  # (possibly null) cudaArchList is passed through unchanged.  Otherwise the
  # default list for the toolkit's major version is used.
  final_cudaArchList =
    if !cudaSupport || cudaArchList != null
    then cudaArchList
    else cudaCapabilities."cuda${lib.versions.major cudatoolkit.version}";

  # Normally libcuda.so.1 is provided at runtime by nvidia-x11 via
  # LD_LIBRARY_PATH=/run/opengl-driver/lib.  We only use the stub
  # libcuda.so from cudatoolkit for running tests, so that we don't have
  # to recompile pytorch on every update to nvidia-x11 or the kernel.
  cudaStub = linkFarm "cuda-stub" [{
    name = "libcuda.so.1";
    path = "${cudatoolkit}/lib/stubs/libcuda.so";
  }];
  cudaStubEnv = lib.optionalString cudaSupport
    "LD_LIBRARY_PATH=${cudaStub}\${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH ";

in buildPythonPackage rec {
  pname = "pytorch";
  # Don't forget to update pytorch-bin to the same version.
  version = "1.11.0";
  format = "setuptools";

  disabled = pythonOlder "3.7.0";

  outputs = [
    "out"   # output standard python package
    "dev"   # output libtorch headers
    "lib"   # output libtorch libraries
  ];

  src = fetchFromGitHub {
    owner  = "pytorch";
    repo   = "pytorch";
    rev    = "v${version}";
    fetchSubmodules = true;
    sha256 = "sha256-CEu63tdRBAF8CTchO3Qu8gUNObQylX6U08yDTI4/c/0=";
  };

  patches = [
    # Fix for a breakpad incompatibility with glibc>2.33
    # https://github.com/pytorch/pytorch/issues/70297
    # https://github.com/google/breakpad/commit/605c51ed96ad44b34c457bbca320e74e194c317e
    ./breakpad-sigstksz.patch
  ] ++ lib.optionals stdenv.isDarwin [
    # pthreadpool added support for Grand Central Dispatch in April
    # 2020. However, this relies on functionality (DISPATCH_APPLY_AUTO)
    # that is available starting with macOS 10.13. However, our current
    # base is 10.12. Until we upgrade, we can fall back on the older
    # pthread support.
    ./pthreadpool-disable-gcd.diff
  ];

  preConfigure = lib.optionalString cudaSupport ''
    export TORCH_CUDA_ARCH_LIST="${lib.strings.concatStringsSep ";" final_cudaArchList}"
    export CC=${cudatoolkit.cc}/bin/gcc CXX=${cudatoolkit.cc}/bin/g++
  '' + lib.optionalString (cudaSupport && cudnn != null) ''
    export CUDNN_INCLUDE_DIR=${cudnn}/include
  '';

  # Use pytorch's custom configurations
  dontUseCmakeConfigure = true;

  BUILD_NAMEDTENSOR = setBool true;
  BUILD_DOCS = setBool buildDocs;

  # We only do an imports check, so do not build tests either.
  BUILD_TEST = setBool false;

  # Unlike MKL, oneDNN (née MKLDNN) is FOSS, so we enable support for
  # it by default. PyTorch currently uses its own vendored version
  # of oneDNN through Intel iDeep.
  USE_MKLDNN = setBool mklDnnSupport;
  USE_MKLDNN_CBLAS = setBool mklDnnSupport;

  preBuild = ''
    export MAX_JOBS=$NIX_BUILD_CORES
    ${python.interpreter} setup.py build --cmake-only
    ${cmake}/bin/cmake build
  '';

  # For every libcaffe2*.so under $out, drop the first two RPATH entries and
  # put $ORIGIN in front of what remains.
  # NOTE(review): the two dropped entries are presumably build-time paths;
  # confirm against an actual build.
  #
  # IFS is overridden only for the single `read` (and `local`ly in join_by)
  # so the builder's global IFS is left untouched for the rest of the fixup
  # phase.  The RP slice is deliberately left unquoted so that empty RPATH
  # elements are dropped rather than re-joined as "::".
  preFixup = ''
    function join_by { local IFS="$1"; shift; echo "$*"; }
    function strip2 {
      local -a RP
      local RP_NEW
      IFS=':' read -ra RP <<< "$(patchelf --print-rpath "$1")"
      RP_NEW=$(join_by : ''${RP[@]:2})
      patchelf --set-rpath \$ORIGIN:''${RP_NEW} "$1"
    }
    for f in $(find ''${out} -name 'libcaffe2*.so')
    do
      strip2 $f
    done
  '';

  # Override the (weirdly) wrong version set by default. See
  # https://github.com/NixOS/nixpkgs/pull/52437#issuecomment-449718038
  # https://github.com/pytorch/pytorch/blob/v1.0.0/setup.py#L267
  PYTORCH_BUILD_VERSION = version;
  PYTORCH_BUILD_NUMBER = 0;

  USE_SYSTEM_NCCL=setBool useSystemNccl;                  # don't build pytorch's third_party NCCL

  # Suppress a weird warning in mkl-dnn, part of ideep in pytorch
  # (upstream seems to have fixed this in the wrong place?)
  # https://github.com/intel/mkl-dnn/commit/8134d346cdb7fe1695a2aa55771071d455fae0bc
  # https://github.com/pytorch/pytorch/issues/22346
  #
  # Also of interest: pytorch ignores CXXFLAGS uses CFLAGS for both C and C++:
  # https://github.com/pytorch/pytorch/blob/v1.11.0/setup.py#L17
  NIX_CFLAGS_COMPILE = lib.optionals (blas.implementation == "mkl") [ "-Wno-error=array-bounds" ];

  nativeBuildInputs = [
    cmake
    util-linux
    which
    ninja
    pybind11
    removeReferencesTo
  ] ++ lib.optionals cudaSupport [ cudatoolkit_joined ];

  buildInputs = [ blas blas.provider ]
    ++ lib.optionals cudaSupport [ cudnn magma nccl ]
    ++ lib.optionals stdenv.isLinux [ numactl ];

  propagatedBuildInputs = [
    cffi
    click
    numpy
    pyyaml
    typing-extensions
    # the following are required for tensorboard support
    pillow six future tensorboard protobuf
  ] ++ lib.optionals MPISupport [ mpi ];

  checkInputs = [ hypothesis ninja psutil ];

  # Tests take a long time and may be flaky, so just sanity-check imports
  doCheck = false;
  pythonImportsCheck = [
    "torch"
  ];

  checkPhase = with lib.versions; with lib.strings; concatStringsSep " " [
    cudaStubEnv
    "${python.interpreter} test/run_test.py"
    "--exclude"
    (concatStringsSep " " [
      "utils" # utils requires git, which is not allowed in the check phase

      # "dataloader" # psutil correctly finds and triggers multiprocessing, but is too sandboxed to run -- resulting in numerous errors
      # ^^^^^^^^^^^^ NOTE: while test_dataloader does return errors, these are acceptable errors and do not interfere with the build

      # tensorboard has acceptable failures for pytorch 1.3.x due to dependencies on tensorboard-plugins
      (optionalString (majorMinor version == "1.3" ) "tensorboard")
    ])
  ];

  # Split headers/cmake files into $dev and shared libraries into $lib,
  # leaving a symlink to the libraries inside the Python package.
  postInstall = ''
    find "$out/${python.sitePackages}/torch/include" "$out/${python.sitePackages}/torch/lib" -type f -exec remove-references-to -t ${stdenv.cc} '{}' +

    mkdir $dev
    cp -r $out/${python.sitePackages}/torch/include $dev/include
    cp -r $out/${python.sitePackages}/torch/share $dev/share

    # Fix up library paths for split outputs
    substituteInPlace \
      $dev/share/cmake/Torch/TorchConfig.cmake \
      --replace \''${TORCH_INSTALL_PREFIX}/lib "$lib/lib"

    substituteInPlace \
      $dev/share/cmake/Caffe2/Caffe2Targets-release.cmake \
      --replace \''${_IMPORT_PREFIX}/lib "$lib/lib"

    mkdir $lib
    mv $out/${python.sitePackages}/torch/lib $lib/lib
    ln -s $lib/lib $out/${python.sitePackages}/torch/lib
  '';

  # On Darwin, rewrite install names and @rpath references so the dylibs
  # point at their final location in $lib/lib.  The loop globs directly
  # instead of parsing `ls` output.
  postFixup = lib.optionalString stdenv.isDarwin ''
    for f in "$lib"/lib/*.dylib; do
        install_name_tool -id "$lib/lib/$(basename "$f")" "$f" || true
    done

    install_name_tool -change @rpath/libshm.dylib $lib/lib/libshm.dylib $lib/lib/libtorch_python.dylib
    install_name_tool -change @rpath/libtorch.dylib $lib/lib/libtorch.dylib $lib/lib/libtorch_python.dylib
    install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libtorch_python.dylib

    install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libtorch.dylib

    install_name_tool -change @rpath/libtorch.dylib $lib/lib/libtorch.dylib $lib/lib/libcaffe2_observers.dylib
    install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libcaffe2_observers.dylib

    install_name_tool -change @rpath/libtorch.dylib $lib/lib/libtorch.dylib $lib/lib/libcaffe2_module_test_dynamic.dylib
    install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libcaffe2_module_test_dynamic.dylib

    install_name_tool -change @rpath/libtorch.dylib $lib/lib/libtorch.dylib $lib/lib/libcaffe2_detectron_ops.dylib
    install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libcaffe2_detectron_ops.dylib

    install_name_tool -change @rpath/libtorch.dylib $lib/lib/libtorch.dylib $lib/lib/libshm.dylib
    install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libshm.dylib
  '';

  # Builds in 2+h with 2 cores, and ~15m with a big-parallel builder.
  requiredSystemFeatures = [ "big-parallel" ];

  passthru = {
    inherit cudaSupport cudaPackages;
    cudaArchList = final_cudaArchList;
    # At least for 1.10.2 `torch.fft` is unavailable unless BLAS provider is MKL. This attribute allows for easy detection of its availability.
    blasProvider = blas.provider;
  };

  meta = with lib; {
    # darwin: error: use of undeclared identifier 'noU'; did you mean 'no'?
    broken = (stdenv.isLinux && stdenv.isAarch64) || stdenv.isDarwin;
    description = "Open source, prototype-to-production deep learning platform";
    homepage    = "https://pytorch.org/";
    license     = licenses.bsd3;
    platforms   = with platforms; linux ++ lib.optionals (!cudaSupport) darwin;
    maintainers = with maintainers; [ teh thoughtpolice tscholak ]; # tscholak esp. for darwin-related builds
  };
}