Clone of https://github.com/NixOS/nixpkgs.git (to stress-test knotserver)
at 20.03 238 lines 9.7 kB view raw
# PyTorch 1.2.0 packaged with buildPythonPackage.
#
# Outputs:
#   out - the Python package
#   dev - copies of torch/lib and torch/include taken from the installed package
#
# Feature flags (all optional):
#   cudaSupport        - build against cudatoolkit (major version 9 or 10 only);
#                        magma must be built against the same cudatoolkit, and
#                        so must openmpi when openMPISupport is also enabled.
#   mklSupport         - requires mkl, and numpy (plus magma when cudaSupport is
#                        on) to be built against that same mkl.
#   openMPISupport     - adds openmpi to propagatedBuildInputs.
#   tensorboardSupport - propagates the torch.utils.tensorboard dependencies.
#   buildNamedTensor   - forwarded to the build as BUILD_NAMEDTENSOR.
#   cudaArchList       - explicit list for TORCH_CUDA_ARCH_LIST; when null a
#                        default is chosen from the cudatoolkit major version.
#
# NOTE: fetchurl, fetchgit and buildBinaries are accepted but not referenced by
# this expression; they are kept so existing callers and overrides keep working.
{ stdenv, fetchurl, fetchgit, buildPythonPackage, python, pythonOlder,
  cudaSupport ? false, cudatoolkit ? null, cudnn ? null, nccl ? null, magma ? null,
  mklSupport ? false, mkl ? null,
  openMPISupport ? false, openmpi ? null,
  buildNamedTensor ? false,
  buildBinaries ? false,
  cudaArchList ? null,
  fetchFromGitHub, lib, numpy, pyyaml, cffi, click, typing, cmake, hypothesis, numactl,
  linkFarm, symlinkJoin,

  # ninja (https://ninja-build.org) must be available to run C++ extensions tests,
  ninja,

  # dependencies for torch.utils.tensorboard
  tensorboardSupport ? true, pillow, six, future, tensorflow-tensorboard,

  utillinux, which, isPy3k }:

assert !openMPISupport || openmpi != null;
assert !tensorboardSupport || tensorflow-tensorboard != null;

# assert that everything needed for cuda is present and that the correct cuda versions are used
assert !cudaSupport || cudatoolkit != null;
assert cudnn == null || cudatoolkit != null;
assert !cudaSupport || (let majorIs = lib.versions.major cudatoolkit.version;
                        in majorIs == "9" || majorIs == "10");

let
  # True when `dep` appears verbatim in `pkg.buildInputs`.
  hasDependency = dep: pkg: lib.lists.any (inp: inp == dep) pkg.buildInputs;
  matchesCudatoolkit = hasDependency cudatoolkit;
  matchesMkl = hasDependency mkl;
in
# confirm that cudatoolkits are sync'd across dependencies
assert !(openMPISupport && cudaSupport) || matchesCudatoolkit openmpi;
assert !cudaSupport || matchesCudatoolkit magma;

# confirm that mkl is sync'd across dependencies
assert !mklSupport || mkl != null;
assert !(mklSupport && cudaSupport) || matchesMkl magma;
assert !mklSupport || (numpy.blasImplementation == "mkl" && numpy.blas == mkl);

let
  cudatoolkit_joined = symlinkJoin {
    name = "${cudatoolkit.name}-unsplit";
    # nccl is here purely for semantic grouping; it could be moved to nativeBuildInputs
    paths = [ cudatoolkit.out cudatoolkit.lib nccl.dev nccl.out ];
  };

  # Give an explicit list of supported architectures for the build, See:
  #   - pytorch bug report: https://github.com/pytorch/pytorch/issues/23573
  #   - pytorch-1.2.0 build on nixpkgs: https://github.com/NixOS/nixpkgs/pull/65041
  #
  # This list was selected by omitting the TORCH_CUDA_ARCH_LIST parameter,
  # observing the fallback option (which selected all architectures known
  # from cudatoolkit_10_0, pytorch-1.2, and python-3.6), and doing a binary
  # search to find offending architectures.
  #
  # NOTE: Because of sandboxing, this derivation can't auto-detect the hardware's
  # cuda architecture, so there is also now a problem around new architectures
  # not being supported until explicitly added to this derivation.
  #
  # FIXME: CMake is throwing the following warning on pytorch-1.2:
  #
  # ```
  # CMake Warning at cmake/public/utils.cmake:172 (message):
  #   In the future we will require one to explicitly pass TORCH_CUDA_ARCH_LIST
  #   to cmake instead of implicitly setting it as an env variable.  This will
  #   become a FATAL_ERROR in future version of pytorch.
  # ```
  # If this is causing problems for your build, this derivation may have to strip
  # away the standard `buildPythonPackage` and use the
  # [*Adjust Build Options*](https://github.com/pytorch/pytorch/tree/v1.2.0#adjust-build-options-optional)
  # instructions. This will also add more flexibility around configurations
  # (allowing FBGEMM to be built in pytorch-1.1), and may future proof this
  # derivation.
  brokenArchs = [ "3.0" ]; # this variable is only used as documentation.
  cuda9ArchList = [
    "3.5"
    "5.0"
    "5.2"
    "6.0"
    "6.1"
    "7.0"
    "7.0+PTX"  # I am getting a "undefined architecture compute_75" on cuda 9
               # which leads me to believe this is the final cuda-9-compatible architecture.
  ];
  cuda10ArchList = cuda9ArchList ++ [
    "7.5"
    "7.5+PTX"  # < most recent architecture as of cudatoolkit_10_0 and pytorch-1.2.0
  ];
  # A caller-supplied cudaArchList always wins. Without CUDA the value is never
  # consumed (preConfigure only reads it when cudaSupport is true), so it may
  # legitimately be null in that case.
  final_cudaArchList =
    if !cudaSupport || cudaArchList != null
    then cudaArchList
    else
      if lib.versions.major cudatoolkit.version == "9"
      then cuda9ArchList
      else cuda10ArchList; # the assert above removes any ambiguity here.

  # Normally libcuda.so.1 is provided at runtime by nvidia-x11 via
  # LD_LIBRARY_PATH=/run/opengl-driver/lib.  We only use the stub
  # libcuda.so from cudatoolkit for running tests, so that we don't have
  # to recompile pytorch on every update to nvidia-x11 or the kernel.
  cudaStub = linkFarm "cuda-stub" [{
    name = "libcuda.so.1";
    path = "${cudatoolkit}/lib/stubs/libcuda.so";
  }];
  cudaStubEnv = lib.optionalString cudaSupport
    "LD_LIBRARY_PATH=${cudaStub}\${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH ";

in buildPythonPackage rec {
  version = "1.2.0";
  pname = "pytorch";
  disabled = !isPy3k;

  outputs = [
    "out"   # output standard python package
    "dev"   # output libtorch only
  ];

  src = fetchFromGitHub {
    owner  = "pytorch";
    repo   = "pytorch";
    rev    = "v${version}";
    fetchSubmodules = true;
    sha256 = "1biyq2p48chakf2xw7hazzqmr5ps1nx475ql8vkmxjg5zaa071cz";
  };

  dontUseCmakeConfigure = true;

  preConfigure = lib.optionalString cudaSupport ''
    export TORCH_CUDA_ARCH_LIST="${lib.strings.concatStringsSep ";" final_cudaArchList}"
    export CC=${cudatoolkit.cc}/bin/gcc CXX=${cudatoolkit.cc}/bin/g++
  '' + lib.optionalString (cudaSupport && cudnn != null) ''
    export CUDNN_INCLUDE_DIR=${cudnn}/include
  '';

  # Rewrite the RPATH of every libcaffe2*.so under $out: drop its first two
  # entries and put $ORIGIN in front of whatever remains.
  preFixup = ''
    function join_by { local IFS="$1"; shift; echo "$*"; }
    function strip2 {
      local RP RP_NEW
      # IFS is set for the read builtin only. Assigning it globally would leave
      # a non-default IFS behind for everything that runs after this hook.
      IFS=':' read -ra RP <<< "$(patchelf --print-rpath "$1")"
      RP_NEW=$(join_by : "''${RP[@]:2}")
      patchelf --set-rpath "\$ORIGIN:''${RP_NEW}" "$1"
    }
    for f in $(find ''${out} -name 'libcaffe2*.so')
    do
      strip2 "$f"
    done
  '';

  # Override the (weirdly) wrong version set by default. See
  # https://github.com/NixOS/nixpkgs/pull/52437#issuecomment-449718038
  # https://github.com/pytorch/pytorch/blob/v1.0.0/setup.py#L267
  PYTORCH_BUILD_VERSION = version;
  PYTORCH_BUILD_NUMBER = 0;

  BUILD_NAMEDTENSOR = buildNamedTensor;  # experimental feature
  USE_SYSTEM_NCCL=true;                  # don't build pytorch's third_party NCCL

  # Suppress a weird warning in mkl-dnn, part of ideep in pytorch
  # (upstream seems to have fixed this in the wrong place?)
  # https://github.com/intel/mkl-dnn/commit/8134d346cdb7fe1695a2aa55771071d455fae0bc
  # https://github.com/pytorch/pytorch/issues/22346
  #
  # Also of interest: pytorch ignores CXXFLAGS and uses CFLAGS for both C and C++:
  # https://github.com/pytorch/pytorch/blob/v1.2.0/setup.py#L17
  NIX_CFLAGS_COMPILE = lib.optionals (numpy.blas == mkl) [ "-Wno-error=array-bounds" ];

  nativeBuildInputs = [
    cmake
    utillinux
    which
    ninja
  ] ++ lib.optionals cudaSupport [ cudatoolkit_joined ];

  buildInputs = [
    numpy.blas
  ] ++ lib.optionals cudaSupport [ cudnn magma nccl ]
    ++ lib.optionals stdenv.isLinux [ numactl ];

  propagatedBuildInputs = [
    cffi
    click
    numpy
    pyyaml
  ] ++ lib.optionals openMPISupport [ openmpi ]
    ++ lib.optional (pythonOlder "3.5") typing
    ++ lib.optionals tensorboardSupport [pillow six future tensorflow-tensorboard];

  checkInputs = [ hypothesis ninja ];

  doCheck = false; # tests take a long time for channel release, so doCheck should be overridden only when developing
  checkPhase = "${cudaStubEnv}python test/run_test.py"
    + " --exclude utils" # utils requires git, which is not allowed in the check phase

    # Other tests which have been disabled in previous nix derivations of pytorch.
    # --exclude dataloader sparse torch utils thd_distributed distributed cpp_extensions
    ;

  # Populate the dev output from the freshly installed python package.
  postInstall = ''
    mkdir $dev
    cp -r $out/${python.sitePackages}/torch/lib     $dev/lib
    cp -r $out/${python.sitePackages}/torch/include $dev/include
  '';

  # Darwin only: give each dylib in $dev/lib an absolute install name and point
  # the @rpath references between the listed libraries at their $dev/lib copies.
  postFixup = lib.optionalString stdenv.isDarwin ''
    for f in $(ls $dev/lib/*.dylib); do
        install_name_tool -id $dev/lib/$(basename $f) $f || true
    done

    install_name_tool -change @rpath/libshm.dylib $dev/lib/libshm.dylib $dev/lib/libtorch_python.dylib
    install_name_tool -change @rpath/libtorch.dylib $dev/lib/libtorch.dylib $dev/lib/libtorch_python.dylib
    install_name_tool -change @rpath/libc10.dylib $dev/lib/libc10.dylib $dev/lib/libtorch_python.dylib

    install_name_tool -change @rpath/libc10.dylib $dev/lib/libc10.dylib $dev/lib/libtorch.dylib

    install_name_tool -change @rpath/libtorch.dylib $dev/lib/libtorch.dylib $dev/lib/libcaffe2_observers.dylib
    install_name_tool -change @rpath/libc10.dylib $dev/lib/libc10.dylib $dev/lib/libcaffe2_observers.dylib

    install_name_tool -change @rpath/libtorch.dylib $dev/lib/libtorch.dylib $dev/lib/libcaffe2_module_test_dynamic.dylib
    install_name_tool -change @rpath/libc10.dylib $dev/lib/libc10.dylib $dev/lib/libcaffe2_module_test_dynamic.dylib

    install_name_tool -change @rpath/libtorch.dylib $dev/lib/libtorch.dylib $dev/lib/libcaffe2_detectron_ops.dylib
    install_name_tool -change @rpath/libc10.dylib $dev/lib/libc10.dylib $dev/lib/libcaffe2_detectron_ops.dylib

    install_name_tool -change @rpath/libtorch.dylib $dev/lib/libtorch.dylib $dev/lib/libshm.dylib
    install_name_tool -change @rpath/libc10.dylib $dev/lib/libc10.dylib $dev/lib/libshm.dylib
  '';


  meta = {
    description = "Open source, prototype-to-production deep learning platform";
    homepage    = "https://pytorch.org/";
    license     = lib.licenses.bsd3;
    # CUDA builds are Linux-only; darwin is offered only when cudaSupport is off.
    platforms   = with lib.platforms; linux ++ lib.optionals (!cudaSupport) darwin;
    maintainers = with lib.maintainers; [ teh thoughtpolice stites tscholak ]; # tscholak esp. for darwin-related builds
  };
}