{ stdenv, lib, fetchFromGitHub, fetchpatch, buildPythonPackage, python,
  cudaSupport ? false, cudaPackages, magma,
  useSystemNccl ? true,
  MPISupport ? false, mpi,
  buildDocs ? false,

  # Native build inputs
  cmake, util-linux, linkFarm, symlinkJoin, which, pybind11, removeReferencesTo,
  pythonRelaxDepsHook,

  # Build inputs
  numactl,
  Accelerate, CoreServices, libobjc,

  # Propagated build inputs
  filelock,
  jinja2,
  networkx,
  openai-triton,
  sympy,
  numpy, pyyaml, cffi, click, typing-extensions,

  # Unit tests
  hypothesis, psutil,

  # Disable MKLDNN on aarch64-darwin; it negatively impacts performance.
  # This is also what the official pytorch build does.
  mklDnnSupport ? !(stdenv.isDarwin && stdenv.isAarch64),

  # virtual pkg that consistently instantiates blas across nixpkgs
  # See https://github.com/NixOS/nixpkgs/pull/83888
  blas,

  # ninja (https://ninja-build.org) must be available to run C++ extension tests
  ninja,

  linuxHeaders_5_19,

  # dependencies for torch.utils.tensorboard
  pillow, six, future, tensorboard, protobuf,

  isPy3k, pythonOlder,

  # ROCm dependencies
  rocmSupport ? false,
  gpuTargets ? [ ],
  openmp, rocm-core, hip, rccl, miopen, miopengemm, rocrand, rocblas,
  rocfft, rocsparse, hipsparse, rocthrust, rocprim, hipcub, roctracer,
  rocsolver, hipfft, hipsolver, hipblas, rocminfo, rocm-thunk, rocm-comgr,
  rocm-device-libs, rocm-runtime, rocm-opencl-runtime, hipify
}:

let
  inherit (lib) lists strings trivial;
  inherit (cudaPackages) cudatoolkit cudaFlags cudnn nccl;
in

assert cudaSupport -> stdenv.isLinux;
assert cudaSupport -> (cudaPackages.cudaMajorVersion == "11");

# confirm that cudatoolkits are sync'd across dependencies
assert !(MPISupport && cudaSupport) || mpi.cudatoolkit == cudatoolkit;
assert !cudaSupport || magma.cudaPackages.cudatoolkit == cudatoolkit;

let
  setBool = v: if v then "1" else "0";

  # https://github.com/pytorch/pytorch/blob/v1.13.1/torch/utils/cpp_extension.py#L1751
  supportedTorchCudaCapabilities =
    let
      real = ["3.5" "3.7" "5.0" "5.2" "5.3" "6.0" "6.1" "6.2" "7.0" "7.2" "7.5" "8.0" "8.6"];
      ptx = lists.map (x: "${x}+PTX") real;
    in
    real ++ ptx;

  # NOTE: The lists.subtractLists function is perhaps a bit unintuitive. It subtracts the elements
  # of the first list *from* the second list. That means:
  # lists.subtractLists a b = b - a

  # For CUDA
  supportedCudaCapabilities = lists.intersectLists cudaFlags.cudaCapabilities supportedTorchCudaCapabilities;
  unsupportedCudaCapabilities = lists.subtractLists supportedCudaCapabilities cudaFlags.cudaCapabilities;

  # Throw (via trivial.throwIf) when no supported GPU targets remain, listing
  # the unsupported targets that were requested.
  gpuArchWarner = supported: unsupported:
    trivial.throwIf (supported == [ ])
      (
        "No supported GPU targets specified. Requested GPU targets: "
        + strings.concatStringsSep ", " unsupported
      )
      supported;

  # Create the gpuTargetString.
  gpuTargetString = strings.concatStringsSep ";" (
    if gpuTargets != [ ] then
      # If gpuTargets is specified, it always takes priority.
      gpuTargets
    else if cudaSupport then
      gpuArchWarner supportedCudaCapabilities unsupportedCudaCapabilities
    else if rocmSupport then
      hip.gpuTargets
    else
      throw "No GPU targets specified"
  );
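
  # Worked example (hypothetical capability values, for illustration only): with
  # cudaSupport = true and cudaFlags.cudaCapabilities = [ "7.5" "8.6" "9.0" ],
  #   supportedCudaCapabilities   == [ "7.5" "8.6" ]  (intersection with the torch list above)
  #   unsupportedCudaCapabilities == [ "9.0" ]        ("9.0" is absent from that list)
  # so gpuTargetString evaluates to "7.5;8.6".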

  cudatoolkit_joined = symlinkJoin {
    name = "${cudatoolkit.name}-unsplit";
    # nccl is here purely for semantic grouping; it could be moved to nativeBuildInputs
    paths = [ cudatoolkit.out cudatoolkit.lib nccl.dev nccl.out ];
  };

  # Normally libcuda.so.1 is provided at runtime by nvidia-x11 via
  # LD_LIBRARY_PATH=/run/opengl-driver/lib. We only use the stub
  # libcuda.so from cudatoolkit for running tests, so that we don't have
  # to recompile pytorch on every update to nvidia-x11 or the kernel.
  cudaStub = linkFarm "cuda-stub" [{
    name = "libcuda.so.1";
    path = "${cudatoolkit}/lib/stubs/libcuda.so";
  }];
  cudaStubEnv = lib.optionalString cudaSupport
    "LD_LIBRARY_PATH=${cudaStub}\${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH ";
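  # For illustration (store hash hypothetical): with cudaSupport enabled,
  # cudaStubEnv evaluates to a shell environment prefix such as
  #   LD_LIBRARY_PATH=/nix/store/<hash>-cuda-stub${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH
  # which is prepended to the test-runner invocation in checkPhase below;
  # without cudaSupport it is the empty string.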

  rocmtoolkit_joined = symlinkJoin {
    name = "rocm-merged";

    paths = [
      rocm-core hip rccl miopen miopengemm rocrand rocblas
      rocfft rocsparse hipsparse rocthrust rocprim hipcub
      roctracer rocfft rocsolver hipfft hipsolver hipblas
      rocminfo rocm-thunk rocm-comgr rocm-device-libs
      rocm-runtime rocm-opencl-runtime hipify
    ];
  };
in buildPythonPackage rec {
  pname = "torch";
  # Don't forget to update torch-bin to the same version.
  version = "2.0.1";
  format = "setuptools";

  disabled = pythonOlder "3.8.0";

  outputs = [
    "out" # output standard python package
    "dev" # output libtorch headers
    "lib" # output libtorch libraries
  ];

  src = fetchFromGitHub {
    owner = "pytorch";
    repo = "pytorch";
    rev = "refs/tags/v${version}";
    fetchSubmodules = true;
    hash = "sha256-xUj77yKz3IQ3gd/G32pI4OhL3LoN1zS7eFg0/0nZp5I=";
  };

  patches = lib.optionals (stdenv.isDarwin && stdenv.isx86_64) [
    # pthreadpool added support for Grand Central Dispatch in April
    # 2020. However, this relies on functionality (DISPATCH_APPLY_AUTO)
    # that is available starting with macOS 10.13. However, our current
    # base is 10.12. Until we upgrade, we can fall back on the older
    # pthread support.
    ./pthreadpool-disable-gcd.diff
  ];

  postPatch = lib.optionalString rocmSupport ''
    # https://github.com/facebookincubator/gloo/pull/297
    substituteInPlace third_party/gloo/cmake/Hipify.cmake \
      --replace "\''${HIPIFY_COMMAND}" "python \''${HIPIFY_COMMAND}"

    # Replace hard-coded rocm paths
    substituteInPlace caffe2/CMakeLists.txt \
      --replace "/opt/rocm" "${rocmtoolkit_joined}" \
      --replace "hcc/include" "hip/include" \
      --replace "rocblas/include" "include/rocblas" \
      --replace "hipsparse/include" "include/hipsparse"

    # Doesn't pick up the environment variable?
    substituteInPlace third_party/kineto/libkineto/CMakeLists.txt \
      --replace "\''$ENV{ROCM_SOURCE_DIR}" "${rocmtoolkit_joined}" \
      --replace "/opt/rocm" "${rocmtoolkit_joined}"

    # Strangely, this is never set in cmake
    substituteInPlace cmake/public/LoadHIP.cmake \
      --replace "set(ROCM_PATH \$ENV{ROCM_PATH})" \
        "set(ROCM_PATH \$ENV{ROCM_PATH})''\nset(ROCM_VERSION ${lib.concatStrings (lib.intersperse "0" (lib.splitString "." hip.version))})"
  ''
  # error: no member named 'aligned_alloc' in the global namespace; did you mean simply 'aligned_alloc'?
  # This lib overrides aligned_alloc, hence the error message. Tl;dr: this function is linkable but not declared in the header.
  + lib.optionalString (stdenv.isDarwin && lib.versionOlder stdenv.targetPlatform.darwinSdkVersion "11.0") ''
    substituteInPlace third_party/pocketfft/pocketfft_hdronly.h --replace '#if __cplusplus >= 201703L
    inline void *aligned_alloc(size_t align, size_t size)' '#if __cplusplus >= 201703L && 0
    inline void *aligned_alloc(size_t align, size_t size)'
  '';

  preConfigure = lib.optionalString cudaSupport ''
    export TORCH_CUDA_ARCH_LIST="${gpuTargetString}"
    export CC=${cudatoolkit.cc}/bin/gcc CXX=${cudatoolkit.cc}/bin/g++
  '' + lib.optionalString (cudaSupport && cudnn != null) ''
    export CUDNN_INCLUDE_DIR=${cudnn}/include
  '' + lib.optionalString rocmSupport ''
    export ROCM_PATH=${rocmtoolkit_joined}
    export ROCM_SOURCE_DIR=${rocmtoolkit_joined}
    export PYTORCH_ROCM_ARCH="${gpuTargetString}"
    export CMAKE_CXX_FLAGS="-I${rocmtoolkit_joined}/include -I${rocmtoolkit_joined}/include/rocblas"
    python tools/amd_build/build_amd.py
  '';

  # Use pytorch's custom configurations
  dontUseCmakeConfigure = true;

  BUILD_NAMEDTENSOR = setBool true;
  BUILD_DOCS = setBool buildDocs;

  # We only do an imports check, so do not build tests either.
  BUILD_TEST = setBool false;

  # Unlike MKL, oneDNN (née MKLDNN) is FOSS, so we enable support for
  # it by default. PyTorch currently uses its own vendored version
  # of oneDNN through Intel iDeep.
  USE_MKLDNN = setBool mklDnnSupport;
  USE_MKLDNN_CBLAS = setBool mklDnnSupport;

  # Avoid using pybind11 from git submodule
  # Also avoids pytorch exporting the headers of pybind11
  USE_SYSTEM_BIND11 = true;

  preBuild = ''
    export MAX_JOBS=$NIX_BUILD_CORES
    ${python.pythonForBuild.interpreter} setup.py build --cmake-only
    ${cmake}/bin/cmake build
  '';

  preFixup = ''
    function join_by { local IFS="$1"; shift; echo "$*"; }
    function strip2 {
      IFS=':'
      read -ra RP <<< $(patchelf --print-rpath $1)
      IFS=' '
      RP_NEW=$(join_by : ''${RP[@]:2})
      patchelf --set-rpath \$ORIGIN:''${RP_NEW} "$1"
    }
    for f in $(find ''${out} -name 'libcaffe2*.so')
    do
      strip2 $f
    done
  '';
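  # Note on strip2 above (illustrative paths, not real build output): it drops
  # the first two rpath entries, which point at build-time directories, so e.g.
  #   /build/source/torch/lib:/build/source/build/lib:/nix/store/<hash>-blas/lib
  # becomes
  #   $ORIGIN:/nix/store/<hash>-blas/lib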

  # Override the (weirdly) wrong version set by default. See
  # https://github.com/NixOS/nixpkgs/pull/52437#issuecomment-449718038
  # https://github.com/pytorch/pytorch/blob/v1.0.0/setup.py#L267
  PYTORCH_BUILD_VERSION = version;
  PYTORCH_BUILD_NUMBER = 0;

  USE_SYSTEM_NCCL = setBool useSystemNccl;  # don't build pytorch's third_party NCCL

  # Suppress a weird warning in mkl-dnn, part of ideep in pytorch
  # (upstream seems to have fixed this in the wrong place?)
  # https://github.com/intel/mkl-dnn/commit/8134d346cdb7fe1695a2aa55771071d455fae0bc
  # https://github.com/pytorch/pytorch/issues/22346
  #
  # Also of interest: pytorch ignores CXXFLAGS and uses CFLAGS for both C and C++:
  # https://github.com/pytorch/pytorch/blob/v1.11.0/setup.py#L17
  env.NIX_CFLAGS_COMPILE = toString ((lib.optionals (blas.implementation == "mkl") [ "-Wno-error=array-bounds" ]
    # Suppress gcc regression: avx512 math function raises uninitialized variable warning
    # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593
    # See also: Fails to compile with GCC 12.1.0 https://github.com/pytorch/pytorch/issues/77939
    ++ lib.optionals (stdenv.cc.isGNU && lib.versionAtLeast stdenv.cc.version "12.0.0") [
      "-Wno-error=maybe-uninitialized"
      "-Wno-error=uninitialized"
    ]
    # Since pytorch 2.0:
    # gcc-12.2.0/include/c++/12.2.0/bits/new_allocator.h:158:33: error: ‘void operator delete(void*, std::size_t)’
    # ... called on pointer ‘<unknown>’ with nonzero offset [1, 9223372036854775800] [-Werror=free-nonheap-object]
    ++ lib.optionals (stdenv.cc.isGNU && lib.versions.major stdenv.cc.version == "12") [
      "-Wno-error=free-nonheap-object"
    ]));

  nativeBuildInputs = [
    cmake
    util-linux
    which
    ninja
    pybind11
    pythonRelaxDepsHook
    removeReferencesTo
  ] ++ lib.optionals cudaSupport [ cudatoolkit_joined ]
    ++ lib.optionals rocmSupport [ rocmtoolkit_joined ];

  buildInputs = [ blas blas.provider pybind11 ]
    ++ lib.optionals stdenv.isLinux [ linuxHeaders_5_19 ] # TMP: avoid "flexible array member" errors for now
    ++ lib.optionals cudaSupport [ cudnn nccl ]
    ++ lib.optionals rocmSupport [ openmp ]
    ++ lib.optionals (cudaSupport || rocmSupport) [ magma ]
    ++ lib.optionals stdenv.isLinux [ numactl ]
    ++ lib.optionals stdenv.isDarwin [ Accelerate CoreServices libobjc ];

  propagatedBuildInputs = [
    cffi
    click
    numpy
    pyyaml

    # From install_requires:
    filelock
    typing-extensions
    sympy
    networkx
    jinja2

    # the following are required for tensorboard support
    pillow six future tensorboard protobuf
  ]
  ++ lib.optionals MPISupport [ mpi ]
  ++ lib.optionals rocmSupport [ rocmtoolkit_joined ]
  # rocm build requires openai-triton;
  # openai-triton currently requires cuda_nvcc,
  # so not including it in the cpu-only build;
  # torch.compile relies on openai-triton,
  # so we include it for the cuda build as well
  ++ lib.optionals (rocmSupport || cudaSupport) [
    openai-triton
  ];

  # Tests take a long time and may be flaky, so just sanity-check imports
  doCheck = false;

  pythonImportsCheck = [
    "torch"
  ];

  nativeCheckInputs = [ hypothesis ninja psutil ];

  checkPhase = with lib.versions; with lib.strings; concatStringsSep " " [
    "runHook preCheck"
    cudaStubEnv
    "${python.interpreter} test/run_test.py"
    "--exclude"
    (concatStringsSep " " [
      "utils" # utils requires git, which is not allowed in the check phase

      # "dataloader" # psutil correctly finds and triggers multiprocessing, but is too sandboxed to run -- resulting in numerous errors
      # ^^^^^^^^^^^^ NOTE: while test_dataloader does return errors, these are acceptable errors and do not interfere with the build

      # tensorboard has acceptable failures for pytorch 1.3.x due to dependencies on tensorboard-plugins
      (optionalString (majorMinor version == "1.3") "tensorboard")
    ])
    "runHook postCheck"
  ];
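  # For reference, the list above collapses to a single shell line, roughly
  # (interpreter path abbreviated; cudaStubEnv is empty without cudaSupport):
  #   runHook preCheck python test/run_test.py --exclude utils runHook postCheck
  # It only runs if doCheck is flipped to true.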
postCheck" 348 ]; 349 350 pythonRemoveDeps = [ 351 # In our dist-info the name is just "triton" 352 "pytorch-triton-rocm" 353 ]; 354 355 postInstall = '' 356 find "$out/${python.sitePackages}/torch/include" "$out/${python.sitePackages}/torch/lib" -type f -exec remove-references-to -t ${stdenv.cc} '{}' + 357 358 mkdir $dev 359 cp -r $out/${python.sitePackages}/torch/include $dev/include 360 cp -r $out/${python.sitePackages}/torch/share $dev/share 361 362 # Fix up library paths for split outputs 363 substituteInPlace \ 364 $dev/share/cmake/Torch/TorchConfig.cmake \ 365 --replace \''${TORCH_INSTALL_PREFIX}/lib "$lib/lib" 366 367 substituteInPlace \ 368 $dev/share/cmake/Caffe2/Caffe2Targets-release.cmake \ 369 --replace \''${_IMPORT_PREFIX}/lib "$lib/lib" 370 371 mkdir $lib 372 mv $out/${python.sitePackages}/torch/lib $lib/lib 373 ln -s $lib/lib $out/${python.sitePackages}/torch/lib 374 '' + lib.optionalString rocmSupport '' 375 substituteInPlace $dev/share/cmake/Tensorpipe/TensorpipeTargets-release.cmake \ 376 --replace "\''${_IMPORT_PREFIX}/lib64" "$lib/lib" 377 378 substituteInPlace $dev/share/cmake/ATen/ATenConfig.cmake \ 379 --replace "/build/source/torch/include" "$dev/include" 380 ''; 381 382 postFixup = lib.optionalString stdenv.isDarwin '' 383 for f in $(ls $lib/lib/*.dylib); do 384 install_name_tool -id $lib/lib/$(basename $f) $f || true 385 done 386 387 install_name_tool -change @rpath/libshm.dylib $lib/lib/libshm.dylib $lib/lib/libtorch_python.dylib 388 install_name_tool -change @rpath/libtorch.dylib $lib/lib/libtorch.dylib $lib/lib/libtorch_python.dylib 389 install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libtorch_python.dylib 390 391 install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libtorch.dylib 392 393 install_name_tool -change @rpath/libtorch.dylib $lib/lib/libtorch.dylib $lib/lib/libshm.dylib 394 install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libshm.dylib 395 ''; 396 397 # Builds in 2+h with 2 cores, and ~15m with a big-parallel builder. 398 requiredSystemFeatures = [ "big-parallel" ]; 399 400 passthru = { 401 inherit cudaSupport cudaPackages; 402 # At least for 1.10.2 `torch.fft` is unavailable unless BLAS provider is MKL. This attribute allows for easy detection of its availability. 403 blasProvider = blas.provider; 404 } // lib.optionalAttrs cudaSupport { 405 # NOTE: supportedCudaCapabilities isn't computed unless cudaSupport is true, so we can't use 406 # it in the passthru set above because a downstream package might try to access it even 407 # when cudaSupport is false. Better to have it missing than null or an empty list by default. 408 cudaCapabilities = supportedCudaCapabilities; 409 }; 410 411 meta = with lib; { 412 changelog = "https://github.com/pytorch/pytorch/releases/tag/v${version}"; 413 # keep PyTorch in the description so the package can be found under that name on search.nixos.org 414 description = "PyTorch: Tensors and Dynamic neural networks in Python with strong GPU acceleration"; 415 homepage = "https://pytorch.org/"; 416 license = licenses.bsd3; 417 maintainers = with maintainers; [ teh thoughtpolice tscholak ]; # tscholak esp. for darwin-related builds 418 platforms = with platforms; linux ++ lib.optionals (!cudaSupport && !rocmSupport) darwin; 419 broken = rocmSupport && cudaSupport; # CUDA and ROCm are mutually exclusive 420 }; 421}