Clone of https://github.com/NixOS/nixpkgs.git (to stress-test knotserver)
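# Nix expression for PyTorch 2.0.1. A minimal usage sketch, assuming this file
# is wired into python-packages.nix as `torch` (the values are illustrative):
#
#   python3Packages.torch                                    # CPU-only build
#   python3Packages.torch.override { cudaSupport = true; }   # CUDA build
#   python3Packages.torch.override {
#     rocmSupport = true;
#     gpuTargets = [ "gfx906" ];    # restrict the ROCm architectures built
#   }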
{ stdenv, lib, fetchFromGitHub, buildPythonPackage, python,
  cudaSupport ? false, cudaPackages, magma,
  useSystemNccl ? true,
  MPISupport ? false, mpi,
  buildDocs ? false,

  # Native build inputs
  cmake, linkFarm, symlinkJoin, which, pybind11, removeReferencesTo,
  pythonRelaxDepsHook,

  # Build inputs
  numactl,
  Accelerate, CoreServices, libobjc,

  # Propagated build inputs
  filelock,
  jinja2,
  networkx,
  openai-triton,
  sympy,
  numpy, pyyaml, cffi, click, typing-extensions,

  # Unit tests
  hypothesis, psutil,

  # Disable MKLDNN on aarch64-darwin; it negatively impacts performance,
  # and this is also what the official pytorch build does.
  mklDnnSupport ? !(stdenv.isDarwin && stdenv.isAarch64),

  # virtual pkg that consistently instantiates blas across nixpkgs
  # See https://github.com/NixOS/nixpkgs/pull/83888
  blas,

  # ninja (https://ninja-build.org) must be available to run C++ extension tests.
  ninja,

  linuxHeaders_5_19,

  # dependencies for torch.utils.tensorboard
  pillow, six, future, tensorboard, protobuf,

  pythonOlder,

  # ROCm dependencies
  rocmSupport ? false,
  gpuTargets ? [ ],
  openmp, rocm-core, hip, rccl, miopen, miopengemm, rocrand, rocblas,
  rocfft, rocsparse, hipsparse, rocthrust, rocprim, hipcub, roctracer,
  rocsolver, hipfft, hipsolver, hipblas, rocminfo, rocm-thunk, rocm-comgr,
  rocm-device-libs, rocm-runtime, rocm-opencl-runtime, hipify
}:

let
  inherit (lib) lists strings trivial;
  inherit (cudaPackages) cudatoolkit cudaFlags cudnn nccl;
in

assert cudaSupport -> stdenv.isLinux;
assert cudaSupport -> (cudaPackages.cudaMajorVersion == "11");

# confirm that the cudatoolkits are sync'd across dependencies
assert !(MPISupport && cudaSupport) || mpi.cudatoolkit == cudatoolkit;
assert !cudaSupport || magma.cudaPackages.cudatoolkit == cudatoolkit;

let
  setBool = v: if v then "1" else "0";

  # https://github.com/pytorch/pytorch/blob/v2.0.1/torch/utils/cpp_extension.py#L1744
  supportedTorchCudaCapabilities =
    let
      real = ["3.5" "3.7" "5.0" "5.2" "5.3" "6.0" "6.1" "6.2" "7.0" "7.2" "7.5" "8.0" "8.6" "8.9" "9.0"];
      ptx = lists.map (x: "${x}+PTX") real;
    in
    real ++ ptx;

  # NOTE: The lists.subtractLists function is perhaps a bit unintuitive. It subtracts the
  #   elements of the first list *from* the second list. That means:
  #   lists.subtractLists a b = b - a

  # For CUDA
  supportedCudaCapabilities = lists.intersectLists cudaFlags.cudaCapabilities supportedTorchCudaCapabilities;
  unsupportedCudaCapabilities = lists.subtractLists supportedCudaCapabilities cudaFlags.cudaCapabilities;

  # Use trivial.throwIf to abort the build if, after filtering, no supported GPU
  # targets remain; the unsupported targets are listed in the error message.
  gpuArchWarner = supported: unsupported:
    trivial.throwIf (supported == [ ])
      (
        "No supported GPU targets specified. Requested GPU targets: "
        + strings.concatStringsSep ", " unsupported
      )
      supported;

  # Create the gpuTargetString.
  gpuTargetString = strings.concatStringsSep ";" (
    if gpuTargets != [ ] then
      # If gpuTargets is specified, it always takes priority.
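      # (illustrative values: [ "8.6" "8.6+PTX" ] for CUDA, or [ "gfx906" ] for
      # ROCm; the entries are joined with ";" and exported below as
      # TORCH_CUDA_ARCH_LIST / PYTORCH_ROCM_ARCH)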
      gpuTargets
    else if cudaSupport then
      gpuArchWarner supportedCudaCapabilities unsupportedCudaCapabilities
    else if rocmSupport then
      hip.gpuTargets
    else
      throw "No GPU targets specified"
  );

  cudatoolkit_joined = symlinkJoin {
    name = "${cudatoolkit.name}-unsplit";
    # nccl is here purely for semantic grouping; it could be moved to nativeBuildInputs
    paths = [ cudatoolkit.out cudatoolkit.lib nccl.dev nccl.out ];
  };

  # Normally libcuda.so.1 is provided at runtime by nvidia-x11 via
  # LD_LIBRARY_PATH=/run/opengl-driver/lib. We only use the stub
  # libcuda.so from cudatoolkit for running tests, so that we don’t have
  # to recompile pytorch on every update to nvidia-x11 or the kernel.
  cudaStub = linkFarm "cuda-stub" [{
    name = "libcuda.so.1";
    path = "${cudatoolkit}/lib/stubs/libcuda.so";
  }];
  cudaStubEnv = lib.optionalString cudaSupport
    "LD_LIBRARY_PATH=${cudaStub}\${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH ";

  rocmtoolkit_joined = symlinkJoin {
    name = "rocm-merged";

    paths = [
      rocm-core hip rccl miopen miopengemm rocrand rocblas
      rocfft rocsparse hipsparse rocthrust rocprim hipcub
      roctracer rocsolver hipfft hipsolver hipblas
      rocminfo rocm-thunk rocm-comgr rocm-device-libs
      rocm-runtime rocm-opencl-runtime hipify
    ];
  };
in buildPythonPackage rec {
  pname = "torch";
  # Don't forget to update torch-bin to the same version.
  version = "2.0.1";
  format = "setuptools";

  disabled = pythonOlder "3.8.0";

  outputs = [
    "out" # output standard python package
    "dev" # output libtorch headers
    "lib" # output libtorch libraries
  ];

  src = fetchFromGitHub {
    owner = "pytorch";
    repo = "pytorch";
    rev = "refs/tags/v${version}";
    fetchSubmodules = true;
    hash = "sha256-xUj77yKz3IQ3gd/G32pI4OhL3LoN1zS7eFg0/0nZp5I=";
  };

  patches = lib.optionals (stdenv.isDarwin && stdenv.isx86_64) [
    # pthreadpool added support for Grand Central Dispatch in April 2020;
    # however, this relies on functionality (DISPATCH_APPLY_AUTO) that is only
    # available starting with macOS 10.13, while our current base is 10.12.
    # Until we upgrade, we fall back on the older pthread support.
    ./pthreadpool-disable-gcd.diff
  ];

  postPatch = lib.optionalString rocmSupport ''
    # https://github.com/facebookincubator/gloo/pull/297
    substituteInPlace third_party/gloo/cmake/Hipify.cmake \
      --replace "\''${HIPIFY_COMMAND}" "python \''${HIPIFY_COMMAND}"

    # Replace hard-coded rocm paths
    substituteInPlace caffe2/CMakeLists.txt \
      --replace "/opt/rocm" "${rocmtoolkit_joined}" \
      --replace "hcc/include" "hip/include" \
      --replace "rocblas/include" "include/rocblas" \
      --replace "hipsparse/include" "include/hipsparse"

    # Doesn't pick up the environment variable?
    substituteInPlace third_party/kineto/libkineto/CMakeLists.txt \
      --replace "\''$ENV{ROCM_SOURCE_DIR}" "${rocmtoolkit_joined}" \
      --replace "/opt/rocm" "${rocmtoolkit_joined}"

    # Strangely, this is never set in cmake
    substituteInPlace cmake/public/LoadHIP.cmake \
      --replace "set(ROCM_PATH \$ENV{ROCM_PATH})" \
        "set(ROCM_PATH \$ENV{ROCM_PATH})''\nset(ROCM_VERSION ${lib.concatStrings (lib.intersperse "0" (lib.splitString "." hip.version))})"
  ''
  # error: no member named 'aligned_alloc' in the global namespace; did you mean simply 'aligned_alloc'
  # This lib overrides aligned_alloc, hence the error message. Tl;dr: the function is linkable but not declared in the header.
  + lib.optionalString (stdenv.isDarwin && lib.versionOlder stdenv.targetPlatform.darwinSdkVersion "11.0") ''
    substituteInPlace third_party/pocketfft/pocketfft_hdronly.h --replace '#if __cplusplus >= 201703L
    inline void *aligned_alloc(size_t align, size_t size)' '#if __cplusplus >= 201703L && 0
    inline void *aligned_alloc(size_t align, size_t size)'
  '';

  preConfigure = lib.optionalString cudaSupport ''
    export TORCH_CUDA_ARCH_LIST="${gpuTargetString}"
    export CC=${cudatoolkit.cc}/bin/gcc CXX=${cudatoolkit.cc}/bin/g++
  '' + lib.optionalString (cudaSupport && cudnn != null) ''
    export CUDNN_INCLUDE_DIR=${cudnn}/include
  '' + lib.optionalString rocmSupport ''
    export ROCM_PATH=${rocmtoolkit_joined}
    export ROCM_SOURCE_DIR=${rocmtoolkit_joined}
    export PYTORCH_ROCM_ARCH="${gpuTargetString}"
    export CMAKE_CXX_FLAGS="-I${rocmtoolkit_joined}/include -I${rocmtoolkit_joined}/include/rocblas"
    python tools/amd_build/build_amd.py
  '';

  # Use pytorch's custom configurations
  dontUseCmakeConfigure = true;

  BUILD_NAMEDTENSOR = setBool true;
  BUILD_DOCS = setBool buildDocs;

  # We only do an imports check, so do not build tests either.
  BUILD_TEST = setBool false;

  # Unlike MKL, oneDNN (née MKLDNN) is FOSS, so we enable support for
  # it by default. PyTorch currently uses its own vendored version
  # of oneDNN through Intel iDeep.
  USE_MKLDNN = setBool mklDnnSupport;
  USE_MKLDNN_CBLAS = setBool mklDnnSupport;

  # Avoid using pybind11 from the git submodule.
  # This also avoids pytorch exporting the headers of pybind11.
  USE_SYSTEM_BIND11 = true;

  preBuild = ''
    export MAX_JOBS=$NIX_BUILD_CORES
    ${python.pythonForBuild.interpreter} setup.py build --cmake-only
    ${cmake}/bin/cmake build
  '';

  preFixup = ''
    function join_by { local IFS="$1"; shift; echo "$*"; }
    function strip2 {
      IFS=':'
      read -ra RP <<< $(patchelf --print-rpath $1)
      IFS=' '
      RP_NEW=$(join_by : ''${RP[@]:2})
      patchelf --set-rpath \$ORIGIN:''${RP_NEW} "$1"
    }
    for f in $(find ''${out} -name 'libcaffe2*.so')
    do
      strip2 $f
    done
  '';

  # Override the (weirdly) wrong version set by default. See
  # https://github.com/NixOS/nixpkgs/pull/52437#issuecomment-449718038
  # https://github.com/pytorch/pytorch/blob/v1.0.0/setup.py#L267
  PYTORCH_BUILD_VERSION = version;
  PYTORCH_BUILD_NUMBER = 0;

  USE_SYSTEM_NCCL = setBool useSystemNccl; # don't build pytorch's third_party NCCL
  # Suppress a weird warning in mkl-dnn, part of ideep in pytorch
  # (upstream seems to have fixed this in the wrong place?)
  # https://github.com/intel/mkl-dnn/commit/8134d346cdb7fe1695a2aa55771071d455fae0bc
  # https://github.com/pytorch/pytorch/issues/22346
  #
  # Also of interest: pytorch ignores CXXFLAGS and uses CFLAGS for both C and C++:
  # https://github.com/pytorch/pytorch/blob/v1.11.0/setup.py#L17
  env.NIX_CFLAGS_COMPILE = toString ((lib.optionals (blas.implementation == "mkl") [ "-Wno-error=array-bounds" ]
    # Suppress a gcc regression: avx512 math functions raise an uninitialized variable warning
    # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593
    # See also: Fails to compile with GCC 12.1.0 https://github.com/pytorch/pytorch/issues/77939
    ++ lib.optionals (stdenv.cc.isGNU && lib.versionAtLeast stdenv.cc.version "12.0.0") [
      "-Wno-error=maybe-uninitialized"
      "-Wno-error=uninitialized"
    ]
    # Since pytorch 2.0:
    # gcc-12.2.0/include/c++/12.2.0/bits/new_allocator.h:158:33: error: ‘void operator delete(void*, std::size_t)’
    # ... called on pointer ‘<unknown>’ with nonzero offset [1, 9223372036854775800] [-Werror=free-nonheap-object]
    ++ lib.optionals (stdenv.cc.isGNU && lib.versions.major stdenv.cc.version == "12") [
      "-Wno-error=free-nonheap-object"
    ]));

  nativeBuildInputs = [
    cmake
    which
    ninja
    pybind11
    pythonRelaxDepsHook
    removeReferencesTo
  ] ++ lib.optionals cudaSupport [ cudatoolkit_joined ]
    ++ lib.optionals rocmSupport [ rocmtoolkit_joined ];

  buildInputs = [ blas blas.provider pybind11 ]
    ++ lib.optionals stdenv.isLinux [ linuxHeaders_5_19 ] # TMP: avoid "flexible array member" errors for now
    ++ lib.optionals cudaSupport [ cudnn nccl ]
    ++ lib.optionals rocmSupport [ openmp ]
    ++ lib.optionals (cudaSupport || rocmSupport) [ magma ]
    ++ lib.optionals stdenv.isLinux [ numactl ]
    ++ lib.optionals stdenv.isDarwin [ Accelerate CoreServices libobjc ];

  propagatedBuildInputs = [
    cffi
    click
    numpy
    pyyaml

    # From install_requires:
    filelock
    typing-extensions
    sympy
    networkx
    jinja2

    # the following are required for tensorboard support
    pillow six future tensorboard protobuf
  ]
  ++ lib.optionals MPISupport [ mpi ]
  ++ lib.optionals rocmSupport [ rocmtoolkit_joined ]
  # the rocm build requires openai-triton;
  # openai-triton currently requires cuda_nvcc,
  # so it is not included in the cpu-only build;
  # torch.compile relies on openai-triton,
  # so we include it for the cuda build as well
  ++ lib.optionals (rocmSupport || cudaSupport) [
    openai-triton
  ];

  # Tests take a long time and may be flaky, so just sanity-check imports
  doCheck = false;

  pythonImportsCheck = [
    "torch"
  ];

  nativeCheckInputs = [ hypothesis ninja psutil ];

  checkPhase = with lib.versions; with lib.strings; concatStringsSep " " [
    "runHook preCheck"
    cudaStubEnv
    "${python.interpreter} test/run_test.py"
    "--exclude"
    (concatStringsSep " " [
      "utils" # utils requires git, which is not allowed in the check phase

      # "dataloader" # psutil correctly finds and triggers multiprocessing, but is too sandboxed to run -- resulting in numerous errors
      # ^^^^^^^^^^^^ NOTE: while test_dataloader does return errors, these are acceptable errors and do not interfere with the build

      # tensorboard has acceptable failures for pytorch 1.3.x due to dependencies on tensorboard-plugins
      (optionalString (majorMinor version == "1.3") "tensorboard")
    ])
    "runHook postCheck"
  ];
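  # A sketch for downstream users who do want the (slow, partially flaky)
  # upstream test suite, assuming this package is exposed as `torch`:
  #   torch.overridePythonAttrs (_: { doCheck = true; })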
  pythonRemoveDeps = [
    # In our dist-info the name is just "triton"
    "pytorch-triton-rocm"
  ];

  postInstall = ''
    find "$out/${python.sitePackages}/torch/include" "$out/${python.sitePackages}/torch/lib" -type f -exec remove-references-to -t ${stdenv.cc} '{}' +

    mkdir $dev
    cp -r $out/${python.sitePackages}/torch/include $dev/include
    cp -r $out/${python.sitePackages}/torch/share $dev/share

    # Fix up library paths for split outputs
    substituteInPlace \
      $dev/share/cmake/Torch/TorchConfig.cmake \
      --replace \''${TORCH_INSTALL_PREFIX}/lib "$lib/lib"

    substituteInPlace \
      $dev/share/cmake/Caffe2/Caffe2Targets-release.cmake \
      --replace \''${_IMPORT_PREFIX}/lib "$lib/lib"

    mkdir $lib
    mv $out/${python.sitePackages}/torch/lib $lib/lib
    ln -s $lib/lib $out/${python.sitePackages}/torch/lib
  '' + lib.optionalString rocmSupport ''
    substituteInPlace $dev/share/cmake/Tensorpipe/TensorpipeTargets-release.cmake \
      --replace "\''${_IMPORT_PREFIX}/lib64" "$lib/lib"

    substituteInPlace $dev/share/cmake/ATen/ATenConfig.cmake \
      --replace "/build/source/torch/include" "$dev/include"
  '';

  postFixup = lib.optionalString stdenv.isDarwin ''
    for f in $(ls $lib/lib/*.dylib); do
      install_name_tool -id $lib/lib/$(basename $f) $f || true
    done

    install_name_tool -change @rpath/libshm.dylib $lib/lib/libshm.dylib $lib/lib/libtorch_python.dylib
    install_name_tool -change @rpath/libtorch.dylib $lib/lib/libtorch.dylib $lib/lib/libtorch_python.dylib
    install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libtorch_python.dylib

    install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libtorch.dylib

    install_name_tool -change @rpath/libtorch.dylib $lib/lib/libtorch.dylib $lib/lib/libshm.dylib
    install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libshm.dylib
  '';

  # Builds in 2+h with 2 cores, and ~15m with a big-parallel builder.
  requiredSystemFeatures = [ "big-parallel" ];

  passthru = {
    inherit cudaSupport cudaPackages;
    # At least for 1.10.2 `torch.fft` is unavailable unless the BLAS provider is MKL.
    # This attribute allows for easy detection of its availability.
    blasProvider = blas.provider;
  } // lib.optionalAttrs cudaSupport {
    # NOTE: supportedCudaCapabilities isn't computed unless cudaSupport is true, so we can't use
    #   it in the passthru set above because a downstream package might try to access it even
    #   when cudaSupport is false. Better to have it missing than null or an empty list by default.
    cudaCapabilities = supportedCudaCapabilities;
  };

  meta = with lib; {
    changelog = "https://github.com/pytorch/pytorch/releases/tag/v${version}";
    # keep PyTorch in the description so the package can be found under that name on search.nixos.org
    description = "PyTorch: Tensors and Dynamic neural networks in Python with strong GPU acceleration";
    homepage = "https://pytorch.org/";
    license = licenses.bsd3;
    maintainers = with maintainers; [ teh thoughtpolice tscholak ]; # tscholak esp. for darwin-related builds
    platforms = with platforms; linux ++ lib.optionals (!cudaSupport && !rocmSupport) darwin;
    broken = rocmSupport && cudaSupport; # CUDA and ROCm are mutually exclusive
  };
}
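# An illustrative smoke test for a CUDA-enabled build; the attribute path
# assumes this file is exposed as python3Packages.torch, while
# torch.cuda.is_available() is upstream PyTorch API:
#   nix-shell -p "python3.withPackages (ps: [ (ps.torch.override { cudaSupport = true; }) ])" \
#     --run 'python -c "import torch; print(torch.cuda.is_available())"'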