Clone of https://github.com/NixOS/nixpkgs.git (to stress-test knotserver)
at fix-function-merge 140 lines 4.3 kB view raw
# Package expression for NVIDIA DCGM (Data Center GPU Manager).
#
# Takes its dependencies as function arguments and returns a derivation built
# with gcc11Stdenv. Three CUDA package sets (10.2, 11.8, 12) are made visible to
# DCGM's FindCuda.cmake at build time and are listed in `disallowedReferences`
# so that none of them may be referenced by the output.
{ lib
, gcc11Stdenv
, fetchFromGitHub
, autoAddDriverRunpath
, catch2
, cmake
, cudaPackages_10_2
, cudaPackages_11_8
, cudaPackages_12
, fmt_9
, git
, jsoncpp
, libevent
, plog
, python3
, symlinkJoin
, tclap_1_4
, yaml-cpp
}:
let
  # Flags copied from DCGM's libevent build script
  libevent-nossl = libevent.override { sslSupport = false; };
  libevent-nossl-static = libevent-nossl.overrideAttrs (prevAttrs: {
    CFLAGS = "-Wno-cast-function-type -Wno-implicit-fallthrough -fPIC";
    CXXFLAGS = "-Wno-cast-function-type -Wno-implicit-fallthrough -fPIC";
    # `or [ ]` keeps evaluation working even if the overridden libevent
    # derivation does not define configureFlags at all.
    configureFlags = (prevAttrs.configureFlags or [ ]) ++ [ "--disable-shared" "--with-pic" ];
  });

  jsoncpp-static = jsoncpp.override { enableStatic = true; };

  # DCGM depends on 3 different versions of CUDA at the same time.
  # The runtime closure, thankfully, is quite small because most things
  # are statically linked.
  #
  # Each entry is `{ version, pkgSet }`: `version` is the CUDA major version
  # used to name the CMake variables, `pkgSet` is the list of packages whose
  # headers and libraries are exposed for that version.
  cudaPackageSetByVersion = [
    {
      version = "10";
      # Nixpkgs cudaPackages_10 doesn't have redist packages broken out.
      pkgSet = [
        cudaPackages_10_2.cudatoolkit
        cudaPackages_10_2.cudatoolkit.lib
      ];
    }
    {
      version = "11";
      pkgSet = getCudaPackages cudaPackages_11_8;
    }
    {
      version = "12";
      pkgSet = getCudaPackages cudaPackages_12;
    }
  ];

  # Select needed redist packages from cudaPackages
  # C.f. https://github.com/NVIDIA/DCGM/blob/7e1012302679e4bb7496483b32dcffb56e528c92/dcgmbuild/scripts/0080_cuda.sh#L24-L39
  getCudaPackages = p: with p; [
    cuda_cccl
    cuda_cudart
    cuda_nvcc
    cuda_nvml_dev
    libcublas
    libcufft
    libcurand
  ];

  # Builds CMake code to add CUDA paths for include and lib.
  # Takes one `{ version, pkgSet }` entry and returns a two-line CMake snippet
  # appending to Cuda<version>_INCLUDE_PATHS and Cuda<version>_LIB_PATHS.
  mkAppendCudaPaths = { version, pkgSet }:
    let
      # The DCGM CMake assumes that the folder containing cuda.h contains all headers, so we must
      # combine everything together for headers to work.
      # It would be more convenient to use symlinkJoin on *just* the include subdirectories
      # of each package, but not all of them have an include directory and making that work
      # is more effort than it's worth for this temporary, build-time package.
      combined = symlinkJoin {
        name = "cuda-combined-${version}";
        paths = pkgSet;
      };
      # The combined package above breaks the build for some reason so we just configure
      # each package's library path.
      libs = lib.concatMapStringsSep " " (x: ''"${x}/lib"'') pkgSet;
    in
    ''
      list(APPEND Cuda${version}_INCLUDE_PATHS "${combined}/include")
      list(APPEND Cuda${version}_LIB_PATHS ${libs})
    '';

  # gcc11 is required by DCGM's very particular build system
  # C.f. https://github.com/NVIDIA/DCGM/blob/7e1012302679e4bb7496483b32dcffb56e528c92/dcgmbuild/build.sh#L22
in
# The `finalAttrs` form (instead of `rec`) lets an `overrideAttrs` that changes
# `version` also update `src.rev`, which is derived from it.
gcc11Stdenv.mkDerivation (finalAttrs: {
  pname = "dcgm";
  version = "3.2.5"; # N.B: If you change this, be sure prometheus-dcgm-exporter supports this version.

  src = fetchFromGitHub {
    owner = "NVIDIA";
    repo = "DCGM";
    rev = "refs/tags/v${finalAttrs.version}";
    hash = "sha256-iMyYOr3dSpdRV2S/TlB/tEOAWYhK09373ZRbd5vzogQ=";
  };

  # Add our paths to the CUDA paths so FindCuda.cmake can find them.
  # EXTRA_CUDA_PATHS holds one CMake snippet per CUDA version, newline-separated;
  # prePatch prepends it to the upstream cmake/FindCuda.cmake.
  EXTRA_CUDA_PATHS = lib.concatMapStringsSep "\n" mkAppendCudaPaths cudaPackageSetByVersion;
  prePatch = ''
    echo "$EXTRA_CUDA_PATHS"$'\n'"$(cat cmake/FindCuda.cmake)" > cmake/FindCuda.cmake
  '';

  hardeningDisable = [ "all" ];

  strictDeps = true;

  nativeBuildInputs = [
    # autoAddDriverRunpath does not actually depend on or incur any dependency
    # of cudaPackages. It merely adds an impure, non-Nix PATH to the RPATHs of
    # executables that need to use cuda at runtime.
    autoAddDriverRunpath

    cmake
    git
    python3
  ];

  buildInputs = [
    plog.dev # header-only
    tclap_1_4 # header-only

    catch2
    fmt_9
    jsoncpp-static
    libevent-nossl-static
    yaml-cpp
  ];

  # Fail the build if any package from any of the CUDA package sets is
  # referenced by the output.
  disallowedReferences = lib.concatMap (x: x.pkgSet) cudaPackageSetByVersion;

  meta = {
    description = "Data Center GPU Manager (DCGM) is a daemon that allows users to monitor NVIDIA data-center GPUs";
    homepage = "https://developer.nvidia.com/dcgm";
    license = lib.licenses.asl20;
    maintainers = lib.teams.deshaw.members;
    mainProgram = "dcgmi";
    platforms = lib.platforms.linux;
  };
})