# Snapshot at 24.05-pre (web-view header: 139 lines, 4.3 kB).
# Package expression for NVIDIA DCGM (Data Center GPU Manager).
#
# Takes its dependencies as function arguments and returns a derivation built
# with gcc11Stdenv. Three CUDA package sets (10.2, 11.8 and 12) are made
# visible to DCGM's cmake/FindCuda.cmake at build time; the resulting output
# is not allowed to reference any of them (see disallowedReferences below).
{ lib
, gcc11Stdenv
, fetchFromGitHub
, catch2
, cmake
, cudaPackages_10_2
, cudaPackages_11_8
, cudaPackages_12
, fmt_9
, git
, jsoncpp
, libevent
, plog
, python3
, symlinkJoin
, tclap_1_4
, yaml-cpp
}:
let
  # Flags copied from DCGM's libevent build script
  libevent-nossl = libevent.override { sslSupport = false; };
  libevent-nossl-static = libevent-nossl.overrideAttrs (super:
    let
      # The same flag string is used for both the C and the C++ compiler.
      staticBuildFlags = "-Wno-cast-function-type -Wno-implicit-fallthrough -fPIC";
    in
    {
      CFLAGS = staticBuildFlags;
      CXXFLAGS = staticBuildFlags;
      # `or [ ]` keeps the override evaluating even if the underlying libevent
      # derivation does not define configureFlags at all.
      configureFlags = (super.configureFlags or [ ]) ++ [ "--disable-shared" "--with-pic" ];
    });

  jsoncpp-static = jsoncpp.override { enableStatic = true; };

  # Select needed redist packages from cudaPackages
  # C.f. https://github.com/NVIDIA/DCGM/blob/7e1012302679e4bb7496483b32dcffb56e528c92/dcgmbuild/scripts/0080_cuda.sh#L24-L39
  #
  # Argument: p - a cudaPackages set. Returns the list of packages taken from it.
  getCudaPackages = p: with p; [
    cuda_cccl
    cuda_cudart
    cuda_nvcc
    cuda_nvml_dev
    libcublas
    libcufft
    libcurand
  ];

  # DCGM depends on 3 different versions of CUDA at the same time.
  # The runtime closure, thankfully, is quite small because most things
  # are statically linked.
  #
  # Each entry pairs a CUDA major version string with the list of packages
  # that provide that version's headers and libraries.
  cudaPackageSetByVersion = [
    {
      version = "10";
      # Nixpkgs cudaPackages_10 doesn't have redist packages broken out.
      pkgSet = [
        cudaPackages_10_2.cudatoolkit
        cudaPackages_10_2.cudatoolkit.lib
      ];
    }
    {
      version = "11";
      pkgSet = getCudaPackages cudaPackages_11_8;
    }
    {
      version = "12";
      pkgSet = getCudaPackages cudaPackages_12;
    }
  ];

  # Builds CMake code to add CUDA paths for include and lib.
  #
  # Argument: one entry of cudaPackageSetByVersion ({ version, pkgSet }).
  # Returns a string with two CMake `list(APPEND ...)` statements that extend
  # Cuda<version>_INCLUDE_PATHS and Cuda<version>_LIB_PATHS.
  mkAppendCudaPaths = { version, pkgSet }:
    let
      # The DCGM CMake assumes that the folder containing cuda.h contains all headers, so we must
      # combine everything together for headers to work.
      # It would be more convenient to use symlinkJoin on *just* the include subdirectories
      # of each package, but not all of them have an include directory and making that work
      # is more effort than it's worth for this temporary, build-time package.
      combined = symlinkJoin {
        name = "cuda-combined-${version}";
        paths = pkgSet;
      };
      # The combined package above breaks the build for some reason so we just configure
      # each package's library path.
      # Result: space-separated, double-quoted "<pkg>/lib" entries, one per package.
      libs = lib.concatMapStringsSep " " (x: ''"${x}/lib"'') pkgSet;
    in
    ''
      list(APPEND Cuda${version}_INCLUDE_PATHS "${combined}/include")
      list(APPEND Cuda${version}_LIB_PATHS ${libs})
    '';

# gcc11 is required by DCGM's very particular build system
# C.f. https://github.com/NVIDIA/DCGM/blob/7e1012302679e4bb7496483b32dcffb56e528c92/dcgmbuild/build.sh#L22
in
gcc11Stdenv.mkDerivation rec {
  pname = "dcgm";
  version = "3.2.5"; # N.B: If you change this, be sure prometheus-dcgm-exporter supports this version.

  src = fetchFromGitHub {
    owner = "NVIDIA";
    repo = "DCGM";
    rev = "refs/tags/v${version}";
    hash = "sha256-iMyYOr3dSpdRV2S/TlB/tEOAWYhK09373ZRbd5vzogQ=";
  };

  # Add our paths to the CUDA paths so FindCuda.cmake can find them.
  # EXTRA_CUDA_PATHS holds the generated CMake snippets (one per CUDA version,
  # newline-separated) and is read back by prePatch as "$EXTRA_CUDA_PATHS".
  EXTRA_CUDA_PATHS = lib.concatMapStringsSep "\n" mkAppendCudaPaths cudaPackageSetByVersion;
  # Prepends the snippets to cmake/FindCuda.cmake. Reading and overwriting the
  # same file in one command works because the shell expands "$(cat ...)"
  # before it applies the `>` redirection.
  prePatch = ''
    echo "$EXTRA_CUDA_PATHS"$'\n'"$(cat cmake/FindCuda.cmake)" > cmake/FindCuda.cmake
  '';

  hardeningDisable = [ "all" ];

  strictDeps = true;

  nativeBuildInputs = [
    # autoAddOpenGLRunpathHook does not actually depend on or incur any dependency
    # of cudaPackages. It merely adds an impure, non-Nix PATH to the RPATHs of
    # executables that need to use cuda at runtime.
    cudaPackages_12.autoAddOpenGLRunpathHook

    cmake
    git
    python3
  ];

  buildInputs = [
    plog.dev # header-only
    tclap_1_4 # header-only

    catch2
    fmt_9
    jsoncpp-static
    libevent-nossl-static
    yaml-cpp
  ];

  # Every CUDA package made visible to the build above is listed here, so the
  # build fails if the output ends up referencing any of them.
  disallowedReferences = lib.concatMap (x: x.pkgSet) cudaPackageSetByVersion;

  meta = with lib; {
    description = "Data Center GPU Manager (DCGM) is a daemon that allows users to monitor NVIDIA data-center GPUs";
    homepage = "https://developer.nvidia.com/dcgm";
    license = licenses.asl20;
    maintainers = teams.deshaw.members;
    mainProgram = "dcgmi";
    platforms = platforms.linux;
  };
}