1{ lib
2, gcc11Stdenv
3, fetchFromGitHub
4, catch2
5, cmake
6, cudaPackages_10_2
7, cudaPackages_11_8
8, cudaPackages_12
9, fmt_9
10, git
11, jsoncpp
12, libevent
13, plog
14, python3
15, symlinkJoin
16, tclap_1_4
17, yaml-cpp
18}:
19let
# libevent variants used by this package: first with sslSupport turned off,
# then additionally built as a static, position-independent library.
# The compiler and configure flags are copied from DCGM's libevent build script.
libevent-nossl = libevent.override { sslSupport = false; };
libevent-nossl-static =
  let
    # One flag string shared by the C and C++ compilers.
    compileFlags = "-Wno-cast-function-type -Wno-implicit-fallthrough -fPIC";
  in
  libevent-nossl.overrideAttrs (prevAttrs: {
    CFLAGS = compileFlags;
    CXXFLAGS = compileFlags;
    # Keep whatever flags libevent-nossl already configures and append ours.
    configureFlags = prevAttrs.configureFlags ++ [
      "--disable-shared"
      "--with-pic"
    ];
  });

# jsoncpp with its enableStatic option switched on.
jsoncpp-static = jsoncpp.override { enableStatic = true; };
29
# DCGM depends on 3 different versions of CUDA at the same time.
# The runtime closure, thankfully, is quite small because most things
# are statically linked.
#
# Each entry pairs a CUDA major version string with the list of packages
# that provide headers and libraries for that version.
cudaPackageSetByVersion =
  let
    inherit (cudaPackages_10_2) cudatoolkit;

    # Entry for a CUDA release whose redist packages are split out.
    redistEntry = version: cudaPackages: {
      inherit version;
      pkgSet = getCudaPackages cudaPackages;
    };
  in
  [
    {
      version = "10";
      # Nixpkgs cudaPackages_10 doesn't have redist packages broken out,
      # so fall back to the monolithic toolkit and its lib output.
      pkgSet = [
        cudatoolkit
        cudatoolkit.lib
      ];
    }
    (redistEntry "11" cudaPackages_11_8)
    (redistEntry "12" cudaPackages_12)
  ];
51
# Pick the redist packages DCGM needs out of a cudaPackages set.
# C.f. https://github.com/NVIDIA/DCGM/blob/7e1012302679e4bb7496483b32dcffb56e528c92/dcgmbuild/scripts/0080_cuda.sh#L24-L39
#
# Takes a cudaPackages attribute set and returns a list of seven packages,
# selected explicitly by attribute name.
getCudaPackages = cudaPkgs: [
  cudaPkgs.cuda_cccl
  cudaPkgs.cuda_cudart
  cudaPkgs.cuda_nvcc
  cudaPkgs.cuda_nvml_dev
  cudaPkgs.libcublas
  cudaPkgs.libcufft
  cudaPkgs.libcurand
];
63
# Generates the CMake snippet that registers include and lib paths for one
# CUDA version.
#
# Arguments (one element of cudaPackageSetByVersion):
#   version - CUDA major version string, spliced into the CMake variable names
#   pkgSet  - list of packages providing that version's headers and libraries
#
# Returns two `list(APPEND ...)` lines, each terminated by a newline.
mkAppendCudaPaths = { version, pkgSet }:
  let
    # The DCGM CMake assumes that the folder containing cuda.h contains all headers,
    # so every package is merged into a single tree to get one include directory.
    # Joining *just* the include subdirectories would be tidier, but not every
    # package has one, and handling that is more effort than it's worth for this
    # temporary, build-time package.
    mergedTree = symlinkJoin {
      name = "cuda-combined-${version}";
      paths = pkgSet;
    };

    # Using the merged tree for libraries breaks the build for some reason, so
    # each package's own lib directory is listed instead (quoted, space-separated).
    quotedLibDirs = lib.concatStringsSep " " (map (pkg: "\"${pkg}/lib\"") pkgSet);
  in
  ''
    list(APPEND Cuda${version}_INCLUDE_PATHS "${mergedTree}/include")
    list(APPEND Cuda${version}_LIB_PATHS ${quotedLibDirs})
  '';
83
# gcc11 is required by DCGM's very particular build system
# C.f. https://github.com/NVIDIA/DCGM/blob/7e1012302679e4bb7496483b32dcffb56e528c92/dcgmbuild/build.sh#L22
in gcc11Stdenv.mkDerivation rec {
  pname = "dcgm";
  version = "3.2.5"; # N.B: If you change this, be sure prometheus-dcgm-exporter supports this version.

  src = fetchFromGitHub {
    owner = "NVIDIA";
    repo = "DCGM";
    rev = "refs/tags/v${version}";
    hash = "sha256-iMyYOr3dSpdRV2S/TlB/tEOAWYhK09373ZRbd5vzogQ=";
  };

  # Add our paths to the CUDA paths so FindCuda.cmake can find them.
  # The generated CMake lines (one pair per CUDA version) are exported to the
  # builder as an environment variable ...
  EXTRA_CUDA_PATHS = lib.concatMapStringsSep "\n" mkAppendCudaPaths cudaPackageSetByVersion;
  # ... and prepended to cmake/FindCuda.cmake before patching. The file is read
  # by the command substitution before the redirection truncates it.
  prePatch = ''
    echo "$EXTRA_CUDA_PATHS"$'\n'"$(cat cmake/FindCuda.cmake)" > cmake/FindCuda.cmake
  '';

  # NOTE(review): all hardening flags are switched off; the reason is not
  # recorded here -- presumably required by DCGM's build, confirm before changing.
  hardeningDisable = [ "all" ];

  strictDeps = true;

  nativeBuildInputs = [
    # autoAddOpenGLRunpathHook does not actually depend on or incur any dependency
    # of cudaPackages. It merely adds an impure, non-Nix PATH to the RPATHs of
    # executables that need to use cuda at runtime.
    cudaPackages_12.autoAddOpenGLRunpathHook

    cmake
    git
    python3
  ];

  buildInputs = [
    plog.dev # header-only
    tclap_1_4 # header-only

    catch2
    fmt_9
    jsoncpp-static
    libevent-nossl-static
    yaml-cpp
  ];

  # None of the CUDA packages used at build time may be referenced by the output.
  disallowedReferences = lib.concatMap (x: x.pkgSet) cudaPackageSetByVersion;

  meta = {
    # Per nixpkgs convention the description is a single line without a trailing period.
    description = "Data Center GPU Manager (DCGM) is a daemon that allows users to monitor NVIDIA data-center GPUs";
    homepage = "https://developer.nvidia.com/dcgm";
    license = lib.licenses.asl20;
    maintainers = lib.teams.deshaw.members;
    mainProgram = "dcgmi";
    platforms = lib.platforms.linux;
  };
}