pkgs/os-specific/linux/dcgm/default.nix at 24.05-pre · pyrox.dev/nixpkgs

pyrox.dev / nixpkgs
lol
nixpkgs / pkgs / os-specific / linux / dcgm / default.nix
at 24.05-pre 139 lines 4.3 kB view raw
  1{ lib
  2, gcc11Stdenv
  3, fetchFromGitHub
  4, catch2
  5, cmake
  6, cudaPackages_10_2
  7, cudaPackages_11_8
  8, cudaPackages_12
  9, fmt_9
 10, git
 11, jsoncpp
 12, libevent
 13, plog
 14, python3
 15, symlinkJoin
 16, tclap_1_4
 17, yaml-cpp
 18}:
 19let
 20  # Flags copied from DCGM's libevent build script
 21  libevent-nossl = libevent.override { sslSupport = false; };
 22  libevent-nossl-static = libevent-nossl.overrideAttrs (super: {
 23    CFLAGS = "-Wno-cast-function-type -Wno-implicit-fallthrough -fPIC";
 24    CXXFLAGS = "-Wno-cast-function-type -Wno-implicit-fallthrough -fPIC";
 25    configureFlags = super.configureFlags ++ [ "--disable-shared" "--with-pic" ];
 26  });
 27
 28  jsoncpp-static = jsoncpp.override { enableStatic = true; };
 29
 30  # DCGM depends on 3 different versions of CUDA at the same time.
 31  # The runtime closure, thankfully, is quite small because most things
 32  # are statically linked.
 33  cudaPackageSetByVersion = [
 34    {
 35      version = "10";
 36      # Nixpkgs cudaPackages_10 doesn't have redist packages broken out.
 37      pkgSet = [
 38        cudaPackages_10_2.cudatoolkit
 39        cudaPackages_10_2.cudatoolkit.lib
 40      ];
 41    }
 42    {
 43      version = "11";
 44      pkgSet = getCudaPackages cudaPackages_11_8;
 45    }
 46    {
 47      version = "12";
 48      pkgSet = getCudaPackages cudaPackages_12;
 49    }
 50  ];
 51
 52  # Select needed redist packages from cudaPackages
 53  # C.f. https://github.com/NVIDIA/DCGM/blob/7e1012302679e4bb7496483b32dcffb56e528c92/dcgmbuild/scripts/0080_cuda.sh#L24-L39
 54  getCudaPackages = p: with p; [
 55    cuda_cccl
 56    cuda_cudart
 57    cuda_nvcc
 58    cuda_nvml_dev
 59    libcublas
 60    libcufft
 61    libcurand
 62  ];
 63
 64  # Builds CMake code to add CUDA paths for include and lib.
 65  mkAppendCudaPaths = { version, pkgSet }:
 66    let
 67      # The DCGM CMake assumes that the folder containing cuda.h contains all headers, so we must
 68      # combine everything together for headers to work.
 69      # It would be more convenient to use symlinkJoin on *just* the include subdirectories
 70      # of each package, but not all of them have an include directory and making that work
 71      # is more effort than it's worth for this temporary, build-time package.
 72      combined = symlinkJoin {
 73        name = "cuda-combined-${version}";
 74        paths = pkgSet;
 75      };
 76      # The combined package above breaks the build for some reason so we just configure
 77      # each package's library path.
 78      libs = lib.concatMapStringsSep " " (x: ''"${x}/lib"'') pkgSet;
 79    in ''
 80      list(APPEND Cuda${version}_INCLUDE_PATHS "${combined}/include")
 81      list(APPEND Cuda${version}_LIB_PATHS ${libs})
 82    '';
 83
 84# gcc11 is required by DCGM's very particular build system
 85# C.f. https://github.com/NVIDIA/DCGM/blob/7e1012302679e4bb7496483b32dcffb56e528c92/dcgmbuild/build.sh#L22
 86in gcc11Stdenv.mkDerivation rec {
 87  pname = "dcgm";
 88  version = "3.2.5"; # N.B: If you change this, be sure prometheus-dcgm-exporter supports this version.
 89
 90  src = fetchFromGitHub {
 91    owner = "NVIDIA";
 92    repo = "DCGM";
 93    rev = "refs/tags/v${version}";
 94    hash = "sha256-iMyYOr3dSpdRV2S/TlB/tEOAWYhK09373ZRbd5vzogQ=";
 95  };
 96
 97  # Add our paths to the CUDA paths so FindCuda.cmake can find them.
 98  EXTRA_CUDA_PATHS = lib.concatMapStringsSep "\n" mkAppendCudaPaths cudaPackageSetByVersion;
 99  prePatch = ''
100    echo "$EXTRA_CUDA_PATHS"$'\n'"$(cat cmake/FindCuda.cmake)" > cmake/FindCuda.cmake
101  '';
102
103  hardeningDisable = [ "all" ];
104
105  strictDeps = true;
106
107  nativeBuildInputs = [
108    # autoAddOpenGLRunpathHook does not actually depend on or incur any dependency
109    # of cudaPackages. It merely adds an impure, non-Nix PATH to the RPATHs of
110    # executables that need to use cuda at runtime.
111    cudaPackages_12.autoAddOpenGLRunpathHook
112
113    cmake
114    git
115    python3
116  ];
117
118  buildInputs = [
119    plog.dev # header-only
120    tclap_1_4 # header-only
121
122    catch2
123    fmt_9
124    jsoncpp-static
125    libevent-nossl-static
126    yaml-cpp
127  ];
128
129  disallowedReferences = lib.concatMap (x: x.pkgSet) cudaPackageSetByVersion;
130
131  meta = with lib; {
132    description = "Data Center GPU Manager (DCGM) is a daemon that allows users to monitor NVIDIA data-center GPUs.";
133    homepage = "https://developer.nvidia.com/dcgm";
134    license = licenses.asl20;
135    maintainers = teams.deshaw.members;
136    mainProgram = "dcgmi";
137    platforms = platforms.linux;
138  };
139}