pkgs/os-specific/linux/dcgm/default.nix at fix-function-merge · tjh.dev/nixpkgs

tjh.dev / nixpkgs
Clone of https://github.com/NixOS/nixpkgs.git (to stress-test knotserver)
nixpkgs / pkgs / os-specific / linux / dcgm / default.nix
at fix-function-merge 140 lines 4.3 kB view raw
  1{ lib
  2, gcc11Stdenv
  3, fetchFromGitHub
  4, autoAddDriverRunpath
  5, catch2
  6, cmake
  7, cudaPackages_10_2
  8, cudaPackages_11_8
  9, cudaPackages_12
 10, fmt_9
 11, git
 12, jsoncpp
 13, libevent
 14, plog
 15, python3
 16, symlinkJoin
 17, tclap_1_4
 18, yaml-cpp
 19}:
 20let
 21  # Flags copied from DCGM's libevent build script
 22  libevent-nossl = libevent.override { sslSupport = false; };
 23  libevent-nossl-static = libevent-nossl.overrideAttrs (super: {
 24    CFLAGS = "-Wno-cast-function-type -Wno-implicit-fallthrough -fPIC";
 25    CXXFLAGS = "-Wno-cast-function-type -Wno-implicit-fallthrough -fPIC";
 26    configureFlags = super.configureFlags ++ [ "--disable-shared" "--with-pic" ];
 27  });
 28
 29  jsoncpp-static = jsoncpp.override { enableStatic = true; };
 30
 31  # DCGM depends on 3 different versions of CUDA at the same time.
 32  # The runtime closure, thankfully, is quite small because most things
 33  # are statically linked.
 34  cudaPackageSetByVersion = [
 35    {
 36      version = "10";
 37      # Nixpkgs cudaPackages_10 doesn't have redist packages broken out.
 38      pkgSet = [
 39        cudaPackages_10_2.cudatoolkit
 40        cudaPackages_10_2.cudatoolkit.lib
 41      ];
 42    }
 43    {
 44      version = "11";
 45      pkgSet = getCudaPackages cudaPackages_11_8;
 46    }
 47    {
 48      version = "12";
 49      pkgSet = getCudaPackages cudaPackages_12;
 50    }
 51  ];
 52
 53  # Select needed redist packages from cudaPackages
 54  # C.f. https://github.com/NVIDIA/DCGM/blob/7e1012302679e4bb7496483b32dcffb56e528c92/dcgmbuild/scripts/0080_cuda.sh#L24-L39
 55  getCudaPackages = p: with p; [
 56    cuda_cccl
 57    cuda_cudart
 58    cuda_nvcc
 59    cuda_nvml_dev
 60    libcublas
 61    libcufft
 62    libcurand
 63  ];
 64
 65  # Builds CMake code to add CUDA paths for include and lib.
 66  mkAppendCudaPaths = { version, pkgSet }:
 67    let
 68      # The DCGM CMake assumes that the folder containing cuda.h contains all headers, so we must
 69      # combine everything together for headers to work.
 70      # It would be more convenient to use symlinkJoin on *just* the include subdirectories
 71      # of each package, but not all of them have an include directory and making that work
 72      # is more effort than it's worth for this temporary, build-time package.
 73      combined = symlinkJoin {
 74        name = "cuda-combined-${version}";
 75        paths = pkgSet;
 76      };
 77      # The combined package above breaks the build for some reason so we just configure
 78      # each package's library path.
 79      libs = lib.concatMapStringsSep " " (x: ''"${x}/lib"'') pkgSet;
 80    in ''
 81      list(APPEND Cuda${version}_INCLUDE_PATHS "${combined}/include")
 82      list(APPEND Cuda${version}_LIB_PATHS ${libs})
 83    '';
 84
 85# gcc11 is required by DCGM's very particular build system
 86# C.f. https://github.com/NVIDIA/DCGM/blob/7e1012302679e4bb7496483b32dcffb56e528c92/dcgmbuild/build.sh#L22
 87in gcc11Stdenv.mkDerivation rec {
 88  pname = "dcgm";
 89  version = "3.2.5"; # N.B: If you change this, be sure prometheus-dcgm-exporter supports this version.
 90
 91  src = fetchFromGitHub {
 92    owner = "NVIDIA";
 93    repo = "DCGM";
 94    rev = "refs/tags/v${version}";
 95    hash = "sha256-iMyYOr3dSpdRV2S/TlB/tEOAWYhK09373ZRbd5vzogQ=";
 96  };
 97
 98  # Add our paths to the CUDA paths so FindCuda.cmake can find them.
 99  EXTRA_CUDA_PATHS = lib.concatMapStringsSep "\n" mkAppendCudaPaths cudaPackageSetByVersion;
100  prePatch = ''
101    echo "$EXTRA_CUDA_PATHS"$'\n'"$(cat cmake/FindCuda.cmake)" > cmake/FindCuda.cmake
102  '';
103
104  hardeningDisable = [ "all" ];
105
106  strictDeps = true;
107
108  nativeBuildInputs = [
109    # autoAddDriverRunpath does not actually depend on or incur any dependency
110    # of cudaPackages. It merely adds an impure, non-Nix PATH to the RPATHs of
111    # executables that need to use cuda at runtime.
112    autoAddDriverRunpath
113
114    cmake
115    git
116    python3
117  ];
118
119  buildInputs = [
120    plog.dev # header-only
121    tclap_1_4 # header-only
122
123    catch2
124    fmt_9
125    jsoncpp-static
126    libevent-nossl-static
127    yaml-cpp
128  ];
129
130  disallowedReferences = lib.concatMap (x: x.pkgSet) cudaPackageSetByVersion;
131
132  meta = with lib; {
133    description = "Data Center GPU Manager (DCGM) is a daemon that allows users to monitor NVIDIA data-center GPUs";
134    homepage = "https://developer.nvidia.com/dcgm";
135    license = licenses.asl20;
136    maintainers = teams.deshaw.members;
137    mainProgram = "dcgmi";
138    platforms = platforms.linux;
139  };
140}