nixpkgs mirror (for testing)
github.com/NixOS/nixpkgs
nix
1{ lib, pkgs, ... }:
2let
3 slurmconfig = {
4 services.slurm = {
5 controlMachine = "control";
6 nodeName = [ "node[1-3] CPUs=1 State=UNKNOWN" ];
7 partitionName = [ "debug Nodes=node[1-3] Default=YES MaxTime=INFINITE State=UP" ];
8 extraConfig = ''
9 AccountingStorageHost=dbd
10 AccountingStorageType=accounting_storage/slurmdbd
11 '';
12 };
13 environment.systemPackages = [ mpitest ];
14 networking.firewall.enable = false;
15 systemd.tmpfiles.rules = [
16 "f /etc/munge/munge.key 0400 munge munge - mungeverryweakkeybuteasytointegratoinatest"
17 ];
18 };
19
  # Small MPI "hello world" used to check that srun can distribute a
  # PMIx job over all compute nodes: rank 0 prints the communicator
  # size ("size=N"), every rank prints its hostname and rank.
  mpitest =
    let
      # C source written to the store; compiled below at build time.
      mpitestC = pkgs.writeText "mpitest.c" ''
        #include <stdio.h>
        #include <stdlib.h>
        #include <mpi.h>

        int
        main (int argc, char *argv[])
        {
          int rank, size, length;
          char name[512];

          MPI_Init (&argc, &argv);
          MPI_Comm_rank (MPI_COMM_WORLD, &rank);
          MPI_Comm_size (MPI_COMM_WORLD, &size);
          MPI_Get_processor_name (name, &length);

          if ( rank == 0 ) printf("size=%d\n", size);

          printf ("%s: hello world from process %d of %d\n", name, rank, size);

          MPI_Finalize ();

          return EXIT_SUCCESS;
        }
      '';
    in
    # mpicc lives in the dev output of the MPI package.
    pkgs.runCommand "mpitest" { } ''
      mkdir -p $out/bin
      ${lib.getDev pkgs.mpi}/bin/mpicc ${mpitestC} -o $out/bin/mpitest
    '';
52in
53{
54 name = "slurm";
55
56 meta.maintainers = [ lib.maintainers.markuskowa ];
57
58 nodes =
59 let
60 computeNode =
61 { ... }:
62 {
63 imports = [ slurmconfig ];
64 # TODO slurmd port and slurmctld port should be configurations and
65 # automatically allowed by the firewall.
66 services.slurm = {
67 client.enable = true;
68 };
69 };
70 in
71 {
72
73 control =
74 { ... }:
75 {
76 imports = [ slurmconfig ];
77 services.slurm = {
78 server.enable = true;
79 };
80 };
81
82 submit =
83 { ... }:
84 {
85 imports = [ slurmconfig ];
86 services.slurm = {
87 enableStools = true;
88 };
89 };
90
91 dbd =
92 { pkgs, ... }:
93 let
94 passFile = pkgs.writeText "dbdpassword" "password123";
95 in
96 {
97 networking.firewall.enable = false;
98 systemd.tmpfiles.rules = [
99 "f /etc/munge/munge.key 0400 munge munge - mungeverryweakkeybuteasytointegratoinatest"
100 ];
101 services.slurm.dbdserver = {
102 enable = true;
103 storagePassFile = "${passFile}";
104 };
105 services.mysql = {
106 enable = true;
107 package = pkgs.mariadb;
108 initialScript = pkgs.writeText "mysql-init.sql" ''
109 CREATE USER 'slurm'@'localhost' IDENTIFIED BY 'password123';
110 GRANT ALL PRIVILEGES ON slurm_acct_db.* TO 'slurm'@'localhost';
111 '';
112 ensureDatabases = [ "slurm_acct_db" ];
113 ensureUsers = [
114 {
115 ensurePermissions = {
116 "slurm_acct_db.*" = "ALL PRIVILEGES";
117 };
118 name = "slurm";
119 }
120 ];
121 };
122 };
123
124 node1 = computeNode;
125 node2 = computeNode;
126 node3 = computeNode;
127 };
128
129 testScript = ''
130 start_all()
131
132 # Make sure DBD is up after DB initialzation
133 with subtest("can_start_slurmdbd"):
134 dbd.succeed("systemctl restart slurmdbd")
135 dbd.wait_for_unit("slurmdbd.service")
136 dbd.wait_for_open_port(6819)
137
138 # there needs to be an entry for the current
139 # cluster in the database before slurmctld is restarted
140 with subtest("add_account"):
141 control.succeed("sacctmgr -i add cluster default")
142 # check for cluster entry
143 control.succeed("sacctmgr list cluster | awk '{ print $1 }' | grep default")
144
145 with subtest("can_start_slurmctld"):
146 control.succeed("systemctl restart slurmctld")
147 control.wait_for_unit("slurmctld.service")
148
149 with subtest("can_start_slurmd"):
150 for node in [node1, node2, node3]:
151 node.succeed("systemctl restart slurmd.service")
152 node.wait_for_unit("slurmd")
153
154 # Test that the cluster works and can distribute jobs;
155
156 with subtest("run_distributed_command"):
157 # Run `hostname` on 3 nodes of the partition (so on all the 3 nodes).
158 # The output must contain the 3 different names
159 submit.succeed("srun -N 3 hostname | sort | uniq | wc -l | xargs test 3 -eq")
160
161 with subtest("check_slurm_dbd"):
162 # find the srun job from above in the database
163 control.succeed("sleep 5")
164 control.succeed("sacct | grep hostname")
165
166 with subtest("run_PMIx_mpitest"):
167 submit.succeed("srun -N 3 --mpi=pmix mpitest | grep size=3")
168 '';
169}