Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0+
#
# Run a series of tests on remote systems under KVM.
#
# Usage: kvm-remote.sh "systems" [ <kvm.sh args> ]
#        kvm-remote.sh "systems" /path/to/old/run [ <kvm-again.sh args> ]
#
# Copyright (C) 2021 Facebook, Inc.
#
# Authors: Paul E. McKenney <paulmck@kernel.org>

scriptname=$0
args="$*"

# The relative paths used below only make sense from the top of a kernel tree.
[ -d tools/testing/selftests/rcutorture/bin ] || {
	echo $scriptname must be run from top-level directory of kernel source tree.
	exit 1
}

KVM="$(pwd)/tools/testing/selftests/rcutorture"
export KVM
PATH=${KVM}/bin:$PATH
export PATH
. functions.sh

starttime="$(get_starttime)"

# The first argument is the whitespace-separated list of remote systems;
# everything after it is handed on to kvm.sh or kvm-again.sh.
systems="$1"
if [ -z "$systems" ]
then
	echo $scriptname: Empty list of systems will go nowhere good, giving up.
	exit 1
fi
shift

# Pathnames:
#	T:	  /tmp/kvm-remote.sh.$$
#	resdir:	  /tmp/kvm-remote.sh.$$/res
#	rundir:	  /tmp/kvm-remote.sh.$$/res/$ds ("-remote" suffix)
#	oldrun:	  `pwd`/tools/testing/.../res/$otherds
#
# Pathname segments:
#	TD:	  kvm-remote.sh.$$
#	ds:	  yyyy.mm.dd-hh.mm.ss-remote

TD=kvm-remote.sh.$$
T=${TMPDIR-/tmp}/$TD
# Remove the scratch directory on any exit from the script.
trap 'rm -rf $T' 0
mkdir $T

resdir="$T/res"
ds=$(date +%Y.%m.%d-%H.%M.%S)-remote
rundir=$resdir/$ds
echo Results directory: $rundir
echo $scriptname $args
# Decide between a fresh build (first remaining argument is a "--" option
# destined for kvm.sh) and re-use of an old run (first remaining argument
# is the pathname of that run).  Either way, kvm-again.sh is then run in
# --dryrun mode to populate $rundir, and $oldrun names the local
# build-side results directory that also holds remote-log.
if echo $1 | grep -q '^--'
then
	# Fresh build.  Create a datestamp unless the caller supplied one.
	datestamp="$(echo "$@" | awk -v ds="$ds" '{
		for (i = 1; i < NF; i++) {
			if ($i == "--datestamp") {
				ds = "";
				break;
			}
		}
		if (ds != "")
			print "--datestamp " ds;
	}')"
	# $datestamp is deliberately unquoted: it is either empty or the two
	# words "--datestamp <ds>".
	kvm.sh --remote "$@" $datestamp --buildonly > $T/kvm.sh.out 2>&1
	ret=$?
	if test "$ret" -ne 0
	then
		# Report the saved status: by now $? holds the result of "test".
		echo $scriptname: kvm.sh failed exit code $ret
		cat $T/kvm.sh.out
		exit 2
	fi
	oldrun="$(grep -m 1 "^Results directory: " $T/kvm.sh.out | awk '{ print $3 }')"
	if test -z "$oldrun"
	then
		# Without this check, the "rm -f" below would glob from "/".
		echo $scriptname: kvm.sh output lacks a results directory, giving up.
		cat $T/kvm.sh.out
		exit 2
	fi
	touch "$oldrun/remote-log"
	echo $scriptname $args >> "$oldrun/remote-log"
	echo | tee -a "$oldrun/remote-log"
	echo " ----" kvm.sh output: "($(date))" | tee -a "$oldrun/remote-log"
	cat $T/kvm.sh.out | tee -a "$oldrun/remote-log"
	# We are going to run this, so remove the buildonly files.
	rm -f "$oldrun"/*/buildonly
	kvm-again.sh "$oldrun" --dryrun --remote --rundir "$rundir" > $T/kvm-again.sh.out 2>&1
	ret=$?
	if test "$ret" -ne 0
	then
		echo $scriptname: kvm-again.sh failed exit code $ret | tee -a "$oldrun/remote-log"
		cat $T/kvm-again.sh.out | tee -a "$oldrun/remote-log"
		exit 2
	fi
else
	# Re-use old run.
	oldrun="$1"
	# Make a relative pathname absolute.
	if ! echo $oldrun | grep -q '^/'
	then
		oldrun="$(pwd)/$oldrun"
	fi
	shift
	touch "$oldrun/remote-log"
	echo $scriptname $args >> "$oldrun/remote-log"
	kvm-again.sh "$oldrun" "$@" --dryrun --remote --rundir "$rundir" > $T/kvm-again.sh.out 2>&1
	ret=$?
	if test "$ret" -ne 0
	then
		echo $scriptname: kvm-again.sh failed exit code $ret | tee -a "$oldrun/remote-log"
		cat $T/kvm-again.sh.out | tee -a "$oldrun/remote-log"
		exit 2
	fi
	# From here on, the local copy of the new run directory plays the
	# role of the build-side directory.
	cp -a "$rundir" "$KVM/res/"
	oldrun="$KVM/res/$ds"
fi
# Show the kvm-again.sh dry-run output and the two run directories, both
# on the terminal and in remote-log.
echo | tee -a "$oldrun/remote-log"
echo " ----" kvm-again.sh output: "($(date))" | tee -a "$oldrun/remote-log"
# Log the output itself as well, so the banner above is not left empty in
# remote-log (this matches how the kvm.sh output is logged).
cat $T/kvm-again.sh.out | tee -a "$oldrun/remote-log"
echo | tee -a "$oldrun/remote-log"
echo Remote run directory: $rundir | tee -a "$oldrun/remote-log"
echo Local build-side run directory: $oldrun | tee -a "$oldrun/remote-log"
120
# Create the kvm-remote-N.sh scripts in the bin directory, one per line
# of the scenarios file.  Each script starts kvm-remote-noreap.sh in the
# background, runs its batch, syncs, and finally removes remote.run.
awk -v dest="$T/bin" -v rundir="$rundir" '
{
	# The batch number is the first field with its first "." removed
	# (the field is presumably of the form "N.").
	batch = $1;
	sub(/\./, "", batch);
	script = dest "/kvm-remote-" batch ".sh"

	# The remaining fields become the arguments of the batch runner,
	# each preceded by a single space.
	batchargs = "";
	for (field = 2; field <= NF; field++)
		batchargs = batchargs " " $field;

	print "kvm-remote-noreap.sh " rundir " &" > script;
	print "kvm-test-1-run-batch.sh" batchargs >> script;
	print "sync" >> script;
	print "rm " rundir "/remote.run" >> script;
}' < "$rundir"/scenarios
chmod +x $T/bin/kvm-remote-*.sh

# Archive bin and res relative to the parent of $T, so that the tarball
# unpacks as $TD/bin and $TD/res.  The -h flag follows symbolic links.
(
	cd "$(dirname $T)"
	tar -chzf $T/binres.tgz "$TD/bin" "$TD/res"
)
137
# Check first to avoid the need for cleanup for system-name typos
for i in $systems
do
	# Capture the ssh exit status right away: the status of an
	# assignment from a command substitution is that of the command.
	ncpus="$(ssh $i getconf _NPROCESSORS_ONLN 2> /dev/null)"
	ret=$?
	if test "$ret" -ne 0
	then
		echo System $i unreachable, giving up. | tee -a "$oldrun/remote-log"
		# Exit directly: an "exit" placed in a pipeline only leaves
		# the pipeline's subshell, not the script.
		exit 4
	fi
	echo $i: $ncpus CPUs " " $(date) | tee -a "$oldrun/remote-log"
done
150
# Download and expand the tarball on all systems.
echo Build-products tarball: $(du -h $T/binres.tgz) | tee -a "$oldrun/remote-log"
for i in $systems
do
	echo Downloading tarball to $i $(date) | tee -a "$oldrun/remote-log"
	# The tarball is unpacked under /tmp on the remote system.
	ssh $i "cd /tmp; tar -xzf -" < $T/binres.tgz
	ret=$?
	if test "$ret" -ne 0
	then
		echo Unable to download $T/binres.tgz to system $i, giving up. | tee -a "$oldrun/remote-log"
		# Exit directly: an "exit" placed in a pipeline only leaves
		# the pipeline's subshell, not the script.
		exit 10
	fi
done
164
# Function to check for presence of a file on the specified system.
# An ssh exit code of 255 produces a complaint and a retry after a wait,
# so this currently just waits forever if a machine disappears.  Any
# other exit code is returned to the caller: 0 if the file is present,
# 1 if it is not, and otherwise whatever ssh handed back.
#
# Usage: checkremotefile system pathname
checkremotefile () {
	local rc
	local delay=60

	while :
	do
		ssh $1 "test -f \"$2\""
		rc=$?
		case "$rc" in
		0)
			return 0
			;;
		1)
			echo " ---" File \"$2\" not found: ssh $1 test -f \"$2\"
			return 1
			;;
		255)
			# Fall out of the case statement to sleep and retry.
			echo " ---" ssh failure to $1 checking for file $2, retry after $delay seconds. $(date)
			;;
		*)
			echo " ---" Exit code $rc: ssh $1 test -f \"$2\", retry after $delay seconds. $(date)
			return $rc
			;;
		esac
		sleep $delay
	done
}
195
# Function to start batches on idle remote $systems
#
# Usage: startbatches curbatch nbatches
#
# Batches are numbered starting at 1.  Makes a single pass over $systems,
# starting one batch on each system for which checkremotefile does not
# find remote.run.  Returns the next batch to start by printing it on
# stdout, which is why all debug output must go to FD 2 (stderr).
# Exits the script with code 11 if ssh fails to start a batch.
startbatches () {
	local curbatch="$1"
	local nbatches="$2"
	local sshrc

	# Each pass through the following loop examines one system.
	# Note that the loop variable is not local.
	for i in $systems
	do
		# No batches left to hand out.
		if [ "$curbatch" -gt "$nbatches" ]
		then
			echo $((nbatches + 1))
			return 0
		fi

		# System still running last test, skip.
		checkremotefile "$i" "$resdir/$ds/remote.run" 1>&2 && continue

		# Create remote.run and launch the batch script in the
		# background on the remote system.
		ssh "$i" "cd \"$resdir/$ds\"; touch remote.run; PATH=\"$T/bin:$PATH\" nohup kvm-remote-$curbatch.sh > kvm-remote-$curbatch.sh.out 2>&1 &" 1>&2
		sshrc=$?
		if [ "$sshrc" -ne 0 ]
		then
			echo ssh $i failed: exitcode $sshrc 1>&2
			exit 11
		fi
		echo " ----" System $i Batch $(head -n $curbatch < "$rundir"/scenarios | tail -1) $(date) 1>&2
		curbatch=$((curbatch + 1))
	done
	echo $curbatch
}
231
# Launch all the scenarios.  There is one batch per line of the
# scenarios file.
nbatches="$(wc -l "$rundir"/scenarios | awk '{ print $1 }')"
curbatch=1
while [ "$curbatch" -le "$nbatches" ]
do
	# startbatches reports the next batch number on stdout and its
	# diagnostics on stderr; both are collected in files.
	startbatches $curbatch $nbatches > $T/curbatch 2> $T/startbatches.stderr
	curbatch="$(cat $T/curbatch)"
	if [ -s "$T/startbatches.stderr" ]
	then
		tee -a "$oldrun/remote-log" < "$T/startbatches.stderr"
	fi
	# Batches remain after this pass over the systems, so pause before
	# the next pass.
	[ "$curbatch" -le "$nbatches" ] && sleep 30
done
echo All batches started. $(date)
249
# Wait for all remaining scenarios to complete and collect results.
for i in $systems
do
	# Poll every 30 seconds for as long as remote.run is still present
	# on this system.
	while checkremotefile "$i" "$resdir/$ds/remote.run"
	do
		sleep 30
	done
	echo " ---" Collecting results from $i $(date)
	# Stream the result files back as a tarball, unpack them into
	# $oldrun, and remove the remote copy of the scratch directory.
	(
		cd "$oldrun"
		ssh $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" |
			tar -xzf -
	)
done

# The pipe to tee would hide the exit code of kvm-end-run-stats.sh, so
# pass it through a file and exit with it.
(
	kvm-end-run-stats.sh "$oldrun" "$starttime"
	echo $? > $T/exitcode
) | tee -a "$oldrun/remote-log"
exit "$(cat $T/exitcode)"