Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0-only

# Exit status used below whenever the test is skipped rather than run.
KSELFTESTS_SKIP=4

# Helper library. eeh_supported, pe_ok and eeh_one_dev are not defined in this
# file, so they are presumably provided here.
. ./eeh-functions.sh

if ! eeh_supported ; then
	echo "EEH not supported on this system, skipping"
	exit "$KSELFTESTS_SKIP"
fi

# Skip if either debugfs control file is absent. The tests are joined with
# "||" so that a system exposing only one of the two files is skipped too,
# rather than only skipping when both are missing.
if [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_check" ] || \
   [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_break" ] ; then
	echo "debugfs EEH testing files are missing. Is debugfs mounted?"
	exit "$KSELFTESTS_SKIP"
fi

# Snapshot the lspci output before anything gets broken, so it can be diffed
# against the post-test state at the end of the script.
if ! pre_lspci="$(mktemp)" ; then
	echo "Failed to create a temporary file for the lspci snapshot" >&2
	exit 1
fi
lspci > "$pre_lspci"

# Bump the max freeze count to something absurd so we don't
# trip over it while breaking things.
echo 5000 > /sys/kernel/debug/powerpc/eeh_max_freezes

# Record the devices that we break in here. Assuming everything
# goes to plan we should get them back once the recovery process
# is finished.
devices=""

# Build up a list of candidate devices: every entry in /sys/bus/pci/devices
# whose name ends in ".0" (presumably function 0 of each device), minus the
# ones filtered out below. A glob is used instead of parsing ls output.
dev_count=0
for dev_path in /sys/bus/pci/devices/*.0 ; do
	# If nothing matches, the pattern is left unexpanded; don't mistake
	# it for a device.
	[ -e "$dev_path" ] || continue
	dev="${dev_path##*/}"

	# skip bridges since we can't recover them (yet...)
	if [ -e "$dev_path/pci_bus" ] ; then
		echo "$dev, Skipped: bridge"
		continue
	fi

	# Skip VFs for now since we don't have a reliable way
	# to break them.
	if [ -e "$dev_path/physfn" ] ; then
		echo "$dev, Skipped: virtfn"
		continue
	fi

	# The name of the bound driver is the last component of the resolved
	# "driver" symlink.
	if [ "ahci" = "$(basename "$(realpath "$dev_path/driver")")" ] ; then
		echo "$dev, Skipped: ahci doesn't support recovery"
		continue
	fi

	# Don't inject errors into an already-frozen PE. This happens with
	# PEs that contain multiple PCI devices (e.g. multi-function cards)
	# and injecting new errors during the recovery process will probably
	# result in the recovery failing and the device being marked as
	# failed.
	if ! pe_ok "$dev" ; then
		echo "$dev, Skipped: Bad initial PE state"
		continue
	fi

	echo "$dev, Added"

	# Add to the list of devices to break, and keep a running count.
	devices="$devices $dev"
	dev_count="$((dev_count + 1))"
done

echo "Found ${dev_count} breakable devices..."

# Break each candidate in turn and count the ones that do not recover.
failed=0
# $devices is deliberately unquoted: it is a space-separated list that must
# be split into one word per device.
for dev in $devices ; do
	echo "Breaking $dev..."

	# The PE state is checked again right before injection; a PE that is
	# no longer ok at this point counts as a failure.
	if ! pe_ok "$dev" ; then
		echo "Skipping $dev, Initial PE state is not ok"
		failed="$((failed + 1))"
		continue
	fi

	if ! eeh_one_dev "$dev" ; then
		failed="$((failed + 1))"
	fi
done

echo "$failed devices failed to recover ($dev_count tested)"

# Show any difference between the current lspci output and the snapshot
# taken before the test, then remove the snapshot.
lspci | diff -u "$pre_lspci" -
rm -f "$pre_lspci"

# Exit 0 only if every device recovered. Use the POSIX integer comparison:
# "==" is not a valid test operator in every /bin/sh (e.g. dash).
test "$failed" -eq 0
exit $?