Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0-only
#
# Picks PCI devices that look safe to break, breaks each of them with
# the helpers sourced from eeh-functions.sh and reports how many failed
# to recover.
#
# Exits 0 without testing when EEH is not supported, and 1 when the
# debugfs EEH testing files (or a temporary file) are unavailable.

. ./eeh-functions.sh

if ! eeh_supported ; then
	echo "EEH not supported on this system, skipping"
	exit 0
fi

# Both debugfs files are required (presumably the eeh-functions.sh
# helpers write to them - confirm there), so give up when either one is
# absent rather than only when both are.
if [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_check" ] || \
   [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_break" ] ; then
	echo "debugfs EEH testing files are missing. Is debugfs mounted?"
	exit 1
fi

# Snapshot of the PCI device list taken before anything is broken; it
# is diffed against a fresh listing once the run is over.
pre_lspci="$(mktemp)" || {
	echo "Unable to create a temporary file"
	exit 1
}

# Make sure the snapshot is removed on every exit path. Some shells do
# not run the EXIT trap when killed by a signal, so turn the common
# ones into a regular exit first.
trap 'rm -f "$pre_lspci"' EXIT
trap 'exit 1' HUP INT TERM

lspci > "$pre_lspci"

# Bump the max freeze count to something absurd so we don't
# trip over it while breaking things.
echo 5000 > /sys/kernel/debug/powerpc/eeh_max_freezes

# Record the devices that we break in here. Assuming everything
# goes to plan we should get them back once the recovery process
# is finished.
devices=""
dev_count=0

# Build up a list of candidate devices. Only sysfs entries whose name
# ends in ".0" are considered.
for dev_path in /sys/bus/pci/devices/*.0 ; do
	# A glob that matches nothing is left as the literal pattern.
	[ -e "$dev_path" ] || continue

	dev="${dev_path##*/}"

	# skip bridges since we can't recover them (yet...)
	if [ -e "$dev_path/pci_bus" ] ; then
		echo "$dev, Skipped: bridge"
		continue
	fi

	# Skip VFs for now since we don't have a reliable way
	# to break them.
	if [ -e "$dev_path/physfn" ] ; then
		echo "$dev, Skipped: virtfn"
		continue
	fi

	# Only resolve the driver name when a driver link exists; without
	# one realpath fails and basename is run with no operand.
	driver=""
	if [ -e "$dev_path/driver" ] ; then
		driver="$(basename "$(realpath "$dev_path/driver")")"
	fi

	if [ "ahci" = "$driver" ] ; then
		echo "$dev, Skipped: ahci doesn't support recovery"
		continue
	fi

	# Don't inject errors into an already-frozen PE. This happens with
	# PEs that contain multiple PCI devices (e.g. multi-function cards)
	# and injecting new errors during the recovery process will probably
	# result in the recovery failing and the device being marked as
	# failed.
	if ! pe_ok "$dev" ; then
		echo "$dev, Skipped: Bad initial PE state"
		continue
	fi

	echo "$dev, Added"

	# Add to the list of devices to check
	devices="$devices $dev"
	dev_count="$((dev_count + 1))"
done

echo "Found ${dev_count} breakable devices..."

# Break every candidate in turn and count the ones that are skipped
# or do not recover.
failed=0

# $devices is a space separated list, so it is left unquoted on
# purpose to get one word per device.
for dev in $devices ; do
	echo "Breaking $dev..."

	# The PE state is checked again right before breaking the device;
	# a device whose PE is not ok at this point counts as a failure.
	if ! pe_ok "$dev" ; then
		echo "Skipping $dev, Initial PE state is not ok"
		failed="$((failed + 1))"
		continue
	fi

	if ! eeh_one_dev "$dev" ; then
		failed="$((failed + 1))"
	fi
done

echo "$failed devices failed to recover ($dev_count tested)"

# Print whatever changed in the PCI device list compared to the
# snapshot taken before the run, then drop the snapshot.
lspci | diff -u "$pre_lspci" -
rm -f "$pre_lspci"

# The exit status is the number of failures. Only the low 8 bits of an
# exit status reach the parent, so clamp it to keep a large failure
# count from wrapping around to 0 and looking like success.
if [ "$failed" -gt 255 ] ; then
	failed=255
fi

exit "$failed"