selftests/powerpc: Add VF recovery tests

The basic EEH test ignores VFs since the way the eeh_dev_break debugfs
interface works means that if multiple VFs are enabled we may cause errors
on all of them. However, we can work around that by only enabling a
single VF at a time.

This patch adds some infrastructure for finding SR-IOV capable devices and
enabling / disabling VFs so we can exercise the VF specific EEH recovery
paths. Two new tests are added, one for testing EEH aware devices and one
for EEH un-aware VFs.

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20201103044503.917128-3-oohall@gmail.com
This commit is contained in:
Oliver O'Halloran 2020-11-03 15:45:03 +11:00 committed by Michael Ellerman
parent d6749ccba7
commit 38132cc0e5
3 changed files with 188 additions and 0 deletions

View File

@ -135,3 +135,111 @@ eeh_one_dev() {
return 0;
}
# Checks whether a PCI device currently has a driver bound.
# $1: PCI device name as it appears under /sys/bus/pci/devices/
# return code: 0 if the device's "driver" sysfs entry exists, non-zero otherwise
eeh_has_driver() {
	# quoted so that an odd or empty argument yields "false" rather than a
	# malformed test expression
	test -e "/sys/bus/pci/devices/$1/driver"
}
# Asks the kernel whether a device's current driver supports EEH recovery.
# $1: PCI device name to query
# return code: 0 if the write to the debugfs file succeeded, non-zero otherwise
eeh_can_recover() {
	# we'll get an IO error if the device's current driver doesn't support
	# error recovery
	echo "$1" > '/sys/kernel/debug/powerpc/eeh_dev_can_recover' 2>/dev/null
}
# Finds the SR-IOV capable physical functions (PFs) that have a driver bound.
# stdout: space separated list of PF device names
# return code: 0 if at least one PF was found, 1 otherwise
eeh_find_all_pfs() {
	devices=""

	# SR-IOV on pseries requires hypervisor support, so check for that
	is_pseries=""
	if grep -q pSeries /proc/cpuinfo ; then
		if [ ! -f /proc/device-tree/rtas/ibm,open-sriov-allow-unfreeze ] ||
		   [ ! -f /proc/device-tree/rtas/ibm,open-sriov-map-pe-number ] ; then
			return 1
		fi

		is_pseries="true"
	fi

	# Glob instead of parsing ls output. If nothing matches, the literal
	# pattern is left in place and rejected by the sriov_numvfs check.
	for sysfs in /sys/bus/pci/devices/* ; do
		if [ ! -e "$sysfs/sriov_numvfs" ] ; then
			continue
		fi

		dev="${sysfs##*/}"

		# skip unsupported PFs on pseries; the device-tree properties
		# are only consulted when we actually are on pseries
		if [ -n "$is_pseries" ] &&
		   [ ! -f "$sysfs/of_node/ibm,is-open-sriov-pf" ] &&
		   [ ! -f "$sysfs/of_node/ibm,open-sriov-vf-bar-info" ] ; then
			continue
		fi

		# no driver, no vfs
		if ! eeh_has_driver "$dev" ; then
			continue
		fi

		devices="$devices $dev"
	done

	if [ -z "$devices" ] ; then
		return 1
	fi

	# deliberately unquoted: collapses the leading separator
	echo $devices
	return 0
}
# attempts to enable one VF on each PF so we can do VF specific tests.
# stdout: list of enabled VFs, one per line
# return code: 0 if vfs are found, 1 otherwise
eeh_enable_vfs() {
	pf_list="$(eeh_find_all_pfs)"

	vfs=0
	for dev in $pf_list ; do
		pf_sysfs="/sys/bus/pci/devices/$dev"

		# make sure we have a single VF
		echo 0 > "$pf_sysfs/sriov_numvfs"
		if ! echo 1 > "$pf_sysfs/sriov_numvfs" ; then
			log "Unable to enable VFs on $dev, skipping"
			continue
		fi

		# Resolve virtfn0 to the VF's own sysfs directory; its last
		# path component is the VF's device name. Check that the entry
		# exists first, since realpath may succeed even when the final
		# path component is missing.
		vf=""
		if [ -e "$pf_sysfs/virtfn0" ] ; then
			vf="$(basename "$(realpath "$pf_sysfs/virtfn0")")"
		fi
		if [ -z "$vf" ] ; then
			log "unable to find enabled vf on $dev"
			echo 0 > "$pf_sysfs/sriov_numvfs"
			continue
		fi

		if ! eeh_can_break "$vf" ; then
			log "skipping $vf"

			echo 0 > "$pf_sysfs/sriov_numvfs"
			continue
		fi

		vfs="$((vfs + 1))"
		echo "$vf"
	done

	test "$vfs" != 0
	return $?
}
# Turns off all VFs on every PF reported by eeh_find_all_pfs.
# return code: 0 if any PFs were found (and had 0 written to sriov_numvfs),
#              1 if no PFs were found
eeh_disable_vfs() {
	pf_list="$(eeh_find_all_pfs)"

	# nothing to do without PFs
	[ -n "$pf_list" ] || return 1

	for dev in $pf_list
	do
		echo 0 > "/sys/bus/pci/devices/$dev/sriov_numvfs"
	done

	return 0
}

View File

@ -0,0 +1,45 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0-only
#
# EEH aware VF test: enables VFs via eeh_enable_vfs, breaks every VF whose
# driver supports error recovery and checks that each one recovers.
# Exits with $KSELFTESTS_SKIP when there is nothing to test, 0 when every
# tested VF recovered, and 1 otherwise.

. ./eeh-functions.sh

eeh_test_prep # NB: may exit

# The status must be taken from the assignment itself; "if $? != 0" would try
# to run the status number as a command.
if ! vf_list="$(eeh_enable_vfs)" ; then
	log "No usable VFs found. Skipping EEH aware VF test"
	exit $KSELFTESTS_SKIP
fi

log "Enabled VFs: $vf_list"

tested=0
passed=0
for vf in $vf_list ; do
	log "Testing $vf"

	if ! eeh_can_recover "$vf" ; then
		log "Driver for $vf doesn't support error recovery, skipping"
		continue
	fi

	tested="$((tested + 1))"

	log "Breaking $vf..."
	if ! eeh_one_dev "$vf" ; then
		log "$vf failed to recover"
		continue
	fi

	passed="$((passed + 1))"
done

# always clean up the VFs we enabled before reporting the result
eeh_disable_vfs

if [ "$tested" = 0 ] ; then
	echo "No VFs with EEH aware drivers found, skipping"
	exit $KSELFTESTS_SKIP
fi

# succeed only if every VF we broke also recovered
test "$passed" = "$tested"
exit $?

View File

@ -0,0 +1,35 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0-only
#
# EEH unaware VF test: enables VFs via eeh_enable_vfs, unbinds any driver
# that supports error recovery, then breaks each VF and checks that it
# recovers.
# Exits with $KSELFTESTS_SKIP when no VFs are usable, 0 when every VF
# recovered, and 1 otherwise.

. ./eeh-functions.sh

eeh_test_prep # NB: may exit

# The status must be taken from the assignment itself; "if $? != 0" would try
# to run the status number as a command.
if ! vf_list="$(eeh_enable_vfs)" ; then
	log "No usable VFs found. Skipping EEH unaware VF test"
	exit $KSELFTESTS_SKIP
fi

log "Enabled VFs: $vf_list"

failed=0
for vf in $vf_list ; do
	log "Testing $vf"

	# the unaware path is only exercised without a recovery-capable driver
	if eeh_can_recover "$vf" ; then
		log "Driver for $vf supports error recovery. Unbinding..."
		echo "$vf" > "/sys/bus/pci/devices/$vf/driver/unbind"
	fi

	log "Breaking $vf..."
	if ! eeh_one_dev "$vf" ; then
		log "$vf failed to recover"
		failed="$((failed + 1))"
	fi
done

# always clean up the VFs we enabled before reporting the result
eeh_disable_vfs

# succeed only if no VF failed to recover
test "$failed" = 0
exit $?