Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

selftests/ublk: add test for async partition scan

Add test_generic_15.sh to verify that async partition scan prevents
an IO hang when reading partition tables.

The test creates ublk devices with the fault_inject target and a very
large delay (60s) to simulate blocked partition table reads, then kills
the daemon to verify that the device state transitions properly without
hanging:

1. Without recovery support:
- Create device with fault_inject and 60s delay
- Kill daemon while partition scan may be blocked
- Verify device transitions to DEAD state

2. With recovery support (-r 1):
- Create device with fault_inject, 60s delay, and recovery
- Kill daemon while partition scan may be blocked
- Verify device transitions to QUIESCED state

Before the async partition scan fix, killing the daemon during a
partition scan would cause a deadlock, because the partition scan held
ub->mutex while waiting for IO. With the async fix, the partition scan
happens in a work function and flush_work() ensures proper synchronization.

Add the _add_ublk_dev_no_settle() helper function to skip udevadm settle,
which would otherwise hang waiting for partition scan events to
complete when the partition table read is delayed.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>

authored by

Ming Lei and committed by
Jens Axboe
60cf8637 7fc4da6a

+81 -4
+1
tools/testing/selftests/ublk/Makefile
··· 22 22 TEST_PROGS += test_generic_12.sh 23 23 TEST_PROGS += test_generic_13.sh 24 24 TEST_PROGS += test_generic_14.sh 25 + TEST_PROGS += test_generic_15.sh 25 26 26 27 TEST_PROGS += test_null_01.sh 27 28 TEST_PROGS += test_null_02.sh
+12 -4
tools/testing/selftests/ublk/test_common.sh
··· 178 178 _create_ublk_dev() { 179 179 local dev_id; 180 180 local cmd=$1 181 + local settle=$2 181 182 182 - shift 1 183 + shift 2 183 184 184 185 if [ ! -c /dev/ublk-control ]; then 185 186 return ${UBLK_SKIP_CODE} ··· 195 194 echo "fail to add ublk dev $*" 196 195 return 255 197 196 fi 198 - udevadm settle 197 + 198 + if [ "$settle" = "yes" ]; then 199 + udevadm settle 200 + fi 199 201 200 202 if [[ "$dev_id" =~ ^[0-9]+$ ]]; then 201 203 echo "${dev_id}" ··· 208 204 } 209 205 210 206 _add_ublk_dev() { 211 - _create_ublk_dev "add" "$@" 207 + _create_ublk_dev "add" "yes" "$@" 208 + } 209 + 210 + _add_ublk_dev_no_settle() { 211 + _create_ublk_dev "add" "no" "$@" 212 212 } 213 213 214 214 _recover_ublk_dev() { 215 215 local dev_id 216 216 local state 217 217 218 - dev_id=$(_create_ublk_dev "recover" "$@") 218 + dev_id=$(_create_ublk_dev "recover" "yes" "$@") 219 219 for ((j=0;j<20;j++)); do 220 220 state=$(_get_ublk_dev_state "${dev_id}") 221 221 [ "$state" == "LIVE" ] && break
+68
tools/testing/selftests/ublk/test_generic_15.sh
··· 1 + #!/bin/bash 2 + # SPDX-License-Identifier: GPL-2.0 3 + 4 + . "$(cd "$(dirname "$0")" && pwd)"/test_common.sh 5 + 6 + TID="generic_15" 7 + ERR_CODE=0 8 + 9 + _test_partition_scan_no_hang() 10 + { 11 + local recovery_flag=$1 12 + local expected_state=$2 13 + local dev_id 14 + local state 15 + local daemon_pid 16 + local start_time 17 + local elapsed 18 + 19 + # Create ublk device with fault_inject target and very large delay 20 + # to simulate hang during partition table read 21 + # --delay_us 60000000 = 60 seconds delay 22 + # Use _add_ublk_dev_no_settle to avoid udevadm settle hang waiting 23 + # for partition scan events to complete 24 + if [ "$recovery_flag" = "yes" ]; then 25 + echo "Testing partition scan with recovery support..." 26 + dev_id=$(_add_ublk_dev_no_settle -t fault_inject -q 1 -d 1 --delay_us 60000000 -r 1) 27 + else 28 + echo "Testing partition scan without recovery..." 29 + dev_id=$(_add_ublk_dev_no_settle -t fault_inject -q 1 -d 1 --delay_us 60000000) 30 + fi 31 + 32 + _check_add_dev "$TID" $? 33 + 34 + # The add command should return quickly because partition scan is async. 35 + # Now sleep briefly to let the async partition scan work start and hit 36 + # the delay in the fault_inject handler. 
37 + sleep 1 38 + 39 + # Kill the ublk daemon while partition scan is potentially blocked 40 + # And check state transitions properly 41 + start_time=${SECONDS} 42 + daemon_pid=$(_get_ublk_daemon_pid "${dev_id}") 43 + state=$(__ublk_kill_daemon "${dev_id}" "${expected_state}") 44 + elapsed=$((SECONDS - start_time)) 45 + 46 + # Verify the device transitioned to expected state 47 + if [ "$state" != "${expected_state}" ]; then 48 + echo "FAIL: Device state is $state, expected ${expected_state}" 49 + ERR_CODE=255 50 + ${UBLK_PROG} del -n "${dev_id}" > /dev/null 2>&1 51 + return 52 + fi 53 + echo "PASS: Device transitioned to ${expected_state} in ${elapsed}s without hanging" 54 + 55 + # Clean up the device 56 + ${UBLK_PROG} del -n "${dev_id}" > /dev/null 2>&1 57 + } 58 + 59 + _prep_test "partition_scan" "verify async partition scan prevents IO hang" 60 + 61 + # Test 1: Without recovery support - should transition to DEAD 62 + _test_partition_scan_no_hang "no" "DEAD" 63 + 64 + # Test 2: With recovery support - should transition to QUIESCED 65 + _test_partition_scan_no_hang "yes" "QUIESCED" 66 + 67 + _cleanup_test "partition_scan" 68 + _show_result $TID $ERR_CODE