Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ublk: scan partition in async way

Implement async partition scan to avoid IO hang when reading partition
tables. Similar to nvme_partition_scan_work(), partition scanning is
deferred to a work queue to prevent deadlocks.

When partition scan happens synchronously during add_disk(), IO errors
can cause the partition scan to wait while holding ub->mutex, which
can deadlock with other operations that need the mutex.

Changes:
- Add partition_scan_work to ublk_device structure
- Implement ublk_partition_scan_work() to perform async scan
- Always suppress sync partition scan during add_disk()
- Schedule async work after add_disk() for trusted daemons
- Add flush_work() in ublk_stop_dev() before grabbing ub->mutex

Reviewed-by: Caleb Sander Mateos <csander@purestorage.com>
Reported-by: Yoav Cohen <yoav@nvidia.com>
Closes: https://lore.kernel.org/linux-block/DM4PR12MB63280C5637917C071C2F0D65A9A8A@DM4PR12MB6328.namprd12.prod.outlook.com/
Fixes: 71f28f3136af ("ublk_drv: add io_uring based userspace block driver")
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>

Authored by Ming Lei; committed by Jens Axboe.
7fc4da6a 04bdb1a0

+32 -3
+32 -3
drivers/block/ublk_drv.c
@@ struct ublk_device @@
 	bool canceling;
 	pid_t ublksrv_tgid;
 	struct delayed_work exit_work;
+	struct work_struct partition_scan_work;

 	struct ublk_queue *queues[];
 };
@@ forward declarations @@
 static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub,
 		u16 q_id, u16 tag, struct ublk_io *io, size_t offset);
 static inline unsigned int ublk_req_build_flags(struct request *req);
+
+static void ublk_partition_scan_work(struct work_struct *work)
+{
+	struct ublk_device *ub =
+		container_of(work, struct ublk_device, partition_scan_work);
+
+	if (WARN_ON_ONCE(!test_and_clear_bit(GD_SUPPRESS_PART_SCAN,
+					     &ub->ub_disk->state)))
+		return;
+
+	mutex_lock(&ub->ub_disk->open_mutex);
+	bdev_disk_changed(ub->ub_disk, false);
+	mutex_unlock(&ub->ub_disk->open_mutex);
+}

 static inline struct ublksrv_io_desc *
 ublk_get_iod(const struct ublk_queue *ubq, unsigned tag)
@@ ublk_stop_dev @@
 	mutex_lock(&ub->mutex);
 	ublk_stop_dev_unlocked(ub);
 	mutex_unlock(&ub->mutex);
+	flush_work(&ub->partition_scan_work);
 	ublk_cancel_dev(ub);
 }
@@ ublk_ctrl_start_dev @@
 	ublk_apply_params(ub);

-	/* don't probe partitions if any daemon task is un-trusted */
-	if (ub->unprivileged_daemons)
-		set_bit(GD_SUPPRESS_PART_SCAN, &disk->state);
+	/*
+	 * Suppress partition scan to avoid potential IO hang.
+	 *
+	 * If ublk server error occurs during partition scan, the IO may
+	 * wait while holding ub->mutex, which can deadlock with other
+	 * operations that need the mutex. Defer partition scan to async
+	 * work.
+	 * For unprivileged daemons, keep GD_SUPPRESS_PART_SCAN set
+	 * permanently.
+	 */
+	set_bit(GD_SUPPRESS_PART_SCAN, &disk->state);

 	ublk_get_device(ub);
 	ub->dev_info.state = UBLK_S_DEV_LIVE;
@@ ublk_ctrl_start_dev (after add_disk) @@
 		goto out_put_cdev;

 	set_bit(UB_STATE_USED, &ub->state);
+
+	/* Schedule async partition scan for trusted daemons */
+	if (!ub->unprivileged_daemons)
+		schedule_work(&ub->partition_scan_work);

 out_put_cdev:
 	if (ret) {
@@ ublk_ctrl_add_dev @@
 	mutex_init(&ub->mutex);
 	spin_lock_init(&ub->lock);
 	mutex_init(&ub->cancel_mutex);
+	INIT_WORK(&ub->partition_scan_work, ublk_partition_scan_work);

 	ret = ublk_alloc_dev_number(ub, header->dev_id);
 	if (ret < 0)