Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

vhost: Release worker mutex during flushes

In the next patches where the worker can be killed while in use, we
need to be able to take the worker mutex and kill queued works for
new IO and flushes, and set some new flags to prevent new
__vhost_vq_attach_worker calls from swapping in/out killed workers.

If we are holding the worker mutex during a flush and the flush's work
is still in the queue, the worker code that will handle the SIGKILL
cleanup won't be able to take the mutex and perform its cleanup. So
this patch has us drop the worker mutex while waiting for the flush
to complete.

Signed-off-by: Mike Christie <michael.christie@oracle.com>
Message-Id: <20240316004707.45557-8-michael.christie@oracle.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

authored by

Mike Christie and committed by
Michael S. Tsirkin
ba704ff4 34cf9ba5

+30 -14
+30 -14
drivers/vhost/vhost.c
··· 264 264 EXPORT_SYMBOL_GPL(vhost_vq_work_queue); 265 265 266 266 /** 267 - * vhost_worker_flush - flush a worker 267 + * __vhost_worker_flush - flush a worker 268 268 * @worker: worker to flush 269 269 * 270 - * This does not use RCU to protect the worker, so the device or worker 271 - * mutex must be held. 270 + * The worker's flush_mutex must be held. 272 271 */ 273 - static void vhost_worker_flush(struct vhost_worker *worker) 272 + static void __vhost_worker_flush(struct vhost_worker *worker) 274 273 { 275 274 struct vhost_flush_struct flush; 275 + 276 + if (!worker->attachment_cnt) 277 + return; 276 278 277 279 init_completion(&flush.wait_event); 278 280 vhost_work_init(&flush.work, vhost_flush_work); 279 281 280 282 vhost_worker_queue(worker, &flush.work); 283 + /* 284 + * Drop mutex in case our worker is killed and it needs to take the 285 + * mutex to force cleanup. 286 + */ 287 + mutex_unlock(&worker->mutex); 281 288 wait_for_completion(&flush.wait_event); 289 + mutex_lock(&worker->mutex); 290 + } 291 + 292 + static void vhost_worker_flush(struct vhost_worker *worker) 293 + { 294 + mutex_lock(&worker->mutex); 295 + __vhost_worker_flush(worker); 296 + mutex_unlock(&worker->mutex); 282 297 } 283 298 284 299 void vhost_dev_flush(struct vhost_dev *dev) ··· 301 286 struct vhost_worker *worker; 302 287 unsigned long i; 303 288 304 - xa_for_each(&dev->worker_xa, i, worker) { 305 - mutex_lock(&worker->mutex); 306 - if (!worker->attachment_cnt) { 307 - mutex_unlock(&worker->mutex); 308 - continue; 309 - } 289 + xa_for_each(&dev->worker_xa, i, worker) 310 290 vhost_worker_flush(worker); 311 - mutex_unlock(&worker->mutex); 312 - } 313 291 } 314 292 EXPORT_SYMBOL_GPL(vhost_dev_flush); 315 293 ··· 681 673 * device wide flushes which doesn't use RCU for execution. 
682 674 */ 683 675 mutex_lock(&old_worker->mutex); 684 - old_worker->attachment_cnt--; 685 676 /* 686 677 * We don't want to call synchronize_rcu for every vq during setup 687 678 * because it will slow down VM startup. If we haven't done ··· 691 684 mutex_lock(&vq->mutex); 692 685 if (!vhost_vq_get_backend(vq) && !vq->kick) { 693 686 mutex_unlock(&vq->mutex); 687 + 688 + old_worker->attachment_cnt--; 694 689 mutex_unlock(&old_worker->mutex); 695 690 /* 696 691 * vsock can queue anytime after VHOST_VSOCK_SET_GUEST_CID. ··· 708 699 /* Make sure new vq queue/flush/poll calls see the new worker */ 709 700 synchronize_rcu(); 710 701 /* Make sure whatever was queued gets run */ 711 - vhost_worker_flush(old_worker); 702 + __vhost_worker_flush(old_worker); 703 + old_worker->attachment_cnt--; 712 704 mutex_unlock(&old_worker->mutex); 713 705 } 714 706 ··· 762 752 mutex_unlock(&worker->mutex); 763 753 return -EBUSY; 764 754 } 755 + /* 756 + * A flush might have raced and snuck in before attachment_cnt was set 757 + * to zero. Make sure flushes are flushed from the queue before 758 + * freeing. 759 + */ 760 + __vhost_worker_flush(worker); 765 761 mutex_unlock(&worker->mutex); 766 762 767 763 vhost_worker_destroy(dev, worker);