Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'vfio-for-linus' of git://github.com/awilliam/linux-vfio

Pull vfio fixes from Alex Williamson:
"VFIO doc update and virqfd race fix"

* tag 'vfio-for-linus' of git://github.com/awilliam/linux-vfio:
  vfio: Fix virqfd release race
  vfio: Trivial Documentation correction

+57 -21
+1 -1
Documentation/vfio.txt
···
133 133   $ lspci -n -s 0000:06:0d.0
134 134   06:0d.0 0401: 1102:0002 (rev 08)
135 135   # echo 0000:06:0d.0 > /sys/bus/pci/devices/0000:06:0d.0/driver/unbind
136     - # echo 1102 0002 > /sys/bus/pci/drivers/vfio/new_id
    136 + # echo 1102 0002 > /sys/bus/pci/drivers/vfio-pci/new_id
137 137
138 138   Now we need to look at what other devices are in the group to free
139 139   it for use by VFIO:
+56 -20
drivers/vfio/pci/vfio_pci_intrs.c
··· 76 76 schedule_work(&virqfd->inject); 77 77 } 78 78 79 - if (flags & POLLHUP) 80 - /* The eventfd is closing, detach from VFIO */ 81 - virqfd_deactivate(virqfd); 79 + if (flags & POLLHUP) { 80 + unsigned long flags; 81 + spin_lock_irqsave(&virqfd->vdev->irqlock, flags); 82 + 83 + /* 84 + * The eventfd is closing, if the virqfd has not yet been 85 + * queued for release, as determined by testing whether the 86 + * vdev pointer to it is still valid, queue it now. As 87 + * with kvm irqfds, we know we won't race against the virqfd 88 + * going away because we hold wqh->lock to get here. 89 + */ 90 + if (*(virqfd->pvirqfd) == virqfd) { 91 + *(virqfd->pvirqfd) = NULL; 92 + virqfd_deactivate(virqfd); 93 + } 94 + 95 + spin_unlock_irqrestore(&virqfd->vdev->irqlock, flags); 96 + } 82 97 83 98 return 0; 84 99 } ··· 108 93 static void virqfd_shutdown(struct work_struct *work) 109 94 { 110 95 struct virqfd *virqfd = container_of(work, struct virqfd, shutdown); 111 - struct virqfd **pvirqfd = virqfd->pvirqfd; 112 96 u64 cnt; 113 97 114 98 eventfd_ctx_remove_wait_queue(virqfd->eventfd, &virqfd->wait, &cnt); ··· 115 101 eventfd_ctx_put(virqfd->eventfd); 116 102 117 103 kfree(virqfd); 118 - *pvirqfd = NULL; 119 104 } 120 105 121 106 static void virqfd_inject(struct work_struct *work) ··· 135 122 int ret = 0; 136 123 unsigned int events; 137 124 138 - if (*pvirqfd) 139 - return -EBUSY; 140 - 141 125 virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL); 142 126 if (!virqfd) 143 127 return -ENOMEM; 144 128 145 129 virqfd->pvirqfd = pvirqfd; 146 - *pvirqfd = virqfd; 147 130 virqfd->vdev = vdev; 148 131 virqfd->handler = handler; 149 132 virqfd->thread = thread; ··· 161 152 } 162 153 163 154 virqfd->eventfd = ctx; 155 + 156 + /* 157 + * virqfds can be released by closing the eventfd or directly 158 + * through ioctl. These are both done through a workqueue, so 159 + * we update the pointer to the virqfd under lock to avoid 160 + * pushing multiple jobs to release the same virqfd. 
161 + */ 162 + spin_lock_irq(&vdev->irqlock); 163 + 164 + if (*pvirqfd) { 165 + spin_unlock_irq(&vdev->irqlock); 166 + ret = -EBUSY; 167 + goto fail; 168 + } 169 + *pvirqfd = virqfd; 170 + 171 + spin_unlock_irq(&vdev->irqlock); 164 172 165 173 /* 166 174 * Install our own custom wake-up handling so we are notified via ··· 213 187 fput(file); 214 188 215 189 kfree(virqfd); 216 - *pvirqfd = NULL; 217 190 218 191 return ret; 219 192 } 220 193 221 - static void virqfd_disable(struct virqfd *virqfd) 194 + static void virqfd_disable(struct vfio_pci_device *vdev, 195 + struct virqfd **pvirqfd) 222 196 { 223 - if (!virqfd) 224 - return; 197 + unsigned long flags; 225 198 226 - virqfd_deactivate(virqfd); 199 + spin_lock_irqsave(&vdev->irqlock, flags); 227 200 228 - /* Block until we know all outstanding shutdown jobs have completed. */ 201 + if (*pvirqfd) { 202 + virqfd_deactivate(*pvirqfd); 203 + *pvirqfd = NULL; 204 + } 205 + 206 + spin_unlock_irqrestore(&vdev->irqlock, flags); 207 + 208 + /* 209 + * Block until we know all outstanding shutdown jobs have completed. 210 + * Even if we don't queue the job, flush the wq to be sure it's 211 + * been released. 
212 + */ 229 213 flush_workqueue(vfio_irqfd_cleanup_wq); 230 214 } 231 215 ··· 428 392 static void vfio_intx_disable(struct vfio_pci_device *vdev) 429 393 { 430 394 vfio_intx_set_signal(vdev, -1); 431 - virqfd_disable(vdev->ctx[0].unmask); 432 - virqfd_disable(vdev->ctx[0].mask); 395 + virqfd_disable(vdev, &vdev->ctx[0].unmask); 396 + virqfd_disable(vdev, &vdev->ctx[0].mask); 433 397 vdev->irq_type = VFIO_PCI_NUM_IRQS; 434 398 vdev->num_ctx = 0; 435 399 kfree(vdev->ctx); ··· 575 539 vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix); 576 540 577 541 for (i = 0; i < vdev->num_ctx; i++) { 578 - virqfd_disable(vdev->ctx[i].unmask); 579 - virqfd_disable(vdev->ctx[i].mask); 542 + virqfd_disable(vdev, &vdev->ctx[i].unmask); 543 + virqfd_disable(vdev, &vdev->ctx[i].mask); 580 544 } 581 545 582 546 if (msix) { ··· 613 577 vfio_send_intx_eventfd, NULL, 614 578 &vdev->ctx[0].unmask, fd); 615 579 616 - virqfd_disable(vdev->ctx[0].unmask); 580 + virqfd_disable(vdev, &vdev->ctx[0].unmask); 617 581 } 618 582 619 583 return 0;