Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

PCI: Add pci_try_reset_function(), pci_try_reset_slot(), pci_try_reset_bus()

When doing a function/slot/bus reset, PCI grabs the device_lock for each
device to block things like suspend and driver probes, but call paths exist
where this lock may already be held. This creates an opportunity for
deadlock. For instance, vfio allows userspace to issue resets so long as
it owns the device(s). If a driver unbind .remove callback races with
userspace issuing a reset, we have a deadlock as userspace gets stuck
waiting on device_lock while another thread has device_lock and waits for
.remove to complete. To resolve this, we can make versions of the reset
interfaces which use trylock. With this, we can safely attempt a reset and
return an error to userspace if there is contention.

[bhelgaas: the deadlock happens when A (userspace) has a file descriptor for
the device, and B waits in this path:

driver_detach
device_lock # take device_lock
__device_release_driver
pci_device_remove # pci_bus_type.remove
vfio_pci_remove # pci_driver .remove
vfio_del_group_dev
wait_event(vfio.release_q, !vfio_dev_present) # wait (holding device_lock)

Now B is stuck until A gives up the file descriptor. If A tries to acquire
device_lock for any reason, we deadlock because A is waiting for B to release
the lock, and B is waiting for A to release the file descriptor.]

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>

authored by

Alex Williamson and committed by
Bjorn Helgaas
61cf16d8 a870614a

+158
+155
drivers/pci/pci.c
··· 3250 3250 device_lock(&dev->dev); 3251 3251 } 3252 3252 3253 + /* Return 1 on successful lock, 0 on contention */ 3254 + static int pci_dev_trylock(struct pci_dev *dev) 3255 + { 3256 + if (pci_cfg_access_trylock(dev)) { 3257 + if (device_trylock(&dev->dev)) 3258 + return 1; 3259 + pci_cfg_access_unlock(dev); 3260 + } 3261 + 3262 + return 0; 3263 + } 3264 + 3253 3265 static void pci_dev_unlock(struct pci_dev *dev) 3254 3266 { 3255 3267 device_unlock(&dev->dev); ··· 3405 3393 } 3406 3394 EXPORT_SYMBOL_GPL(pci_reset_function); 3407 3395 3396 + /** 3397 + * pci_try_reset_function - quiesce and reset a PCI device function 3398 + * @dev: PCI device to reset 3399 + * 3400 + * Same as above, except return -EAGAIN if unable to lock device. 3401 + */ 3402 + int pci_try_reset_function(struct pci_dev *dev) 3403 + { 3404 + int rc; 3405 + 3406 + rc = pci_dev_reset(dev, 1); 3407 + if (rc) 3408 + return rc; 3409 + 3410 + pci_dev_save_and_disable(dev); 3411 + 3412 + if (pci_dev_trylock(dev)) { 3413 + rc = __pci_dev_reset(dev, 0); 3414 + pci_dev_unlock(dev); 3415 + } else 3416 + rc = -EAGAIN; 3417 + 3418 + pci_dev_restore(dev); 3419 + 3420 + return rc; 3421 + } 3422 + EXPORT_SYMBOL_GPL(pci_try_reset_function); 3423 + 3408 3424 /* Lock devices from the top of the tree down */ 3409 3425 static void pci_bus_lock(struct pci_bus *bus) 3410 3426 { ··· 3455 3415 pci_bus_unlock(dev->subordinate); 3456 3416 pci_dev_unlock(dev); 3457 3417 } 3418 + } 3419 + 3420 + /* Return 1 on successful lock, 0 on contention */ 3421 + static int pci_bus_trylock(struct pci_bus *bus) 3422 + { 3423 + struct pci_dev *dev; 3424 + 3425 + list_for_each_entry(dev, &bus->devices, bus_list) { 3426 + if (!pci_dev_trylock(dev)) 3427 + goto unlock; 3428 + if (dev->subordinate) { 3429 + if (!pci_bus_trylock(dev->subordinate)) { 3430 + pci_dev_unlock(dev); 3431 + goto unlock; 3432 + } 3433 + } 3434 + } 3435 + return 1; 3436 + 3437 + unlock: 3438 + list_for_each_entry_continue_reverse(dev, &bus->devices, bus_list) { 
3439 + if (dev->subordinate) 3440 + pci_bus_unlock(dev->subordinate); 3441 + pci_dev_unlock(dev); 3442 + } 3443 + return 0; 3458 3444 } 3459 3445 3460 3446 /* Lock devices from the top of the tree down */ ··· 3509 3443 pci_bus_unlock(dev->subordinate); 3510 3444 pci_dev_unlock(dev); 3511 3445 } 3446 + } 3447 + 3448 + /* Return 1 on successful lock, 0 on contention */ 3449 + static int pci_slot_trylock(struct pci_slot *slot) 3450 + { 3451 + struct pci_dev *dev; 3452 + 3453 + list_for_each_entry(dev, &slot->bus->devices, bus_list) { 3454 + if (!dev->slot || dev->slot != slot) 3455 + continue; 3456 + if (!pci_dev_trylock(dev)) 3457 + goto unlock; 3458 + if (dev->subordinate) { 3459 + if (!pci_bus_trylock(dev->subordinate)) { 3460 + pci_dev_unlock(dev); 3461 + goto unlock; 3462 + } 3463 + } 3464 + } 3465 + return 1; 3466 + 3467 + unlock: 3468 + list_for_each_entry_continue_reverse(dev, 3469 + &slot->bus->devices, bus_list) { 3470 + if (!dev->slot || dev->slot != slot) 3471 + continue; 3472 + if (dev->subordinate) 3473 + pci_bus_unlock(dev->subordinate); 3474 + pci_dev_unlock(dev); 3475 + } 3476 + return 0; 3512 3477 } 3513 3478 3514 3479 /* Save and disable devices from the top of the tree down */ ··· 3665 3568 } 3666 3569 EXPORT_SYMBOL_GPL(pci_reset_slot); 3667 3570 3571 + /** 3572 + * pci_try_reset_slot - Try to reset a PCI slot 3573 + * @slot: PCI slot to reset 3574 + * 3575 + * Same as above except return -EAGAIN if the slot cannot be locked 3576 + */ 3577 + int pci_try_reset_slot(struct pci_slot *slot) 3578 + { 3579 + int rc; 3580 + 3581 + rc = pci_slot_reset(slot, 1); 3582 + if (rc) 3583 + return rc; 3584 + 3585 + pci_slot_save_and_disable(slot); 3586 + 3587 + if (pci_slot_trylock(slot)) { 3588 + might_sleep(); 3589 + rc = pci_reset_hotplug_slot(slot->hotplug, 0); 3590 + pci_slot_unlock(slot); 3591 + } else 3592 + rc = -EAGAIN; 3593 + 3594 + pci_slot_restore(slot); 3595 + 3596 + return rc; 3597 + } 3598 + EXPORT_SYMBOL_GPL(pci_try_reset_slot); 3599 + 3668 3600 
static int pci_bus_reset(struct pci_bus *bus, int probe) 3669 3601 { 3670 3602 if (!bus->self) ··· 3751 3625 return rc; 3752 3626 } 3753 3627 EXPORT_SYMBOL_GPL(pci_reset_bus); 3628 + 3629 + /** 3630 + * pci_try_reset_bus - Try to reset a PCI bus 3631 + * @bus: top level PCI bus to reset 3632 + * 3633 + * Same as above except return -EAGAIN if the bus cannot be locked 3634 + */ 3635 + int pci_try_reset_bus(struct pci_bus *bus) 3636 + { 3637 + int rc; 3638 + 3639 + rc = pci_bus_reset(bus, 1); 3640 + if (rc) 3641 + return rc; 3642 + 3643 + pci_bus_save_and_disable(bus); 3644 + 3645 + if (pci_bus_trylock(bus)) { 3646 + might_sleep(); 3647 + pci_reset_bridge_secondary_bus(bus->self); 3648 + pci_bus_unlock(bus); 3649 + } else 3650 + rc = -EAGAIN; 3651 + 3652 + pci_bus_restore(bus); 3653 + 3654 + return rc; 3655 + } 3656 + EXPORT_SYMBOL_GPL(pci_try_reset_bus); 3754 3657 3755 3658 /** 3756 3659 * pcix_get_max_mmrbc - get PCI-X maximum designed memory read byte count
+3
include/linux/pci.h
··· 948 948 int __pci_reset_function(struct pci_dev *dev); 949 949 int __pci_reset_function_locked(struct pci_dev *dev); 950 950 int pci_reset_function(struct pci_dev *dev); 951 + int pci_try_reset_function(struct pci_dev *dev); 951 952 int pci_probe_reset_slot(struct pci_slot *slot); 952 953 int pci_reset_slot(struct pci_slot *slot); 954 + int pci_try_reset_slot(struct pci_slot *slot); 953 955 int pci_probe_reset_bus(struct pci_bus *bus); 954 956 int pci_reset_bus(struct pci_bus *bus); 957 + int pci_try_reset_bus(struct pci_bus *bus); 955 958 void pci_reset_bridge_secondary_bus(struct pci_dev *dev); 956 959 void pci_update_resource(struct pci_dev *dev, int resno); 957 960 int __must_check pci_assign_resource(struct pci_dev *dev, int i);