Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

s390/pci: introduce lock to synchronize state of zpci_dev's

There are a number of tasks that need the state of a zpci device
to be stable. Other tasks need to be synchronized as they change the state.

State changes could be generated by the system as availability or error
events, or be requested by the user through manipulations in sysfs.
Some other actions accessible through sysfs - like device resets - need the
state to be stable.

Unsynchronized state handling could lead to unusable devices. This has
been observed in cases of concurrent state changes through systemd udev
rules and DPM boot control. Some breakage can be provoked by artificial
tests, e.g. through repetitively injecting "recover" on a PCI function
through sysfs while running a "hotplug remove/add" in a loop through a
PCI slot's "power" attribute in sysfs. After a few iterations this could
result in a kernel oops.

So introduce a new mutex "state_lock" to guard the state property of the
struct zpci_dev. Acquire this lock in all tasks that modify the state:

- hotplug add and remove, through the PCI hotplug slot entry,
- availability events, as reported by the platform,
- error events, as reported by the platform,
- during device resets, explicitly through sysfs requests or
implicitly through the common PCI layer.

Break an inner _do_recover() routine out of recover_store() to
separate the necessary synchronizations from the actual manipulations of
the zpci_dev required for the reset.

With the following changes I was able to run the inject loops for hours
without hitting an error.

Signed-off-by: Gerd Bayer <gbayer@linux.ibm.com>
Reviewed-by: Niklas Schnelle <schnelle@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>

authored by

Gerd Bayer and committed by
Heiko Carstens
bcb5d6c7 0d48566d

+106 -52
+1
arch/s390/include/asm/pci.h
··· 122 122 struct rcu_head rcu; 123 123 struct hotplug_slot hotplug_slot; 124 124 125 + struct mutex state_lock; /* protect state changes */ 125 126 enum zpci_state state; 126 127 u32 fid; /* function ID, used by sclp */ 127 128 u32 fh; /* function handle, used by insn's */
+9 -2
arch/s390/pci/pci.c
··· 28 28 #include <linux/jump_label.h> 29 29 #include <linux/pci.h> 30 30 #include <linux/printk.h> 31 + #include <linux/lockdep.h> 31 32 32 33 #include <asm/isc.h> 33 34 #include <asm/airq.h> ··· 731 730 * equivalent to its state during boot when first probing a driver. 732 731 * Consequently after reset the PCI function requires re-initialization via the 733 732 * common PCI code including re-enabling IRQs via pci_alloc_irq_vectors() 734 - * and enabling the function via e.g.pci_enablde_device_flags().The caller 733 + * and enabling the function via e.g. pci_enable_device_flags(). The caller 735 734 * must guard against concurrent reset attempts. 736 735 * 737 736 * In most cases this function should not be called directly but through 738 737 * pci_reset_function() or pci_reset_bus() which handle the save/restore and 739 - * locking. 738 + * locking - asserted by lockdep. 740 739 * 741 740 * Return: 0 on success and an error value otherwise 742 741 */ ··· 745 744 u8 status; 746 745 int rc; 747 746 747 + lockdep_assert_held(&zdev->state_lock); 748 748 zpci_dbg(3, "rst fid:%x, fh:%x\n", zdev->fid, zdev->fh); 749 749 if (zdev_enabled(zdev)) { 750 750 /* Disables device access, DMAs and IRQs (reset state) */ ··· 808 806 zdev->state = state; 809 807 810 808 kref_init(&zdev->kref); 809 + mutex_init(&zdev->state_lock); 811 810 mutex_init(&zdev->fmb_lock); 812 811 mutex_init(&zdev->kzdev_lock); 813 812 ··· 872 869 int zpci_deconfigure_device(struct zpci_dev *zdev) 873 870 { 874 871 int rc; 872 + 873 + lockdep_assert_held(&zdev->state_lock); 874 + if (zdev->state != ZPCI_FN_STATE_CONFIGURED) 875 + return 0; 875 876 876 877 if (zdev->zbus->bus) 877 878 zpci_bus_remove_device(zdev, false);
+10 -1
arch/s390/pci/pci_event.c
··· 267 267 zpci_err_hex(ccdf, sizeof(*ccdf)); 268 268 269 269 if (zdev) { 270 + mutex_lock(&zdev->state_lock); 270 271 zpci_update_fh(zdev, ccdf->fh); 271 272 if (zdev->zbus->bus) 272 273 pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); ··· 295 294 } 296 295 pci_dev_put(pdev); 297 296 no_pdev: 297 + if (zdev) 298 + mutex_unlock(&zdev->state_lock); 298 299 zpci_zdev_put(zdev); 299 300 } 300 301 ··· 329 326 330 327 zpci_dbg(3, "avl fid:%x, fh:%x, pec:%x\n", 331 328 ccdf->fid, ccdf->fh, ccdf->pec); 329 + 330 + if (existing_zdev) 331 + mutex_lock(&zdev->state_lock); 332 + 332 333 switch (ccdf->pec) { 333 334 case 0x0301: /* Reserved|Standby -> Configured */ 334 335 if (!zdev) { ··· 390 383 default: 391 384 break; 392 385 } 393 - if (existing_zdev) 386 + if (existing_zdev) { 387 + mutex_unlock(&zdev->state_lock); 394 388 zpci_zdev_put(zdev); 389 + } 395 390 } 396 391 397 392 void zpci_event_availability(void *data)
+43 -27
arch/s390/pci/pci_sysfs.c
··· 49 49 } 50 50 static DEVICE_ATTR_RO(mio_enabled); 51 51 52 + static int _do_recover(struct pci_dev *pdev, struct zpci_dev *zdev) 53 + { 54 + u8 status; 55 + int ret; 56 + 57 + pci_stop_and_remove_bus_device(pdev); 58 + if (zdev_enabled(zdev)) { 59 + ret = zpci_disable_device(zdev); 60 + /* 61 + * Due to a z/VM vs LPAR inconsistency in the error 62 + * state the FH may indicate an enabled device but 63 + * disable says the device is already disabled don't 64 + * treat it as an error here. 65 + */ 66 + if (ret == -EINVAL) 67 + ret = 0; 68 + if (ret) 69 + return ret; 70 + } 71 + 72 + ret = zpci_enable_device(zdev); 73 + if (ret) 74 + return ret; 75 + 76 + if (zdev->dma_table) { 77 + ret = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, 78 + virt_to_phys(zdev->dma_table), &status); 79 + if (ret) 80 + zpci_disable_device(zdev); 81 + } 82 + return ret; 83 + } 84 + 52 85 static ssize_t recover_store(struct device *dev, struct device_attribute *attr, 53 86 const char *buf, size_t count) 54 87 { ··· 89 56 struct pci_dev *pdev = to_pci_dev(dev); 90 57 struct zpci_dev *zdev = to_zpci(pdev); 91 58 int ret = 0; 92 - u8 status; 93 59 94 60 /* Can't use device_remove_self() here as that would lead us to lock 95 61 * the pci_rescan_remove_lock while holding the device' kernfs lock. 
··· 102 70 */ 103 71 kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); 104 72 WARN_ON_ONCE(!kn); 73 + 74 + /* Device needs to be configured and state must not change */ 75 + mutex_lock(&zdev->state_lock); 76 + if (zdev->state != ZPCI_FN_STATE_CONFIGURED) 77 + goto out; 78 + 105 79 /* device_remove_file() serializes concurrent calls ignoring all but 106 80 * the first 107 81 */ ··· 120 82 */ 121 83 pci_lock_rescan_remove(); 122 84 if (pci_dev_is_added(pdev)) { 123 - pci_stop_and_remove_bus_device(pdev); 124 - if (zdev_enabled(zdev)) { 125 - ret = zpci_disable_device(zdev); 126 - /* 127 - * Due to a z/VM vs LPAR inconsistency in the error 128 - * state the FH may indicate an enabled device but 129 - * disable says the device is already disabled don't 130 - * treat it as an error here. 131 - */ 132 - if (ret == -EINVAL) 133 - ret = 0; 134 - if (ret) 135 - goto out; 136 - } 137 - 138 - ret = zpci_enable_device(zdev); 139 - if (ret) 140 - goto out; 141 - 142 - if (zdev->dma_table) { 143 - ret = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, 144 - virt_to_phys(zdev->dma_table), &status); 145 - if (ret) 146 - zpci_disable_device(zdev); 147 - } 85 + ret = _do_recover(pdev, zdev); 148 86 } 149 - out: 150 87 pci_rescan_bus(zdev->zbus->bus); 151 88 pci_unlock_rescan_remove(); 89 + 90 + out: 91 + mutex_unlock(&zdev->state_lock); 152 92 if (kn) 153 93 sysfs_unbreak_active_protection(kn); 154 94 return ret ? ret : count;
+43 -22
drivers/pci/hotplug/s390_pci_hpc.c
··· 26 26 hotplug_slot); 27 27 int rc; 28 28 29 - if (zdev->state != ZPCI_FN_STATE_STANDBY) 30 - return -EIO; 29 + mutex_lock(&zdev->state_lock); 30 + if (zdev->state != ZPCI_FN_STATE_STANDBY) { 31 + rc = -EIO; 32 + goto out; 33 + } 31 34 32 35 rc = sclp_pci_configure(zdev->fid); 33 36 zpci_dbg(3, "conf fid:%x, rc:%d\n", zdev->fid, rc); 34 37 if (rc) 35 - return rc; 38 + goto out; 36 39 zdev->state = ZPCI_FN_STATE_CONFIGURED; 37 40 38 - return zpci_scan_configured_device(zdev, zdev->fh); 41 + rc = zpci_scan_configured_device(zdev, zdev->fh); 42 + out: 43 + mutex_unlock(&zdev->state_lock); 44 + return rc; 39 45 } 40 46 41 47 static int disable_slot(struct hotplug_slot *hotplug_slot) 42 48 { 43 49 struct zpci_dev *zdev = container_of(hotplug_slot, struct zpci_dev, 44 50 hotplug_slot); 45 - struct pci_dev *pdev; 51 + struct pci_dev *pdev = NULL; 52 + int rc; 46 53 47 - if (zdev->state != ZPCI_FN_STATE_CONFIGURED) 48 - return -EIO; 54 + mutex_lock(&zdev->state_lock); 55 + if (zdev->state != ZPCI_FN_STATE_CONFIGURED) { 56 + rc = -EIO; 57 + goto out; 58 + } 49 59 50 60 pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); 51 61 if (pdev && pci_num_vf(pdev)) { 52 62 pci_dev_put(pdev); 53 - return -EBUSY; 63 + rc = -EBUSY; 64 + goto out; 54 65 } 55 - pci_dev_put(pdev); 56 66 57 - return zpci_deconfigure_device(zdev); 67 + rc = zpci_deconfigure_device(zdev); 68 + out: 69 + mutex_unlock(&zdev->state_lock); 70 + if (pdev) 71 + pci_dev_put(pdev); 72 + return rc; 58 73 } 59 74 60 75 static int reset_slot(struct hotplug_slot *hotplug_slot, bool probe) 61 76 { 62 77 struct zpci_dev *zdev = container_of(hotplug_slot, struct zpci_dev, 63 78 hotplug_slot); 79 + int rc = -EIO; 64 80 65 - if (zdev->state != ZPCI_FN_STATE_CONFIGURED) 66 - return -EIO; 67 81 /* 68 - * We can't take the zdev->lock as reset_slot may be called during 69 - * probing and/or device removal which already happens under the 70 - * zdev->lock. 
Instead the user should use the higher level 71 - * pci_reset_function() or pci_bus_reset() which hold the PCI device 72 - * lock preventing concurrent removal. If not using these functions 73 - * holding the PCI device lock is required. 82 + * If we can't get the zdev->state_lock the device state is 83 + * currently undergoing a transition and we bail out - just 84 + * the same as if the device's state is not configured at all. 74 85 */ 86 + if (!mutex_trylock(&zdev->state_lock)) 87 + return rc; 75 88 76 - /* As long as the function is configured we can reset */ 77 - if (probe) 78 - return 0; 89 + /* We can reset only if the function is configured */ 90 + if (zdev->state != ZPCI_FN_STATE_CONFIGURED) 91 + goto out; 79 92 80 - return zpci_hot_reset_device(zdev); 93 + if (probe) { 94 + rc = 0; 95 + goto out; 96 + } 97 + 98 + rc = zpci_hot_reset_device(zdev); 99 + out: 100 + mutex_unlock(&zdev->state_lock); 101 + return rc; 81 102 } 82 103 83 104 static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)