Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

scsi: sd: Fix TCG OPAL unlock on system resume

Commit 3cc2ffe5c16d ("scsi: sd: Differentiate system and runtime start/stop
management") introduced the manage_system_start_stop scsi_device flag to
allow libata to indicate to the SCSI disk driver that nothing should be
done when resuming a disk on system resume. This change turned the
execution of sd_resume() into a no-op for ATA devices on system
resume. While this solved deadlock issues during device resume, this change
also wrongly removed the execution of opal_unlock_from_suspend(). As a
result, devices with TCG OPAL locking enabled remain locked and
inaccessible after a system resume from sleep.

To fix this issue, introduce the SCSI driver resume method and implement it
with the sd_resume() function calling opal_unlock_from_suspend(). The
former sd_resume() function is renamed to sd_resume_common() and modified
to call the new sd_resume() function. For non-ATA devices, this results in
no functional changes.

In order for libata to explicitly execute sd_resume() when a device is
resumed during system restart, the function scsi_resume_device() is
introduced. libata calls this function from the revalidation work executed
on device resume, a state that is indicated with the new device flag
ATA_DFLAG_RESUMING. Doing so, locked TCG OPAL enabled devices are unlocked
on resume, allowing normal operation.

Fixes: 3cc2ffe5c16d ("scsi: sd: Differentiate system and runtime start/stop management")
Link: https://bugzilla.kernel.org/show_bug.cgi?id=218538
Cc: stable@vger.kernel.org
Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Link: https://lore.kernel.org/r/20240319071209.1179257-1-dlemoal@kernel.org
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>

authored by

Damien Le Moal and committed by
Martin K. Petersen
0c76106c 27f58c04

+69 -5
+4 -1
drivers/ata/libata-eh.c
··· 712 712 ehc->saved_ncq_enabled |= 1 << devno; 713 713 714 714 /* If we are resuming, wake up the device */ 715 - if (ap->pflags & ATA_PFLAG_RESUMING) 715 + if (ap->pflags & ATA_PFLAG_RESUMING) { 716 + dev->flags |= ATA_DFLAG_RESUMING; 716 717 ehc->i.dev_action[devno] |= ATA_EH_SET_ACTIVE; 718 + } 717 719 } 718 720 } 719 721 ··· 3171 3169 return 0; 3172 3170 3173 3171 err: 3172 + dev->flags &= ~ATA_DFLAG_RESUMING; 3174 3173 *r_failed_dev = dev; 3175 3174 return rc; 3176 3175 }
+9
drivers/ata/libata-scsi.c
··· 4730 4730 struct ata_link *link; 4731 4731 struct ata_device *dev; 4732 4732 unsigned long flags; 4733 + bool do_resume; 4733 4734 int ret = 0; 4734 4735 4735 4736 mutex_lock(&ap->scsi_scan_mutex); ··· 4752 4751 if (scsi_device_get(sdev)) 4753 4752 continue; 4754 4753 4754 + do_resume = dev->flags & ATA_DFLAG_RESUMING; 4755 + 4755 4756 spin_unlock_irqrestore(ap->lock, flags); 4757 + if (do_resume) { 4758 + ret = scsi_resume_device(sdev); 4759 + if (ret == -EWOULDBLOCK) 4760 + goto unlock; 4761 + dev->flags &= ~ATA_DFLAG_RESUMING; 4762 + } 4756 4763 ret = scsi_rescan_device(sdev); 4757 4764 scsi_device_put(sdev); 4758 4765 spin_lock_irqsave(ap->lock, flags);
+34
drivers/scsi/scsi_scan.c
··· 1642 1642 } 1643 1643 EXPORT_SYMBOL(scsi_add_device); 1644 1644 1645 + int scsi_resume_device(struct scsi_device *sdev) 1646 + { 1647 + struct device *dev = &sdev->sdev_gendev; 1648 + int ret = 0; 1649 + 1650 + device_lock(dev); 1651 + 1652 + /* 1653 + * Bail out if the device or its queue are not running. Otherwise, 1654 + * the rescan may block waiting for commands to be executed, with us 1655 + * holding the device lock. This can result in a potential deadlock 1656 + * in the power management core code when system resume is on-going. 1657 + */ 1658 + if (sdev->sdev_state != SDEV_RUNNING || 1659 + blk_queue_pm_only(sdev->request_queue)) { 1660 + ret = -EWOULDBLOCK; 1661 + goto unlock; 1662 + } 1663 + 1664 + if (dev->driver && try_module_get(dev->driver->owner)) { 1665 + struct scsi_driver *drv = to_scsi_driver(dev->driver); 1666 + 1667 + if (drv->resume) 1668 + ret = drv->resume(dev); 1669 + module_put(dev->driver->owner); 1670 + } 1671 + 1672 + unlock: 1673 + device_unlock(dev); 1674 + 1675 + return ret; 1676 + } 1677 + EXPORT_SYMBOL(scsi_resume_device); 1678 + 1645 1679 int scsi_rescan_device(struct scsi_device *sdev) 1646 1680 { 1647 1681 struct device *dev = &sdev->sdev_gendev;
+19 -4
drivers/scsi/sd.c
··· 4108 4108 return sd_suspend_common(dev, true); 4109 4109 } 4110 4110 4111 - static int sd_resume(struct device *dev, bool runtime) 4111 + static int sd_resume(struct device *dev) 4112 + { 4113 + struct scsi_disk *sdkp = dev_get_drvdata(dev); 4114 + 4115 + sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); 4116 + 4117 + if (opal_unlock_from_suspend(sdkp->opal_dev)) { 4118 + sd_printk(KERN_NOTICE, sdkp, "OPAL unlock failed\n"); 4119 + return -EIO; 4120 + } 4121 + 4122 + return 0; 4123 + } 4124 + 4125 + static int sd_resume_common(struct device *dev, bool runtime) 4112 4126 { 4113 4127 struct scsi_disk *sdkp = dev_get_drvdata(dev); 4114 4128 int ret; ··· 4138 4124 sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); 4139 4125 ret = sd_start_stop_device(sdkp, 1); 4140 4126 if (!ret) { 4141 - opal_unlock_from_suspend(sdkp->opal_dev); 4127 + sd_resume(dev); 4142 4128 sdkp->suspended = false; 4143 4129 } 4144 4130 ··· 4157 4143 return 0; 4158 4144 } 4159 4145 4160 - return sd_resume(dev, false); 4146 + return sd_resume_common(dev, false); 4161 4147 } 4162 4148 4163 4149 static int sd_resume_runtime(struct device *dev) ··· 4184 4170 "Failed to clear sense data\n"); 4185 4171 } 4186 4172 4187 - return sd_resume(dev, true); 4173 + return sd_resume_common(dev, true); 4188 4174 } 4189 4175 4190 4176 static const struct dev_pm_ops sd_pm_ops = { ··· 4207 4193 .pm = &sd_pm_ops, 4208 4194 }, 4209 4195 .rescan = sd_rescan, 4196 + .resume = sd_resume, 4210 4197 .init_command = sd_init_command, 4211 4198 .uninit_command = sd_uninit_command, 4212 4199 .done = sd_done,
+1
include/linux/libata.h
··· 107 107 108 108 ATA_DFLAG_NCQ_PRIO_ENABLED = (1 << 20), /* Priority cmds sent to dev */ 109 109 ATA_DFLAG_CDL_ENABLED = (1 << 21), /* cmd duration limits is enabled */ 110 + ATA_DFLAG_RESUMING = (1 << 22), /* Device is resuming */ 110 111 ATA_DFLAG_DETACH = (1 << 24), 111 112 ATA_DFLAG_DETACHED = (1 << 25), 112 113 ATA_DFLAG_DA = (1 << 26), /* device supports Device Attention */
+1
include/scsi/scsi_driver.h
··· 12 12 struct scsi_driver { 13 13 struct device_driver gendrv; 14 14 15 + int (*resume)(struct device *); 15 16 void (*rescan)(struct device *); 16 17 blk_status_t (*init_command)(struct scsi_cmnd *); 17 18 void (*uninit_command)(struct scsi_cmnd *);
+1
include/scsi/scsi_host.h
··· 767 767 #define scsi_template_proc_dir(sht) NULL 768 768 #endif 769 769 extern void scsi_scan_host(struct Scsi_Host *); 770 + extern int scsi_resume_device(struct scsi_device *sdev); 770 771 extern int scsi_rescan_device(struct scsi_device *sdev); 771 772 extern void scsi_remove_host(struct Scsi_Host *); 772 773 extern struct Scsi_Host *scsi_host_get(struct Scsi_Host *);