Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge patch series "ufs: Do not requeue while ungating the clock"

Bart Van Assche <bvanassche@acm.org> says:

In the traces we recorded while testing zoned storage, we noticed that UFS
commands are requeued while the clock is being ungated. Command requeueing
makes it harder than necessary to preserve the command order. Hence this
patch series, which modifies the SCSI core and also the UFS driver such that
clock ungating does not trigger command requeueing.

Link: https://lore.kernel.org/r/20230529202640.11883-1-bvanassche@acm.org
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>

+54 -76
+1
drivers/scsi/hosts.c
··· 441 441 shost->cmd_per_lun = sht->cmd_per_lun; 442 442 shost->no_write_same = sht->no_write_same; 443 443 shost->host_tagset = sht->host_tagset; 444 + shost->queuecommand_may_block = sht->queuecommand_may_block; 444 445 445 446 if (shost_eh_deadline == -1 || !sht->eh_host_reset_handler) 446 447 shost->eh_deadline = -1;
+16 -11
drivers/scsi/scsi_lib.c
··· 1984 1984 tag_set->flags = BLK_MQ_F_SHOULD_MERGE; 1985 1985 tag_set->flags |= 1986 1986 BLK_ALLOC_POLICY_TO_MQ_FLAG(shost->hostt->tag_alloc_policy); 1987 + if (shost->queuecommand_may_block) 1988 + tag_set->flags |= BLK_MQ_F_BLOCKING; 1987 1989 tag_set->driver_data = shost; 1988 1990 if (shost->host_tagset) 1989 1991 tag_set->flags |= BLK_MQ_F_TAG_HCTX_SHARED; ··· 2945 2943 } 2946 2944 EXPORT_SYMBOL_GPL(scsi_target_unblock); 2947 2945 2946 + /** 2947 + * scsi_host_block - Try to transition all logical units to the SDEV_BLOCK state 2948 + * @shost: device to block 2949 + * 2950 + * Pause SCSI command processing for all logical units associated with the SCSI 2951 + * host and wait until pending scsi_queue_rq() calls have finished. 2952 + * 2953 + * Returns zero if successful or a negative error code upon failure. 2954 + */ 2948 2955 int 2949 2956 scsi_host_block(struct Scsi_Host *shost) 2950 2957 { 2951 2958 struct scsi_device *sdev; 2952 - int ret = 0; 2959 + int ret; 2953 2960 2954 2961 /* 2955 2962 * Call scsi_internal_device_block_nowait so we can avoid ··· 2970 2959 mutex_unlock(&sdev->state_mutex); 2971 2960 if (ret) { 2972 2961 scsi_device_put(sdev); 2973 - break; 2962 + return ret; 2974 2963 } 2975 2964 } 2976 2965 2977 - /* 2978 - * SCSI never enables blk-mq's BLK_MQ_F_BLOCKING flag so 2979 - * calling synchronize_rcu() once is enough. 2980 - */ 2981 - WARN_ON_ONCE(shost->tag_set.flags & BLK_MQ_F_BLOCKING); 2966 + /* Wait for ongoing scsi_queue_rq() calls to finish. */ 2967 + blk_mq_wait_quiesce_done(&shost->tag_set); 2982 2968 2983 - if (!ret) 2984 - synchronize_rcu(); 2985 - 2986 - return ret; 2969 + return 0; 2987 2970 } 2988 2971 EXPORT_SYMBOL_GPL(scsi_host_block); 2989 2972
+1 -1
drivers/ufs/core/ufs-sysfs.c
··· 168 168 } 169 169 170 170 pm_runtime_get_sync(hba->dev); 171 - ufshcd_hold(hba, false); 171 + ufshcd_hold(hba); 172 172 ahit = ufshcd_readl(hba, REG_AUTO_HIBERNATE_IDLE_TIMER); 173 173 ufshcd_release(hba); 174 174 pm_runtime_put_sync(hba->dev);
+1 -1
drivers/ufs/core/ufshcd-crypto.c
··· 24 24 u32 slot_offset = hba->crypto_cfg_register + slot * sizeof(*cfg); 25 25 int err = 0; 26 26 27 - ufshcd_hold(hba, false); 27 + ufshcd_hold(hba); 28 28 29 29 if (hba->vops && hba->vops->program_key) { 30 30 err = hba->vops->program_key(hba, cfg, slot);
-3
drivers/ufs/core/ufshcd-priv.h
··· 84 84 int ufshcd_read_string_desc(struct ufs_hba *hba, u8 desc_index, 85 85 u8 **buf, bool ascii); 86 86 87 - int ufshcd_hold(struct ufs_hba *hba, bool async); 88 - void ufshcd_release(struct ufs_hba *hba); 89 - 90 87 int ufshcd_send_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd); 91 88 92 89 int ufshcd_exec_raw_upiu_cmd(struct ufs_hba *hba,
+28 -59
drivers/ufs/core/ufshcd.c
··· 1189 1189 bool timeout = false, do_last_check = false; 1190 1190 ktime_t start; 1191 1191 1192 - ufshcd_hold(hba, false); 1192 + ufshcd_hold(hba); 1193 1193 spin_lock_irqsave(hba->host->host_lock, flags); 1194 1194 /* 1195 1195 * Wait for all the outstanding tasks/transfer requests. ··· 1310 1310 } 1311 1311 1312 1312 /* let's not get into low power until clock scaling is completed */ 1313 - ufshcd_hold(hba, false); 1313 + ufshcd_hold(hba); 1314 1314 1315 1315 out: 1316 1316 return ret; ··· 1640 1640 goto out; 1641 1641 1642 1642 ufshcd_rpm_get_sync(hba); 1643 - ufshcd_hold(hba, false); 1643 + ufshcd_hold(hba); 1644 1644 1645 1645 hba->clk_scaling.is_enabled = value; 1646 1646 ··· 1723 1723 spin_lock_irqsave(hba->host->host_lock, flags); 1724 1724 if (hba->clk_gating.state == CLKS_ON) { 1725 1725 spin_unlock_irqrestore(hba->host->host_lock, flags); 1726 - goto unblock_reqs; 1726 + return; 1727 1727 } 1728 1728 1729 1729 spin_unlock_irqrestore(hba->host->host_lock, flags); ··· 1746 1746 } 1747 1747 hba->clk_gating.is_suspended = false; 1748 1748 } 1749 - unblock_reqs: 1750 - ufshcd_scsi_unblock_requests(hba); 1751 1749 } 1752 1750 1753 1751 /** 1754 1752 * ufshcd_hold - Enable clocks that were gated earlier due to ufshcd_release. 1755 1753 * Also, exit from hibern8 mode and set the link as active. 1756 1754 * @hba: per adapter instance 1757 - * @async: This indicates whether caller should ungate clocks asynchronously. 
1758 1755 */ 1759 - int ufshcd_hold(struct ufs_hba *hba, bool async) 1756 + void ufshcd_hold(struct ufs_hba *hba) 1760 1757 { 1761 - int rc = 0; 1762 1758 bool flush_result; 1763 1759 unsigned long flags; 1764 1760 1765 1761 if (!ufshcd_is_clkgating_allowed(hba) || 1766 1762 !hba->clk_gating.is_initialized) 1767 - goto out; 1763 + return; 1768 1764 spin_lock_irqsave(hba->host->host_lock, flags); 1769 1765 hba->clk_gating.active_reqs++; 1770 1766 ··· 1777 1781 */ 1778 1782 if (ufshcd_can_hibern8_during_gating(hba) && 1779 1783 ufshcd_is_link_hibern8(hba)) { 1780 - if (async) { 1781 - rc = -EAGAIN; 1782 - hba->clk_gating.active_reqs--; 1783 - break; 1784 - } 1785 1784 spin_unlock_irqrestore(hba->host->host_lock, flags); 1786 1785 flush_result = flush_work(&hba->clk_gating.ungate_work); 1787 1786 if (hba->clk_gating.is_suspended && !flush_result) 1788 - goto out; 1787 + return; 1789 1788 spin_lock_irqsave(hba->host->host_lock, flags); 1790 1789 goto start; 1791 1790 } ··· 1802 1811 hba->clk_gating.state = REQ_CLKS_ON; 1803 1812 trace_ufshcd_clk_gating(dev_name(hba->dev), 1804 1813 hba->clk_gating.state); 1805 - if (queue_work(hba->clk_gating.clk_gating_workq, 1806 - &hba->clk_gating.ungate_work)) 1807 - ufshcd_scsi_block_requests(hba); 1814 + queue_work(hba->clk_gating.clk_gating_workq, 1815 + &hba->clk_gating.ungate_work); 1808 1816 /* 1809 1817 * fall through to check if we should wait for this 1810 1818 * work to be done or not. 
1811 1819 */ 1812 1820 fallthrough; 1813 1821 case REQ_CLKS_ON: 1814 - if (async) { 1815 - rc = -EAGAIN; 1816 - hba->clk_gating.active_reqs--; 1817 - break; 1818 - } 1819 - 1820 1822 spin_unlock_irqrestore(hba->host->host_lock, flags); 1821 1823 flush_work(&hba->clk_gating.ungate_work); 1822 1824 /* Make sure state is CLKS_ON before returning */ ··· 1821 1837 break; 1822 1838 } 1823 1839 spin_unlock_irqrestore(hba->host->host_lock, flags); 1824 - out: 1825 - return rc; 1826 1840 } 1827 1841 EXPORT_SYMBOL_GPL(ufshcd_hold); 1828 1842 ··· 2052 2070 ufshcd_remove_clk_gating_sysfs(hba); 2053 2071 2054 2072 /* Ungate the clock if necessary. */ 2055 - ufshcd_hold(hba, false); 2073 + ufshcd_hold(hba); 2056 2074 hba->clk_gating.is_initialized = false; 2057 2075 ufshcd_release(hba); 2058 2076 ··· 2450 2468 if (hba->quirks & UFSHCD_QUIRK_BROKEN_UIC_CMD) 2451 2469 return 0; 2452 2470 2453 - ufshcd_hold(hba, false); 2471 + ufshcd_hold(hba); 2454 2472 mutex_lock(&hba->uic_cmd_mutex); 2455 2473 ufshcd_add_delay_before_dme_cmd(hba); 2456 2474 ··· 2853 2871 2854 2872 WARN_ONCE(tag < 0 || tag >= hba->nutrs, "Invalid tag %d\n", tag); 2855 2873 2856 - /* 2857 - * Allows the UFS error handler to wait for prior ufshcd_queuecommand() 2858 - * calls. 
2859 - */ 2860 - rcu_read_lock(); 2861 - 2862 2874 switch (hba->ufshcd_state) { 2863 2875 case UFSHCD_STATE_OPERATIONAL: 2864 2876 break; ··· 2898 2922 2899 2923 hba->req_abort_count = 0; 2900 2924 2901 - err = ufshcd_hold(hba, true); 2902 - if (err) { 2903 - err = SCSI_MLQUEUE_HOST_BUSY; 2904 - goto out; 2905 - } 2906 - WARN_ON(ufshcd_is_clkgating_allowed(hba) && 2907 - (hba->clk_gating.state != CLKS_ON)); 2925 + ufshcd_hold(hba); 2908 2926 2909 2927 lrbp = &hba->lrb[tag]; 2910 2928 WARN_ON(lrbp->cmd); ··· 2928 2958 ufshcd_send_command(hba, tag, hwq); 2929 2959 2930 2960 out: 2931 - rcu_read_unlock(); 2932 - 2933 2961 if (ufs_trigger_eh()) { 2934 2962 unsigned long flags; 2935 2963 ··· 3221 3253 3222 3254 BUG_ON(!hba); 3223 3255 3224 - ufshcd_hold(hba, false); 3256 + ufshcd_hold(hba); 3225 3257 mutex_lock(&hba->dev_cmd.lock); 3226 3258 ufshcd_init_query(hba, &request, &response, opcode, idn, index, 3227 3259 selector); ··· 3295 3327 return -EINVAL; 3296 3328 } 3297 3329 3298 - ufshcd_hold(hba, false); 3330 + ufshcd_hold(hba); 3299 3331 3300 3332 mutex_lock(&hba->dev_cmd.lock); 3301 3333 ufshcd_init_query(hba, &request, &response, opcode, idn, index, ··· 3391 3423 return -EINVAL; 3392 3424 } 3393 3425 3394 - ufshcd_hold(hba, false); 3426 + ufshcd_hold(hba); 3395 3427 3396 3428 mutex_lock(&hba->dev_cmd.lock); 3397 3429 ufshcd_init_query(hba, &request, &response, opcode, idn, index, ··· 4209 4241 uic_cmd.command = UIC_CMD_DME_SET; 4210 4242 uic_cmd.argument1 = UIC_ARG_MIB(PA_PWRMODE); 4211 4243 uic_cmd.argument3 = mode; 4212 - ufshcd_hold(hba, false); 4244 + ufshcd_hold(hba); 4213 4245 ret = ufshcd_uic_pwr_ctrl(hba, &uic_cmd); 4214 4246 ufshcd_release(hba); 4215 4247 ··· 4316 4348 if (update && 4317 4349 !pm_runtime_suspended(&hba->ufs_device_wlun->sdev_gendev)) { 4318 4350 ufshcd_rpm_get_sync(hba); 4319 - ufshcd_hold(hba, false); 4351 + ufshcd_hold(hba); 4320 4352 ufshcd_auto_hibern8_enable(hba); 4321 4353 ufshcd_release(hba); 4322 4354 ufshcd_rpm_put_sync(hba); ··· 
4909 4941 int err = 0; 4910 4942 int retries; 4911 4943 4912 - ufshcd_hold(hba, false); 4944 + ufshcd_hold(hba); 4913 4945 mutex_lock(&hba->dev_cmd.lock); 4914 4946 for (retries = NOP_OUT_RETRIES; retries > 0; retries--) { 4915 4947 err = ufshcd_exec_dev_cmd(hba, DEV_CMD_TYPE_NOP, ··· 6195 6227 ufshcd_setup_vreg(hba, true); 6196 6228 ufshcd_config_vreg_hpm(hba, hba->vreg_info.vccq); 6197 6229 ufshcd_config_vreg_hpm(hba, hba->vreg_info.vccq2); 6198 - ufshcd_hold(hba, false); 6230 + ufshcd_hold(hba); 6199 6231 if (!ufshcd_is_clkgating_allowed(hba)) 6200 6232 ufshcd_setup_clocks(hba, true); 6201 6233 ufshcd_release(hba); 6202 6234 pm_op = hba->is_sys_suspended ? UFS_SYSTEM_PM : UFS_RUNTIME_PM; 6203 6235 ufshcd_vops_resume(hba, pm_op); 6204 6236 } else { 6205 - ufshcd_hold(hba, false); 6237 + ufshcd_hold(hba); 6206 6238 if (ufshcd_is_clkscaling_supported(hba) && 6207 6239 hba->clk_scaling.is_enabled) 6208 6240 ufshcd_suspend_clkscaling(hba); 6209 6241 ufshcd_clk_scaling_allow(hba, false); 6210 6242 } 6211 6243 ufshcd_scsi_block_requests(hba); 6212 - /* Drain ufshcd_queuecommand() */ 6213 - synchronize_rcu(); 6244 + /* Wait for ongoing ufshcd_queuecommand() calls to finish. */ 6245 + blk_mq_wait_quiesce_done(&hba->host->tag_set); 6214 6246 cancel_work_sync(&hba->eeh_work); 6215 6247 } 6216 6248 ··· 6855 6887 return PTR_ERR(req); 6856 6888 6857 6889 req->end_io_data = &wait; 6858 - ufshcd_hold(hba, false); 6890 + ufshcd_hold(hba); 6859 6891 6860 6892 spin_lock_irqsave(host->host_lock, flags); 6861 6893 ··· 7092 7124 cmd_type = DEV_CMD_TYPE_NOP; 7093 7125 fallthrough; 7094 7126 case UPIU_TRANSACTION_QUERY_REQ: 7095 - ufshcd_hold(hba, false); 7127 + ufshcd_hold(hba); 7096 7128 mutex_lock(&hba->dev_cmd.lock); 7097 7129 err = ufshcd_issue_devman_upiu_cmd(hba, req_upiu, rsp_upiu, 7098 7130 desc_buff, buff_len, ··· 7158 7190 u16 ehs_len; 7159 7191 7160 7192 /* Protects use of hba->reserved_slot. 
*/ 7161 - ufshcd_hold(hba, false); 7193 + ufshcd_hold(hba); 7162 7194 mutex_lock(&hba->dev_cmd.lock); 7163 7195 down_read(&hba->clk_scaling_lock); 7164 7196 ··· 7393 7425 7394 7426 WARN_ONCE(tag < 0, "Invalid tag %d\n", tag); 7395 7427 7396 - ufshcd_hold(hba, false); 7428 + ufshcd_hold(hba); 7397 7429 reg = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL); 7398 7430 /* If command is already aborted/completed, return FAILED. */ 7399 7431 if (!(test_bit(tag, &hba->outstanding_reqs))) { ··· 9384 9416 * If we can't transition into any of the low power modes 9385 9417 * just gate the clocks. 9386 9418 */ 9387 - ufshcd_hold(hba, false); 9419 + ufshcd_hold(hba); 9388 9420 hba->clk_gating.is_suspended = true; 9389 9421 9390 9422 if (ufshcd_is_clkscaling_supported(hba)) ··· 10172 10204 host->max_channel = UFSHCD_MAX_CHANNEL; 10173 10205 host->unique_id = host->host_no; 10174 10206 host->max_cmd_len = UFS_CDB_SIZE; 10207 + host->queuecommand_may_block = !!(hba->caps & UFSHCD_CAP_CLK_GATING); 10175 10208 10176 10209 hba->max_pwr_info.is_valid = false; 10177 10210
+6
include/scsi/scsi_host.h
··· 458 458 /* True if the host uses host-wide tagspace */ 459 459 unsigned host_tagset:1; 460 460 461 + /* The queuecommand callback may block. See also BLK_MQ_F_BLOCKING. */ 462 + unsigned queuecommand_may_block:1; 463 + 461 464 /* 462 465 * Countdown for host blocking with no commands outstanding. 463 466 */ ··· 655 652 656 653 /* True if the host uses host-wide tagspace */ 657 654 unsigned host_tagset:1; 655 + 656 + /* The queuecommand callback may block. See also BLK_MQ_F_BLOCKING. */ 657 + unsigned queuecommand_may_block:1; 658 658 659 659 /* Host responded with short (<36 bytes) INQUIRY result */ 660 660 unsigned short_inquiry:1;
+1 -1
include/ufs/ufshcd.h
··· 1358 1358 int ufshcd_read_string_desc(struct ufs_hba *hba, u8 desc_index, 1359 1359 u8 **buf, bool ascii); 1360 1360 1361 - int ufshcd_hold(struct ufs_hba *hba, bool async); 1361 + void ufshcd_hold(struct ufs_hba *hba); 1362 1362 void ufshcd_release(struct ufs_hba *hba); 1363 1363 1364 1364 void ufshcd_clkgate_delay_set(struct device *dev, unsigned long value);