Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

nvme: add support for TP4084 - Time-to-Ready Enhancements

Add support for using longer timeouts during controller initialization
and letting the controller come up with namespaces that are not ready
for I/O yet. We skip these not-ready namespaces during scanning and
only bring them online once another scan is kicked off by the AEN that
is sent when the NRDY bit gets set in the I/O Command Set Independent
Identify Namespace Data Structure. This asynchronous probing avoids
blocking the kernel boot when controllers take a very long time to
recover after unclean shutdowns (up to minutes).

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>

+102 -6
+1
drivers/nvme/host/constants.c
··· 91 91 [NVME_SC_NS_WRITE_PROTECTED] = "Namespace is Write Protected", 92 92 [NVME_SC_CMD_INTERRUPTED] = "Command Interrupted", 93 93 [NVME_SC_TRANSIENT_TR_ERR] = "Transient Transport Error", 94 + [NVME_SC_ADMIN_COMMAND_MEDIA_NOT_READY] = "Admin Command Media Not Ready", 94 95 [NVME_SC_INVALID_IO_CMD_SET] = "Invalid IO Command Set", 95 96 [NVME_SC_LBA_RANGE] = "LBA Out of Range", 96 97 [NVME_SC_CAP_EXCEEDED] = "Capacity Exceeded",
+70 -6
drivers/nvme/host/core.c
··· 1427 1427 return error; 1428 1428 } 1429 1429 1430 + static int nvme_identify_ns_cs_indep(struct nvme_ctrl *ctrl, unsigned nsid, 1431 + struct nvme_id_ns_cs_indep **id) 1432 + { 1433 + struct nvme_command c = { 1434 + .identify.opcode = nvme_admin_identify, 1435 + .identify.nsid = cpu_to_le32(nsid), 1436 + .identify.cns = NVME_ID_CNS_NS_CS_INDEP, 1437 + }; 1438 + int ret; 1439 + 1440 + *id = kmalloc(sizeof(**id), GFP_KERNEL); 1441 + if (!*id) 1442 + return -ENOMEM; 1443 + 1444 + ret = nvme_submit_sync_cmd(ctrl->admin_q, &c, *id, sizeof(**id)); 1445 + if (ret) { 1446 + dev_warn(ctrl->device, 1447 + "Identify namespace (CS independent) failed (%d)\n", 1448 + ret); 1449 + kfree(*id); 1450 + return ret; 1451 + } 1452 + 1453 + return 0; 1454 + } 1455 + 1430 1456 static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid, 1431 1457 unsigned int dword11, void *buffer, size_t buflen, u32 *result) 1432 1458 { ··· 2129 2103 .pr_ops = &nvme_pr_ops, 2130 2104 }; 2131 2105 2132 - static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled) 2106 + static int nvme_wait_ready(struct nvme_ctrl *ctrl, u32 timeout, bool enabled) 2133 2107 { 2134 - unsigned long timeout = 2135 - ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies; 2108 + unsigned long timeout_jiffies = ((timeout + 1) * HZ / 2) + jiffies; 2136 2109 u32 csts, bit = enabled ? NVME_CSTS_RDY : 0; 2137 2110 int ret; 2138 2111 ··· 2144 2119 usleep_range(1000, 2000); 2145 2120 if (fatal_signal_pending(current)) 2146 2121 return -EINTR; 2147 - if (time_after(jiffies, timeout)) { 2122 + if (time_after(jiffies, timeout_jiffies)) { 2148 2123 dev_err(ctrl->device, 2149 2124 "Device not ready; aborting %s, CSTS=0x%x\n", 2150 2125 enabled ? 
"initialisation" : "reset", csts); ··· 2175 2150 if (ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) 2176 2151 msleep(NVME_QUIRK_DELAY_AMOUNT); 2177 2152 2178 - return nvme_wait_ready(ctrl, ctrl->cap, false); 2153 + return nvme_wait_ready(ctrl, NVME_CAP_TIMEOUT(ctrl->cap), false); 2179 2154 } 2180 2155 EXPORT_SYMBOL_GPL(nvme_disable_ctrl); 2181 2156 2182 2157 int nvme_enable_ctrl(struct nvme_ctrl *ctrl) 2183 2158 { 2184 2159 unsigned dev_page_min; 2160 + u32 timeout; 2185 2161 int ret; 2186 2162 2187 2163 ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &ctrl->cap); ··· 2203 2177 ctrl->ctrl_config = NVME_CC_CSS_CSI; 2204 2178 else 2205 2179 ctrl->ctrl_config = NVME_CC_CSS_NVM; 2180 + 2181 + if (ctrl->cap & NVME_CAP_CRMS_CRWMS) { 2182 + u32 crto; 2183 + 2184 + ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CRTO, &crto); 2185 + if (ret) { 2186 + dev_err(ctrl->device, "Reading CRTO failed (%d)\n", 2187 + ret); 2188 + return ret; 2189 + } 2190 + 2191 + if (ctrl->cap & NVME_CAP_CRMS_CRIMS) { 2192 + ctrl->ctrl_config |= NVME_CC_CRIME; 2193 + timeout = NVME_CRTO_CRIMT(crto); 2194 + } else { 2195 + timeout = NVME_CRTO_CRWMT(crto); 2196 + } 2197 + } else { 2198 + timeout = NVME_CAP_TIMEOUT(ctrl->cap); 2199 + } 2200 + 2206 2201 ctrl->ctrl_config |= (NVME_CTRL_PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT; 2207 2202 ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE; 2208 2203 ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES; ··· 2232 2185 ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config); 2233 2186 if (ret) 2234 2187 return ret; 2235 - return nvme_wait_ready(ctrl, ctrl->cap, true); 2188 + return nvme_wait_ready(ctrl, timeout, true); 2236 2189 } 2237 2190 EXPORT_SYMBOL_GPL(nvme_enable_ctrl); 2238 2191 ··· 4139 4092 static void nvme_validate_or_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) 4140 4093 { 4141 4094 struct nvme_ns_ids ids = { }; 4095 + struct nvme_id_ns_cs_indep *id; 4142 4096 struct nvme_ns *ns; 4097 + bool ready = true; 4143 4098 4144 4099 if 
(nvme_identify_ns_descs(ctrl, nsid, &ids)) 4100 + return; 4101 + 4102 + /* 4103 + * Check if the namespace is ready. If not ignore it, we will get an 4104 + * AEN once it becomes ready and restart the scan. 4105 + */ 4106 + if ((ctrl->cap & NVME_CAP_CRMS_CRIMS) && 4107 + !nvme_identify_ns_cs_indep(ctrl, nsid, &id)) { 4108 + ready = id->nstat & NVME_NSTAT_NRDY; 4109 + kfree(id); 4110 + } 4111 + 4112 + if (!ready) 4145 4113 return; 4146 4114 4147 4115 ns = nvme_find_get_ns(ctrl, nsid); ··· 4903 4841 BUILD_BUG_ON(sizeof(struct nvme_command) != 64); 4904 4842 BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE); 4905 4843 BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE); 4844 + BUILD_BUG_ON(sizeof(struct nvme_id_ns_cs_indep) != 4845 + NVME_IDENTIFY_DATA_SIZE); 4906 4846 BUILD_BUG_ON(sizeof(struct nvme_id_ns_zns) != NVME_IDENTIFY_DATA_SIZE); 4907 4847 BUILD_BUG_ON(sizeof(struct nvme_id_ns_nvm) != NVME_IDENTIFY_DATA_SIZE); 4908 4848 BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_zns) != NVME_IDENTIFY_DATA_SIZE);
+31
include/linux/nvme.h
··· 137 137 NVME_REG_CMBMSC = 0x0050, /* Controller Memory Buffer Memory 138 138 * Space Control 139 139 */ 140 + NVME_REG_CRTO = 0x0068, /* Controller Ready Timeouts */ 140 141 NVME_REG_PMRCAP = 0x0e00, /* Persistent Memory Capabilities */ 141 142 NVME_REG_PMRCTL = 0x0e04, /* Persistent Memory Region Control */ 142 143 NVME_REG_PMRSTS = 0x0e08, /* Persistent Memory Region Status */ ··· 161 160 162 161 #define NVME_CMB_BIR(cmbloc) ((cmbloc) & 0x7) 163 162 #define NVME_CMB_OFST(cmbloc) (((cmbloc) >> 12) & 0xfffff) 163 + 164 + #define NVME_CRTO_CRIMT(crto) ((crto) >> 16) 165 + #define NVME_CRTO_CRWMT(crto) ((crto) & 0xffff) 164 166 165 167 enum { 166 168 NVME_CMBSZ_SQS = 1 << 0, ··· 208 204 NVME_CC_SHN_MASK = 3 << NVME_CC_SHN_SHIFT, 209 205 NVME_CC_IOSQES = NVME_NVM_IOSQES << NVME_CC_IOSQES_SHIFT, 210 206 NVME_CC_IOCQES = NVME_NVM_IOCQES << NVME_CC_IOCQES_SHIFT, 207 + NVME_CC_CRIME = 1 << 24, 211 208 }; 212 209 213 210 enum { ··· 230 225 enum { 231 226 NVME_CAP_CSS_NVM = 1 << 0, 232 227 NVME_CAP_CSS_CSI = 1 << 6, 228 + }; 229 + 230 + enum { 231 + NVME_CAP_CRMS_CRIMS = 1ULL << 59, 232 + NVME_CAP_CRMS_CRWMS = 1ULL << 60, 233 233 }; 234 234 235 235 struct nvme_id_power_state { ··· 424 414 __u8 vs[3712]; 425 415 }; 426 416 417 + /* I/O Command Set Independent Identify Namespace Data Structure */ 418 + struct nvme_id_ns_cs_indep { 419 + __u8 nsfeat; 420 + __u8 nmic; 421 + __u8 rescap; 422 + __u8 fpi; 423 + __le32 anagrpid; 424 + __u8 nsattr; 425 + __u8 rsvd9; 426 + __le16 nvmsetid; 427 + __le16 endgid; 428 + __u8 nstat; 429 + __u8 rsvd15[4081]; 430 + }; 431 + 427 432 struct nvme_zns_lbafe { 428 433 __le64 zsze; 429 434 __u8 zdes; ··· 503 478 NVME_ID_CNS_NS_DESC_LIST = 0x03, 504 479 NVME_ID_CNS_CS_NS = 0x05, 505 480 NVME_ID_CNS_CS_CTRL = 0x06, 481 + NVME_ID_CNS_NS_CS_INDEP = 0x08, 506 482 NVME_ID_CNS_NS_PRESENT_LIST = 0x10, 507 483 NVME_ID_CNS_NS_PRESENT = 0x11, 508 484 NVME_ID_CNS_CTRL_NS_LIST = 0x12, ··· 555 529 NVME_NS_DPS_PI_TYPE1 = 1, 556 530 NVME_NS_DPS_PI_TYPE2 = 2, 
557 531 NVME_NS_DPS_PI_TYPE3 = 3, 532 + }; 533 + 534 + enum { 535 + NVME_NSTAT_NRDY = 1 << 0, 558 536 }; 559 537 560 538 enum { ··· 1622 1592 NVME_SC_NS_WRITE_PROTECTED = 0x20, 1623 1593 NVME_SC_CMD_INTERRUPTED = 0x21, 1624 1594 NVME_SC_TRANSIENT_TR_ERR = 0x22, 1595 + NVME_SC_ADMIN_COMMAND_MEDIA_NOT_READY = 0x24, 1625 1596 NVME_SC_INVALID_IO_CMD_SET = 0x2C, 1626 1597 1627 1598 NVME_SC_LBA_RANGE = 0x80,