Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'nvme-6.13-2024-11-13' of git://git.infradead.org/nvme into for-6.13/block

Pull NVMe updates from Keith:

"nvme updates for Linux 6.13

- Use uring_cmd helper (Pavel)
- Host Memory Buffer allocation enhancements (Christoph)
- Target persistent reservation support (Guixin)
- Persistent reservation tracing (Guixin)
- NVMe 2.1 specification support (Keith)
- Rotational Meta Support (Matias, Wang, Keith)
- Volatile cache detection enhancement (Guixin)"

* tag 'nvme-6.13-2024-11-13' of git://git.infradead.org/nvme: (22 commits)
nvmet: add tracing of reservation commands
nvme: parse reservation commands's action and rtype to string
nvmet: report ns's vwc not present
nvme: check ns's volatile write cache not present
nvme: add rotational support
nvme: use command set independent id ns if available
nvmet: support for csi identify ns
nvmet: implement rotational media information log
nvmet: implement endurance groups
nvmet: declare 2.1 version compliance
nvmet: implement crto property
nvmet: implement supported features log
nvmet: implement supported log pages
nvmet: implement active command set ns list
nvmet: implement id ns for nvm command set
nvmet: support reservation feature
nvme: add reservation command's defines
nvme-core: remove repeated wq flags
nvmet: make nvmet_wq visible in sysfs
nvme-pci: use dma_alloc_noncontigous if possible
...

+1968 -52
+19 -10
drivers/nvme/host/core.c
··· 42 42 bool is_readonly; 43 43 bool is_ready; 44 44 bool is_removed; 45 + bool is_rotational; 46 + bool no_vwc; 45 47 }; 46 48 47 49 unsigned int admin_timeout = 60; ··· 1617 1615 info->is_shared = id->nmic & NVME_NS_NMIC_SHARED; 1618 1616 info->is_readonly = id->nsattr & NVME_NS_ATTR_RO; 1619 1617 info->is_ready = id->nstat & NVME_NSTAT_NRDY; 1618 + info->is_rotational = id->nsfeat & NVME_NS_ROTATIONAL; 1619 + info->no_vwc = id->nsfeat & NVME_NS_VWC_NOT_PRESENT; 1620 1620 } 1621 1621 kfree(id); 1622 1622 return ret; ··· 2161 2157 ns->head->ids.csi == NVME_CSI_ZNS) 2162 2158 nvme_update_zone_info(ns, &lim, &zi); 2163 2159 2164 - if (ns->ctrl->vwc & NVME_CTRL_VWC_PRESENT) 2160 + if ((ns->ctrl->vwc & NVME_CTRL_VWC_PRESENT) && !info->no_vwc) 2165 2161 lim.features |= BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA; 2166 2162 else 2167 2163 lim.features &= ~(BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA); 2164 + 2165 + if (info->is_rotational) 2166 + lim.features |= BLK_FEAT_ROTATIONAL; 2168 2167 2169 2168 /* 2170 2169 * Register a metadata profile for PI, or the plain non-integrity NVMe ··· 3615 3608 head->ns_id = info->nsid; 3616 3609 head->ids = info->ids; 3617 3610 head->shared = info->is_shared; 3611 + head->rotational = info->is_rotational; 3618 3612 ratelimit_state_init(&head->rs_nuse, 5 * HZ, 1); 3619 3613 ratelimit_set_flags(&head->rs_nuse, RATELIMIT_MSG_ON_RELEASE); 3620 3614 kref_init(&head->ref); ··· 3996 3988 { 3997 3989 struct nvme_ns_info info = { .nsid = nsid }; 3998 3990 struct nvme_ns *ns; 3999 - int ret; 3991 + int ret = 1; 4000 3992 4001 3993 if (nvme_identify_ns_descs(ctrl, &info)) 4002 3994 return; ··· 4013 4005 * set up a namespace. If not fall back to the legacy version. 
4014 4006 */ 4015 4007 if ((ctrl->cap & NVME_CAP_CRMS_CRIMS) || 4016 - (info.ids.csi != NVME_CSI_NVM && info.ids.csi != NVME_CSI_ZNS)) 4008 + (info.ids.csi != NVME_CSI_NVM && info.ids.csi != NVME_CSI_ZNS) || 4009 + ctrl->vs >= NVME_VS(2, 0, 0)) 4017 4010 ret = nvme_ns_info_from_id_cs_indep(ctrl, &info); 4018 - else 4011 + if (ret > 0) 4019 4012 ret = nvme_ns_info_from_identify(ctrl, &info); 4020 4013 4021 4014 if (info.is_removed) ··· 5015 5006 BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_nvm) != NVME_IDENTIFY_DATA_SIZE); 5016 5007 BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64); 5017 5008 BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512); 5009 + BUILD_BUG_ON(sizeof(struct nvme_endurance_group_log) != 512); 5010 + BUILD_BUG_ON(sizeof(struct nvme_rotational_media_log) != 512); 5018 5011 BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64); 5019 5012 BUILD_BUG_ON(sizeof(struct nvme_directive_cmd) != 64); 5020 5013 BUILD_BUG_ON(sizeof(struct nvme_feat_host_behavior) != 512); ··· 5025 5014 5026 5015 static int __init nvme_core_init(void) 5027 5016 { 5017 + unsigned int wq_flags = WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS; 5028 5018 int result = -ENOMEM; 5029 5019 5030 5020 _nvme_check_size(); 5031 5021 5032 - nvme_wq = alloc_workqueue("nvme-wq", 5033 - WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0); 5022 + nvme_wq = alloc_workqueue("nvme-wq", wq_flags, 0); 5034 5023 if (!nvme_wq) 5035 5024 goto out; 5036 5025 5037 - nvme_reset_wq = alloc_workqueue("nvme-reset-wq", 5038 - WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0); 5026 + nvme_reset_wq = alloc_workqueue("nvme-reset-wq", wq_flags, 0); 5039 5027 if (!nvme_reset_wq) 5040 5028 goto destroy_wq; 5041 5029 5042 - nvme_delete_wq = alloc_workqueue("nvme-delete-wq", 5043 - WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0); 5030 + nvme_delete_wq = alloc_workqueue("nvme-delete-wq", wq_flags, 0); 5044 5031 if (!nvme_delete_wq) 5045 5032 goto destroy_reset_wq; 5046 5033
+1 -3
drivers/nvme/host/ioctl.c
··· 401 401 static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu( 402 402 struct io_uring_cmd *ioucmd) 403 403 { 404 - return (struct nvme_uring_cmd_pdu *)&ioucmd->pdu; 404 + return io_uring_cmd_to_pdu(ioucmd, struct nvme_uring_cmd_pdu); 405 405 } 406 406 407 407 static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd, ··· 630 630 { 631 631 struct nvme_ctrl *ctrl = ns->ctrl; 632 632 int ret; 633 - 634 - BUILD_BUG_ON(sizeof(struct nvme_uring_cmd_pdu) > sizeof(ioucmd->pdu)); 635 633 636 634 ret = nvme_uring_cmd_checks(issue_flags); 637 635 if (ret)
+1
drivers/nvme/host/nvme.h
··· 474 474 struct list_head entry; 475 475 struct kref ref; 476 476 bool shared; 477 + bool rotational; 477 478 bool passthru_err_log_enabled; 478 479 struct nvme_effects_log *effects; 479 480 u64 nuse;
+62 -12
drivers/nvme/host/pci.c
··· 141 141 struct nvme_ctrl ctrl; 142 142 u32 last_ps; 143 143 bool hmb; 144 + struct sg_table *hmb_sgt; 144 145 145 146 mempool_t *iod_mempool; 146 147 ··· 154 153 /* host memory buffer support: */ 155 154 u64 host_mem_size; 156 155 u32 nr_host_mem_descs; 156 + u32 host_mem_descs_size; 157 157 dma_addr_t host_mem_descs_dma; 158 158 struct nvme_host_mem_buf_desc *host_mem_descs; 159 159 void **host_mem_desc_bufs; ··· 1953 1951 return ret; 1954 1952 } 1955 1953 1956 - static void nvme_free_host_mem(struct nvme_dev *dev) 1954 + static void nvme_free_host_mem_multi(struct nvme_dev *dev) 1957 1955 { 1958 1956 int i; 1959 1957 ··· 1968 1966 1969 1967 kfree(dev->host_mem_desc_bufs); 1970 1968 dev->host_mem_desc_bufs = NULL; 1971 - dma_free_coherent(dev->dev, 1972 - dev->nr_host_mem_descs * sizeof(*dev->host_mem_descs), 1969 + } 1970 + 1971 + static void nvme_free_host_mem(struct nvme_dev *dev) 1972 + { 1973 + if (dev->hmb_sgt) 1974 + dma_free_noncontiguous(dev->dev, dev->host_mem_size, 1975 + dev->hmb_sgt, DMA_BIDIRECTIONAL); 1976 + else 1977 + nvme_free_host_mem_multi(dev); 1978 + 1979 + dma_free_coherent(dev->dev, dev->host_mem_descs_size, 1973 1980 dev->host_mem_descs, dev->host_mem_descs_dma); 1974 1981 dev->host_mem_descs = NULL; 1982 + dev->host_mem_descs_size = 0; 1975 1983 dev->nr_host_mem_descs = 0; 1976 1984 } 1977 1985 1978 - static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred, 1986 + static int nvme_alloc_host_mem_single(struct nvme_dev *dev, u64 size) 1987 + { 1988 + dev->hmb_sgt = dma_alloc_noncontiguous(dev->dev, size, 1989 + DMA_BIDIRECTIONAL, GFP_KERNEL, 0); 1990 + if (!dev->hmb_sgt) 1991 + return -ENOMEM; 1992 + 1993 + dev->host_mem_descs = dma_alloc_coherent(dev->dev, 1994 + sizeof(*dev->host_mem_descs), &dev->host_mem_descs_dma, 1995 + GFP_KERNEL); 1996 + if (!dev->host_mem_descs) { 1997 + dma_free_noncontiguous(dev->dev, dev->host_mem_size, 1998 + dev->hmb_sgt, DMA_BIDIRECTIONAL); 1999 + dev->hmb_sgt = NULL; 2000 + return -ENOMEM; 
2001 + } 2002 + dev->host_mem_size = size; 2003 + dev->host_mem_descs_size = sizeof(*dev->host_mem_descs); 2004 + dev->nr_host_mem_descs = 1; 2005 + 2006 + dev->host_mem_descs[0].addr = 2007 + cpu_to_le64(dev->hmb_sgt->sgl->dma_address); 2008 + dev->host_mem_descs[0].size = cpu_to_le32(size / NVME_CTRL_PAGE_SIZE); 2009 + return 0; 2010 + } 2011 + 2012 + static int nvme_alloc_host_mem_multi(struct nvme_dev *dev, u64 preferred, 1979 2013 u32 chunk_size) 1980 2014 { 1981 2015 struct nvme_host_mem_buf_desc *descs; 1982 - u32 max_entries, len; 2016 + u32 max_entries, len, descs_size; 1983 2017 dma_addr_t descs_dma; 1984 2018 int i = 0; 1985 2019 void **bufs; ··· 2028 1990 if (dev->ctrl.hmmaxd && dev->ctrl.hmmaxd < max_entries) 2029 1991 max_entries = dev->ctrl.hmmaxd; 2030 1992 2031 - descs = dma_alloc_coherent(dev->dev, max_entries * sizeof(*descs), 2032 - &descs_dma, GFP_KERNEL); 1993 + descs_size = max_entries * sizeof(*descs); 1994 + descs = dma_alloc_coherent(dev->dev, descs_size, &descs_dma, 1995 + GFP_KERNEL); 2033 1996 if (!descs) 2034 1997 goto out; 2035 1998 ··· 2059 2020 dev->host_mem_size = size; 2060 2021 dev->host_mem_descs = descs; 2061 2022 dev->host_mem_descs_dma = descs_dma; 2023 + dev->host_mem_descs_size = descs_size; 2062 2024 dev->host_mem_desc_bufs = bufs; 2063 2025 return 0; 2064 2026 ··· 2074 2034 2075 2035 kfree(bufs); 2076 2036 out_free_descs: 2077 - dma_free_coherent(dev->dev, max_entries * sizeof(*descs), descs, 2078 - descs_dma); 2037 + dma_free_coherent(dev->dev, descs_size, descs, descs_dma); 2079 2038 out: 2080 2039 dev->host_mem_descs = NULL; 2081 2040 return -ENOMEM; ··· 2086 2047 u64 hmminds = max_t(u32, dev->ctrl.hmminds * 4096, PAGE_SIZE * 2); 2087 2048 u64 chunk_size; 2088 2049 2050 + /* 2051 + * If there is an IOMMU that can merge pages, try a virtually 2052 + * non-contiguous allocation for a single segment first. 
2053 + */ 2054 + if (!(PAGE_SIZE & dma_get_merge_boundary(dev->dev))) { 2055 + if (!nvme_alloc_host_mem_single(dev, preferred)) 2056 + return 0; 2057 + } 2058 + 2089 2059 /* start big and work our way down */ 2090 2060 for (chunk_size = min_chunk; chunk_size >= hmminds; chunk_size /= 2) { 2091 - if (!__nvme_alloc_host_mem(dev, preferred, chunk_size)) { 2061 + if (!nvme_alloc_host_mem_multi(dev, preferred, chunk_size)) { 2092 2062 if (!min || dev->host_mem_size >= min) 2093 2063 return 0; 2094 2064 nvme_free_host_mem(dev); ··· 2145 2097 } 2146 2098 2147 2099 dev_info(dev->ctrl.device, 2148 - "allocated %lld MiB host memory buffer.\n", 2149 - dev->host_mem_size >> ilog2(SZ_1M)); 2100 + "allocated %lld MiB host memory buffer (%u segment%s).\n", 2101 + dev->host_mem_size >> ilog2(SZ_1M), 2102 + dev->nr_host_mem_descs, 2103 + str_plural(dev->nr_host_mem_descs)); 2150 2104 } 2151 2105 2152 2106 ret = nvme_set_host_mem(dev, enable_bits);
+52 -6
drivers/nvme/host/trace.c
··· 228 228 229 229 static const char *nvme_trace_resv_reg(struct trace_seq *p, u8 *cdw10) 230 230 { 231 + static const char * const rrega_strs[] = { 232 + [0x00] = "register", 233 + [0x01] = "unregister", 234 + [0x02] = "replace", 235 + }; 231 236 const char *ret = trace_seq_buffer_ptr(p); 232 237 u8 rrega = cdw10[0] & 0x7; 233 238 u8 iekey = (cdw10[0] >> 3) & 0x1; 234 239 u8 ptpl = (cdw10[3] >> 6) & 0x3; 240 + const char *rrega_str; 235 241 236 - trace_seq_printf(p, "rrega=%u, iekey=%u, ptpl=%u", 237 - rrega, iekey, ptpl); 242 + if (rrega < ARRAY_SIZE(rrega_strs) && rrega_strs[rrega]) 243 + rrega_str = rrega_strs[rrega]; 244 + else 245 + rrega_str = "reserved"; 246 + 247 + trace_seq_printf(p, "rrega=%u:%s, iekey=%u, ptpl=%u", 248 + rrega, rrega_str, iekey, ptpl); 238 249 trace_seq_putc(p, 0); 239 250 240 251 return ret; 241 252 } 242 253 254 + static const char * const rtype_strs[] = { 255 + [0x00] = "reserved", 256 + [0x01] = "write exclusive", 257 + [0x02] = "exclusive access", 258 + [0x03] = "write exclusive registrants only", 259 + [0x04] = "exclusive access registrants only", 260 + [0x05] = "write exclusive all registrants", 261 + [0x06] = "exclusive access all registrants", 262 + }; 263 + 243 264 static const char *nvme_trace_resv_acq(struct trace_seq *p, u8 *cdw10) 244 265 { 266 + static const char * const racqa_strs[] = { 267 + [0x00] = "acquire", 268 + [0x01] = "preempt", 269 + [0x02] = "preempt and abort", 270 + }; 245 271 const char *ret = trace_seq_buffer_ptr(p); 246 272 u8 racqa = cdw10[0] & 0x7; 247 273 u8 iekey = (cdw10[0] >> 3) & 0x1; 248 274 u8 rtype = cdw10[1]; 275 + const char *racqa_str = "reserved"; 276 + const char *rtype_str = "reserved"; 249 277 250 - trace_seq_printf(p, "racqa=%u, iekey=%u, rtype=%u", 251 - racqa, iekey, rtype); 278 + if (racqa < ARRAY_SIZE(racqa_strs) && racqa_strs[racqa]) 279 + racqa_str = racqa_strs[racqa]; 280 + 281 + if (rtype < ARRAY_SIZE(rtype_strs) && rtype_strs[rtype]) 282 + rtype_str = rtype_strs[rtype]; 283 + 
284 + trace_seq_printf(p, "racqa=%u:%s, iekey=%u, rtype=%u:%s", 285 + racqa, racqa_str, iekey, rtype, rtype_str); 252 286 trace_seq_putc(p, 0); 253 287 254 288 return ret; ··· 290 256 291 257 static const char *nvme_trace_resv_rel(struct trace_seq *p, u8 *cdw10) 292 258 { 259 + static const char * const rrela_strs[] = { 260 + [0x00] = "release", 261 + [0x01] = "clear", 262 + }; 293 263 const char *ret = trace_seq_buffer_ptr(p); 294 264 u8 rrela = cdw10[0] & 0x7; 295 265 u8 iekey = (cdw10[0] >> 3) & 0x1; 296 266 u8 rtype = cdw10[1]; 267 + const char *rrela_str = "reserved"; 268 + const char *rtype_str = "reserved"; 297 269 298 - trace_seq_printf(p, "rrela=%u, iekey=%u, rtype=%u", 299 - rrela, iekey, rtype); 270 + if (rrela < ARRAY_SIZE(rrela_strs) && rrela_strs[rrela]) 271 + rrela_str = rrela_strs[rrela]; 272 + 273 + if (rtype < ARRAY_SIZE(rtype_strs) && rtype_strs[rtype]) 274 + rtype_str = rtype_strs[rtype]; 275 + 276 + trace_seq_printf(p, "rrela=%u:%s, iekey=%u, rtype=%u:%s", 277 + rrela, rrela_str, iekey, rtype, rtype_str); 300 278 trace_seq_putc(p, 0); 301 279 302 280 return ret;
+1 -1
drivers/nvme/target/Makefile
··· 10 10 obj-$(CONFIG_NVME_TARGET_TCP) += nvmet-tcp.o 11 11 12 12 nvmet-y += core.o configfs.o admin-cmd.o fabrics-cmd.o \ 13 - discovery.o io-cmd-file.o io-cmd-bdev.o 13 + discovery.o io-cmd-file.o io-cmd-bdev.o pr.o 14 14 nvmet-$(CONFIG_NVME_TARGET_DEBUGFS) += debugfs.o 15 15 nvmet-$(CONFIG_NVME_TARGET_PASSTHRU) += passthru.o 16 16 nvmet-$(CONFIG_BLK_DEV_ZONED) += zns.o
+283 -5
drivers/nvme/target/admin-cmd.c
··· 71 71 nvmet_req_complete(req, 0); 72 72 } 73 73 74 + static void nvmet_execute_get_supported_log_pages(struct nvmet_req *req) 75 + { 76 + struct nvme_supported_log *logs; 77 + u16 status; 78 + 79 + logs = kzalloc(sizeof(*logs), GFP_KERNEL); 80 + if (!logs) { 81 + status = NVME_SC_INTERNAL; 82 + goto out; 83 + } 84 + 85 + logs->lids[NVME_LOG_SUPPORTED] = cpu_to_le32(NVME_LIDS_LSUPP); 86 + logs->lids[NVME_LOG_ERROR] = cpu_to_le32(NVME_LIDS_LSUPP); 87 + logs->lids[NVME_LOG_SMART] = cpu_to_le32(NVME_LIDS_LSUPP); 88 + logs->lids[NVME_LOG_FW_SLOT] = cpu_to_le32(NVME_LIDS_LSUPP); 89 + logs->lids[NVME_LOG_CHANGED_NS] = cpu_to_le32(NVME_LIDS_LSUPP); 90 + logs->lids[NVME_LOG_CMD_EFFECTS] = cpu_to_le32(NVME_LIDS_LSUPP); 91 + logs->lids[NVME_LOG_ENDURANCE_GROUP] = cpu_to_le32(NVME_LIDS_LSUPP); 92 + logs->lids[NVME_LOG_ANA] = cpu_to_le32(NVME_LIDS_LSUPP); 93 + logs->lids[NVME_LOG_FEATURES] = cpu_to_le32(NVME_LIDS_LSUPP); 94 + logs->lids[NVME_LOG_RMI] = cpu_to_le32(NVME_LIDS_LSUPP); 95 + logs->lids[NVME_LOG_RESERVATION] = cpu_to_le32(NVME_LIDS_LSUPP); 96 + 97 + status = nvmet_copy_to_sgl(req, 0, logs, sizeof(*logs)); 98 + kfree(logs); 99 + out: 100 + nvmet_req_complete(req, status); 101 + } 102 + 74 103 static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req, 75 104 struct nvme_smart_log *slog) 76 105 { ··· 159 130 return NVME_SC_SUCCESS; 160 131 } 161 132 133 + static void nvmet_execute_get_log_page_rmi(struct nvmet_req *req) 134 + { 135 + struct nvme_rotational_media_log *log; 136 + struct gendisk *disk; 137 + u16 status; 138 + 139 + req->cmd->common.nsid = cpu_to_le32(le16_to_cpu( 140 + req->cmd->get_log_page.lsi)); 141 + status = nvmet_req_find_ns(req); 142 + if (status) 143 + goto out; 144 + 145 + if (!req->ns->bdev || bdev_nonrot(req->ns->bdev)) { 146 + status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR; 147 + goto out; 148 + } 149 + 150 + if (req->transfer_len != sizeof(*log)) { 151 + status = NVME_SC_SGL_INVALID_DATA | NVME_STATUS_DNR; 152 + goto out; 153 + } 154 + 
155 + log = kzalloc(sizeof(*log), GFP_KERNEL); 156 + if (!log) 157 + goto out; 158 + 159 + log->endgid = req->cmd->get_log_page.lsi; 160 + disk = req->ns->bdev->bd_disk; 161 + if (disk && disk->ia_ranges) 162 + log->numa = cpu_to_le16(disk->ia_ranges->nr_ia_ranges); 163 + else 164 + log->numa = cpu_to_le16(1); 165 + 166 + status = nvmet_copy_to_sgl(req, 0, log, sizeof(*log)); 167 + kfree(log); 168 + out: 169 + nvmet_req_complete(req, status); 170 + } 171 + 162 172 static void nvmet_execute_get_log_page_smart(struct nvmet_req *req) 163 173 { 164 174 struct nvme_smart_log *log; ··· 244 176 log->iocs[nvme_cmd_read] = 245 177 log->iocs[nvme_cmd_flush] = 246 178 log->iocs[nvme_cmd_dsm] = 179 + log->iocs[nvme_cmd_resv_acquire] = 180 + log->iocs[nvme_cmd_resv_register] = 181 + log->iocs[nvme_cmd_resv_release] = 182 + log->iocs[nvme_cmd_resv_report] = 247 183 cpu_to_le32(NVME_CMD_EFFECTS_CSUPP); 248 184 log->iocs[nvme_cmd_write] = 249 185 log->iocs[nvme_cmd_write_zeroes] = ··· 344 272 return struct_size(desc, nsids, count); 345 273 } 346 274 275 + static void nvmet_execute_get_log_page_endgrp(struct nvmet_req *req) 276 + { 277 + u64 host_reads, host_writes, data_units_read, data_units_written; 278 + struct nvme_endurance_group_log *log; 279 + u16 status; 280 + 281 + /* 282 + * The target driver emulates each endurance group as its own 283 + * namespace, reusing the nsid as the endurance group identifier. 
284 + */ 285 + req->cmd->common.nsid = cpu_to_le32(le16_to_cpu( 286 + req->cmd->get_log_page.lsi)); 287 + status = nvmet_req_find_ns(req); 288 + if (status) 289 + goto out; 290 + 291 + log = kzalloc(sizeof(*log), GFP_KERNEL); 292 + if (!log) { 293 + status = NVME_SC_INTERNAL; 294 + goto out; 295 + } 296 + 297 + if (!req->ns->bdev) 298 + goto copy; 299 + 300 + host_reads = part_stat_read(req->ns->bdev, ios[READ]); 301 + data_units_read = 302 + DIV_ROUND_UP(part_stat_read(req->ns->bdev, sectors[READ]), 1000); 303 + host_writes = part_stat_read(req->ns->bdev, ios[WRITE]); 304 + data_units_written = 305 + DIV_ROUND_UP(part_stat_read(req->ns->bdev, sectors[WRITE]), 1000); 306 + 307 + put_unaligned_le64(host_reads, &log->hrc[0]); 308 + put_unaligned_le64(data_units_read, &log->dur[0]); 309 + put_unaligned_le64(host_writes, &log->hwc[0]); 310 + put_unaligned_le64(data_units_written, &log->duw[0]); 311 + copy: 312 + status = nvmet_copy_to_sgl(req, 0, log, sizeof(*log)); 313 + kfree(log); 314 + out: 315 + nvmet_req_complete(req, status); 316 + } 317 + 347 318 static void nvmet_execute_get_log_page_ana(struct nvmet_req *req) 348 319 { 349 320 struct nvme_ana_rsp_hdr hdr = { 0, }; ··· 432 317 nvmet_req_complete(req, status); 433 318 } 434 319 320 + static void nvmet_execute_get_log_page_features(struct nvmet_req *req) 321 + { 322 + struct nvme_supported_features_log *features; 323 + u16 status; 324 + 325 + features = kzalloc(sizeof(*features), GFP_KERNEL); 326 + if (!features) { 327 + status = NVME_SC_INTERNAL; 328 + goto out; 329 + } 330 + 331 + features->fis[NVME_FEAT_NUM_QUEUES] = 332 + cpu_to_le32(NVME_FIS_FSUPP | NVME_FIS_CSCPE); 333 + features->fis[NVME_FEAT_KATO] = 334 + cpu_to_le32(NVME_FIS_FSUPP | NVME_FIS_CSCPE); 335 + features->fis[NVME_FEAT_ASYNC_EVENT] = 336 + cpu_to_le32(NVME_FIS_FSUPP | NVME_FIS_CSCPE); 337 + features->fis[NVME_FEAT_HOST_ID] = 338 + cpu_to_le32(NVME_FIS_FSUPP | NVME_FIS_CSCPE); 339 + features->fis[NVME_FEAT_WRITE_PROTECT] = 340 + 
cpu_to_le32(NVME_FIS_FSUPP | NVME_FIS_NSCPE); 341 + features->fis[NVME_FEAT_RESV_MASK] = 342 + cpu_to_le32(NVME_FIS_FSUPP | NVME_FIS_NSCPE); 343 + 344 + status = nvmet_copy_to_sgl(req, 0, features, sizeof(*features)); 345 + kfree(features); 346 + out: 347 + nvmet_req_complete(req, status); 348 + } 349 + 435 350 static void nvmet_execute_get_log_page(struct nvmet_req *req) 436 351 { 437 352 if (!nvmet_check_transfer_len(req, nvmet_get_log_page_len(req->cmd))) 438 353 return; 439 354 440 355 switch (req->cmd->get_log_page.lid) { 356 + case NVME_LOG_SUPPORTED: 357 + return nvmet_execute_get_supported_log_pages(req); 441 358 case NVME_LOG_ERROR: 442 359 return nvmet_execute_get_log_page_error(req); 443 360 case NVME_LOG_SMART: ··· 485 338 return nvmet_execute_get_log_changed_ns(req); 486 339 case NVME_LOG_CMD_EFFECTS: 487 340 return nvmet_execute_get_log_cmd_effects_ns(req); 341 + case NVME_LOG_ENDURANCE_GROUP: 342 + return nvmet_execute_get_log_page_endgrp(req); 488 343 case NVME_LOG_ANA: 489 344 return nvmet_execute_get_log_page_ana(req); 345 + case NVME_LOG_FEATURES: 346 + return nvmet_execute_get_log_page_features(req); 347 + case NVME_LOG_RMI: 348 + return nvmet_execute_get_log_page_rmi(req); 349 + case NVME_LOG_RESERVATION: 350 + return nvmet_execute_get_log_page_resv(req); 490 351 } 491 352 pr_debug("unhandled lid %d on qid %d\n", 492 353 req->cmd->get_log_page.lid, req->sq->qid); ··· 588 433 id->nn = cpu_to_le32(NVMET_MAX_NAMESPACES); 589 434 id->mnan = cpu_to_le32(NVMET_MAX_NAMESPACES); 590 435 id->oncs = cpu_to_le16(NVME_CTRL_ONCS_DSM | 591 - NVME_CTRL_ONCS_WRITE_ZEROES); 436 + NVME_CTRL_ONCS_WRITE_ZEROES | 437 + NVME_CTRL_ONCS_RESERVATIONS); 592 438 593 439 /* XXX: don't report vwc if the underlying device is write through */ 594 440 id->vwc = NVME_CTRL_VWC_PRESENT; ··· 622 466 id->iorcsz = cpu_to_le32(sizeof(struct nvme_completion) / 16); 623 467 624 468 id->msdbd = ctrl->ops->msdbd; 469 + 470 + /* 471 + * Endurance group identifier is 16 bits, so we can't 
let namespaces 472 + * overflow that since we reuse the nsid 473 + */ 474 + BUILD_BUG_ON(NVMET_MAX_NAMESPACES > USHRT_MAX); 475 + id->endgidmax = cpu_to_le16(NVMET_MAX_NAMESPACES); 625 476 626 477 id->anacap = (1 << 0) | (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4); 627 478 id->anatt = 10; /* random value */ ··· 714 551 id->nmic = NVME_NS_NMIC_SHARED; 715 552 id->anagrpid = cpu_to_le32(req->ns->anagrpid); 716 553 554 + if (req->ns->pr.enable) 555 + id->rescap = NVME_PR_SUPPORT_WRITE_EXCLUSIVE | 556 + NVME_PR_SUPPORT_EXCLUSIVE_ACCESS | 557 + NVME_PR_SUPPORT_WRITE_EXCLUSIVE_REG_ONLY | 558 + NVME_PR_SUPPORT_EXCLUSIVE_ACCESS_REG_ONLY | 559 + NVME_PR_SUPPORT_WRITE_EXCLUSIVE_ALL_REGS | 560 + NVME_PR_SUPPORT_EXCLUSIVE_ACCESS_ALL_REGS | 561 + NVME_PR_SUPPORT_IEKEY_VER_1_3_DEF; 562 + 563 + /* 564 + * Since we don't know any better, every namespace is its own endurance 565 + * group. 566 + */ 567 + id->endgid = cpu_to_le16(req->ns->nsid); 568 + 717 569 memcpy(&id->nguid, &req->ns->nguid, sizeof(id->nguid)); 718 570 719 571 id->lbaf[0].ds = req->ns->blksize_shift; ··· 754 576 nvmet_req_complete(req, status); 755 577 } 756 578 757 - static void nvmet_execute_identify_nslist(struct nvmet_req *req) 579 + static void nvmet_execute_identify_endgrp_list(struct nvmet_req *req) 580 + { 581 + u16 min_endgid = le16_to_cpu(req->cmd->identify.cnssid); 582 + static const int buf_size = NVME_IDENTIFY_DATA_SIZE; 583 + struct nvmet_ctrl *ctrl = req->sq->ctrl; 584 + struct nvmet_ns *ns; 585 + unsigned long idx; 586 + __le16 *list; 587 + u16 status; 588 + int i = 1; 589 + 590 + list = kzalloc(buf_size, GFP_KERNEL); 591 + if (!list) { 592 + status = NVME_SC_INTERNAL; 593 + goto out; 594 + } 595 + 596 + xa_for_each(&ctrl->subsys->namespaces, idx, ns) { 597 + if (ns->nsid <= min_endgid) 598 + continue; 599 + 600 + list[i++] = cpu_to_le16(ns->nsid); 601 + if (i == buf_size / sizeof(__le16)) 602 + break; 603 + } 604 + 605 + list[0] = cpu_to_le16(i - 1); 606 + status = nvmet_copy_to_sgl(req, 0, list, 
buf_size); 607 + kfree(list); 608 + out: 609 + nvmet_req_complete(req, status); 610 + } 611 + 612 + static void nvmet_execute_identify_nslist(struct nvmet_req *req, bool match_css) 758 613 { 759 614 static const int buf_size = NVME_IDENTIFY_DATA_SIZE; 760 615 struct nvmet_ctrl *ctrl = req->sq->ctrl; ··· 816 605 817 606 xa_for_each(&ctrl->subsys->namespaces, idx, ns) { 818 607 if (ns->nsid <= min_nsid) 608 + continue; 609 + if (match_css && req->ns->csi != req->cmd->identify.csi) 819 610 continue; 820 611 list[i++] = cpu_to_le32(ns->nsid); 821 612 if (i == buf_size / sizeof(__le32)) ··· 898 685 nvmet_zero_sgl(req, 0, sizeof(struct nvme_id_ctrl_nvm))); 899 686 } 900 687 688 + static void nvme_execute_identify_ns_nvm(struct nvmet_req *req) 689 + { 690 + u16 status; 691 + 692 + status = nvmet_req_find_ns(req); 693 + if (status) 694 + goto out; 695 + 696 + status = nvmet_copy_to_sgl(req, 0, ZERO_PAGE(0), 697 + NVME_IDENTIFY_DATA_SIZE); 698 + out: 699 + nvmet_req_complete(req, status); 700 + } 701 + 702 + static void nvmet_execute_id_cs_indep(struct nvmet_req *req) 703 + { 704 + struct nvme_id_ns_cs_indep *id; 705 + u16 status; 706 + 707 + status = nvmet_req_find_ns(req); 708 + if (status) 709 + goto out; 710 + 711 + id = kzalloc(sizeof(*id), GFP_KERNEL); 712 + if (!id) { 713 + status = NVME_SC_INTERNAL; 714 + goto out; 715 + } 716 + 717 + id->nstat = NVME_NSTAT_NRDY; 718 + id->anagrpid = cpu_to_le32(req->ns->anagrpid); 719 + id->nmic = NVME_NS_NMIC_SHARED; 720 + if (req->ns->readonly) 721 + id->nsattr |= NVME_NS_ATTR_RO; 722 + if (req->ns->bdev && !bdev_nonrot(req->ns->bdev)) 723 + id->nsfeat |= NVME_NS_ROTATIONAL; 724 + /* 725 + * We need flush command to flush the file's metadata, 726 + * so report supporting vwc if backend is file, even 727 + * though buffered_io is disable. 
728 + */ 729 + if (req->ns->bdev && !bdev_write_cache(req->ns->bdev)) 730 + id->nsfeat |= NVME_NS_VWC_NOT_PRESENT; 731 + 732 + status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id)); 733 + kfree(id); 734 + out: 735 + nvmet_req_complete(req, status); 736 + } 737 + 901 738 static void nvmet_execute_identify(struct nvmet_req *req) 902 739 { 903 740 if (!nvmet_check_transfer_len(req, NVME_IDENTIFY_DATA_SIZE)) ··· 961 698 nvmet_execute_identify_ctrl(req); 962 699 return; 963 700 case NVME_ID_CNS_NS_ACTIVE_LIST: 964 - nvmet_execute_identify_nslist(req); 701 + nvmet_execute_identify_nslist(req, false); 965 702 return; 966 703 case NVME_ID_CNS_NS_DESC_LIST: 967 704 nvmet_execute_identify_desclist(req); ··· 969 706 case NVME_ID_CNS_CS_NS: 970 707 switch (req->cmd->identify.csi) { 971 708 case NVME_CSI_NVM: 972 - /* Not supported */ 973 - break; 709 + nvme_execute_identify_ns_nvm(req); 710 + return; 974 711 case NVME_CSI_ZNS: 975 712 if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) { 976 713 nvmet_execute_identify_ns_zns(req); ··· 992 729 break; 993 730 } 994 731 break; 732 + case NVME_ID_CNS_NS_ACTIVE_LIST_CS: 733 + nvmet_execute_identify_nslist(req, true); 734 + return; 735 + case NVME_ID_CNS_NS_CS_INDEP: 736 + nvmet_execute_id_cs_indep(req); 737 + return; 738 + case NVME_ID_CNS_ENDGRP_LIST: 739 + nvmet_execute_identify_endgrp_list(req); 740 + return; 995 741 } 996 742 997 743 pr_debug("unhandled identify cns %d on qid %d\n", ··· 1133 861 case NVME_FEAT_WRITE_PROTECT: 1134 862 status = nvmet_set_feat_write_protect(req); 1135 863 break; 864 + case NVME_FEAT_RESV_MASK: 865 + status = nvmet_set_feat_resv_notif_mask(req, cdw11); 866 + break; 1136 867 default: 1137 868 req->error_loc = offsetof(struct nvme_common_command, cdw10); 1138 869 status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR; ··· 1233 958 break; 1234 959 case NVME_FEAT_WRITE_PROTECT: 1235 960 status = nvmet_get_feat_write_protect(req); 961 + break; 962 + case NVME_FEAT_RESV_MASK: 963 + status = 
nvmet_get_feat_resv_notif_mask(req); 1236 964 break; 1237 965 default: 1238 966 req->error_loc =
+27
drivers/nvme/target/configfs.c
··· 769 769 770 770 CONFIGFS_ATTR_WO(nvmet_ns_, revalidate_size); 771 771 772 + static ssize_t nvmet_ns_resv_enable_show(struct config_item *item, char *page) 773 + { 774 + return sysfs_emit(page, "%d\n", to_nvmet_ns(item)->pr.enable); 775 + } 776 + 777 + static ssize_t nvmet_ns_resv_enable_store(struct config_item *item, 778 + const char *page, size_t count) 779 + { 780 + struct nvmet_ns *ns = to_nvmet_ns(item); 781 + bool val; 782 + 783 + if (kstrtobool(page, &val)) 784 + return -EINVAL; 785 + 786 + mutex_lock(&ns->subsys->lock); 787 + if (ns->enabled) { 788 + pr_err("the ns:%d is already enabled.\n", ns->nsid); 789 + mutex_unlock(&ns->subsys->lock); 790 + return -EINVAL; 791 + } 792 + ns->pr.enable = val; 793 + mutex_unlock(&ns->subsys->lock); 794 + return count; 795 + } 796 + CONFIGFS_ATTR(nvmet_ns_, resv_enable); 797 + 772 798 static struct configfs_attribute *nvmet_ns_attrs[] = { 773 799 &nvmet_ns_attr_device_path, 774 800 &nvmet_ns_attr_device_nguid, ··· 803 777 &nvmet_ns_attr_enable, 804 778 &nvmet_ns_attr_buffered_io, 805 779 &nvmet_ns_attr_revalidate_size, 780 + &nvmet_ns_attr_resv_enable, 806 781 #ifdef CONFIG_PCI_P2PDMA 807 782 &nvmet_ns_attr_p2pmem, 808 783 #endif
+57 -7
drivers/nvme/target/core.c
··· 611 611 if (ret) 612 612 goto out_restore_subsys_maxnsid; 613 613 614 + if (ns->pr.enable) { 615 + ret = nvmet_pr_init_ns(ns); 616 + if (ret) 617 + goto out_remove_from_subsys; 618 + } 619 + 614 620 subsys->nr_namespaces++; 615 621 616 622 nvmet_ns_changed(subsys, ns->nsid); ··· 626 620 mutex_unlock(&subsys->lock); 627 621 return ret; 628 622 623 + out_remove_from_subsys: 624 + xa_erase(&subsys->namespaces, ns->nsid); 629 625 out_restore_subsys_maxnsid: 630 626 subsys->max_nsid = nvmet_max_nsid(subsys); 631 627 percpu_ref_exit(&ns->ref); ··· 670 662 synchronize_rcu(); 671 663 wait_for_completion(&ns->disable_done); 672 664 percpu_ref_exit(&ns->ref); 665 + 666 + if (ns->pr.enable) 667 + nvmet_pr_exit_ns(ns); 673 668 674 669 mutex_lock(&subsys->lock); 675 670 ··· 765 754 static void __nvmet_req_complete(struct nvmet_req *req, u16 status) 766 755 { 767 756 struct nvmet_ns *ns = req->ns; 757 + struct nvmet_pr_per_ctrl_ref *pc_ref = req->pc_ref; 768 758 769 759 if (!req->sq->sqhd_disabled) 770 760 nvmet_update_sq_head(req); ··· 778 766 trace_nvmet_req_complete(req); 779 767 780 768 req->ops->queue_response(req); 769 + 770 + if (pc_ref) 771 + nvmet_pr_put_ns_pc_ref(pc_ref); 781 772 if (ns) 782 773 nvmet_put_namespace(ns); 783 774 } ··· 944 929 return ret; 945 930 } 946 931 932 + if (req->ns->pr.enable) { 933 + ret = nvmet_parse_pr_cmd(req); 934 + if (!ret) 935 + return ret; 936 + } 937 + 947 938 switch (req->ns->csi) { 948 939 case NVME_CSI_NVM: 949 940 if (req->ns->file) 950 - return nvmet_file_parse_io_cmd(req); 951 - return nvmet_bdev_parse_io_cmd(req); 941 + ret = nvmet_file_parse_io_cmd(req); 942 + else 943 + ret = nvmet_bdev_parse_io_cmd(req); 944 + break; 952 945 case NVME_CSI_ZNS: 953 946 if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) 954 - return nvmet_bdev_zns_parse_io_cmd(req); 955 - return NVME_SC_INVALID_IO_CMD_SET; 947 + ret = nvmet_bdev_zns_parse_io_cmd(req); 948 + else 949 + ret = NVME_SC_INVALID_IO_CMD_SET; 950 + break; 956 951 default: 957 - return 
NVME_SC_INVALID_IO_CMD_SET; 952 + ret = NVME_SC_INVALID_IO_CMD_SET; 958 953 } 954 + if (ret) 955 + return ret; 956 + 957 + if (req->ns->pr.enable) { 958 + ret = nvmet_pr_check_cmd_access(req); 959 + if (ret) 960 + return ret; 961 + 962 + ret = nvmet_pr_get_ns_pc_ref(req); 963 + } 964 + return ret; 959 965 } 960 966 961 967 bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, ··· 1000 964 req->ns = NULL; 1001 965 req->error_loc = NVMET_NO_ERROR_LOC; 1002 966 req->error_slba = 0; 967 + req->pc_ref = NULL; 1003 968 1004 969 /* no support for fused commands yet */ 1005 970 if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) { ··· 1052 1015 void nvmet_req_uninit(struct nvmet_req *req) 1053 1016 { 1054 1017 percpu_ref_put(&req->sq->ref); 1018 + if (req->pc_ref) 1019 + nvmet_pr_put_ns_pc_ref(req->pc_ref); 1055 1020 if (req->ns) 1056 1021 nvmet_put_namespace(req->ns); 1057 1022 } ··· 1422 1383 } 1423 1384 1424 1385 u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, 1425 - struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp) 1386 + struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp, 1387 + uuid_t *hostid) 1426 1388 { 1427 1389 struct nvmet_subsys *subsys; 1428 1390 struct nvmet_ctrl *ctrl; ··· 1502 1462 } 1503 1463 ctrl->cntlid = ret; 1504 1464 1465 + uuid_copy(&ctrl->hostid, hostid); 1466 + 1505 1467 /* 1506 1468 * Discovery controllers may use some arbitrary high value 1507 1469 * in order to cleanup stale discovery sessions ··· 1520 1478 nvmet_start_keep_alive_timer(ctrl); 1521 1479 1522 1480 mutex_lock(&subsys->lock); 1481 + ret = nvmet_ctrl_init_pr(ctrl); 1482 + if (ret) 1483 + goto init_pr_fail; 1523 1484 list_add_tail(&ctrl->subsys_entry, &subsys->ctrls); 1524 1485 nvmet_setup_p2p_ns_map(ctrl, req); 1525 1486 nvmet_debugfs_ctrl_setup(ctrl); ··· 1531 1486 *ctrlp = ctrl; 1532 1487 return 0; 1533 1488 1489 + init_pr_fail: 1490 + mutex_unlock(&subsys->lock); 1491 + nvmet_stop_keep_alive_timer(ctrl); 1492 + 
ida_free(&cntlid_ida, ctrl->cntlid); 1534 1493 out_free_sqs: 1535 1494 kfree(ctrl->sqs); 1536 1495 out_free_changed_ns_list: ··· 1553 1504 struct nvmet_subsys *subsys = ctrl->subsys; 1554 1505 1555 1506 mutex_lock(&subsys->lock); 1507 + nvmet_ctrl_destroy_pr(ctrl); 1556 1508 nvmet_release_p2p_ns_map(ctrl); 1557 1509 list_del(&ctrl->subsys_entry); 1558 1510 mutex_unlock(&subsys->lock); ··· 1767 1717 goto out_free_zbd_work_queue; 1768 1718 1769 1719 nvmet_wq = alloc_workqueue("nvmet-wq", 1770 - WQ_MEM_RECLAIM | WQ_UNBOUND, 0); 1720 + WQ_MEM_RECLAIM | WQ_UNBOUND | WQ_SYSFS, 0); 1771 1721 if (!nvmet_wq) 1772 1722 goto out_free_buffered_work_queue; 1773 1723
+4 -3
drivers/nvme/target/fabrics-cmd.c
··· 64 64 case NVME_REG_CSTS: 65 65 val = ctrl->csts; 66 66 break; 67 + case NVME_REG_CRTO: 68 + val = NVME_CAP_TIMEOUT(ctrl->csts); 69 + break; 67 70 default: 68 71 status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR; 69 72 break; ··· 248 245 d->subsysnqn[NVMF_NQN_FIELD_LEN - 1] = '\0'; 249 246 d->hostnqn[NVMF_NQN_FIELD_LEN - 1] = '\0'; 250 247 status = nvmet_alloc_ctrl(d->subsysnqn, d->hostnqn, req, 251 - le32_to_cpu(c->kato), &ctrl); 248 + le32_to_cpu(c->kato), &ctrl, &d->hostid); 252 249 if (status) 253 250 goto out; 254 - 255 - uuid_copy(&ctrl->hostid, &d->hostid); 256 251 257 252 dhchap_status = nvmet_setup_auth(ctrl); 258 253 if (dhchap_status) {
+65 -2
drivers/nvme/target/nvmet.h
··· 20 20 #include <linux/blkdev.h> 21 21 #include <linux/radix-tree.h> 22 22 #include <linux/t10-pi.h> 23 + #include <linux/kfifo.h> 23 24 24 - #define NVMET_DEFAULT_VS NVME_VS(1, 3, 0) 25 + #define NVMET_DEFAULT_VS NVME_VS(2, 1, 0) 25 26 26 27 #define NVMET_ASYNC_EVENTS 4 27 28 #define NVMET_ERROR_LOG_SLOTS 128 ··· 31 30 #define NVMET_MN_MAX_SIZE 40 32 31 #define NVMET_SN_MAX_SIZE 20 33 32 #define NVMET_FR_MAX_SIZE 8 33 + #define NVMET_PR_LOG_QUEUE_SIZE 64 34 34 35 35 /* 36 36 * Supported optional AENs: ··· 57 55 (cpu_to_le32((1 << 16) | (offsetof(struct nvmf_connect_data, x)))) 58 56 #define IPO_IATTR_CONNECT_SQE(x) \ 59 57 (cpu_to_le32(offsetof(struct nvmf_connect_command, x))) 58 + 59 + struct nvmet_pr_registrant { 60 + u64 rkey; 61 + uuid_t hostid; 62 + enum nvme_pr_type rtype; 63 + struct list_head entry; 64 + struct rcu_head rcu; 65 + }; 66 + 67 + struct nvmet_pr { 68 + bool enable; 69 + unsigned long notify_mask; 70 + atomic_t generation; 71 + struct nvmet_pr_registrant __rcu *holder; 72 + /* 73 + * During the execution of the reservation command, mutual 74 + * exclusion is required throughout the process. However, 75 + * while waiting asynchronously for the 'per controller 76 + * percpu_ref' to complete before the 'preempt and abort' 77 + * command finishes, a semaphore is needed to ensure mutual 78 + * exclusion instead of a mutex. 
79 + */ 80 + struct semaphore pr_sem; 81 + struct list_head registrant_list; 82 + }; 83 + 84 + struct nvmet_pr_per_ctrl_ref { 85 + struct percpu_ref ref; 86 + struct completion free_done; 87 + struct completion confirm_done; 88 + uuid_t hostid; 89 + }; 60 90 61 91 struct nvmet_ns { 62 92 struct percpu_ref ref; ··· 119 85 int pi_type; 120 86 int metadata_size; 121 87 u8 csi; 88 + struct nvmet_pr pr; 89 + struct xarray pr_per_ctrl_refs; 122 90 }; 123 91 124 92 static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item) ··· 227 191 return nvmet_port_disc_addr_treq_secure_channel(port) == NVMF_TREQ_REQUIRED; 228 192 } 229 193 194 + struct nvmet_pr_log_mgr { 195 + struct mutex lock; 196 + u64 lost_count; 197 + u64 counter; 198 + DECLARE_KFIFO(log_queue, struct nvme_pr_log, NVMET_PR_LOG_QUEUE_SIZE); 199 + }; 200 + 230 201 struct nvmet_ctrl { 231 202 struct nvmet_subsys *subsys; 232 203 struct nvmet_sq **sqs; ··· 289 246 u8 *dh_key; 290 247 size_t dh_keysize; 291 248 #endif 249 + struct nvmet_pr_log_mgr pr_log_mgr; 292 250 }; 293 251 294 252 struct nvmet_subsys { ··· 440 396 struct work_struct zmgmt_work; 441 397 } z; 442 398 #endif /* CONFIG_BLK_DEV_ZONED */ 399 + struct { 400 + struct work_struct abort_work; 401 + } r; 443 402 }; 444 403 int sg_cnt; 445 404 int metadata_sg_cnt; ··· 459 412 struct device *p2p_client; 460 413 u16 error_loc; 461 414 u64 error_slba; 415 + struct nvmet_pr_per_ctrl_ref *pc_ref; 462 416 }; 463 417 464 418 #define NVMET_MAX_MPOOL_BVEC 16 ··· 546 498 547 499 void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new); 548 500 u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, 549 - struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp); 501 + struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp, 502 + uuid_t *hostid); 550 503 struct nvmet_ctrl *nvmet_ctrl_find_get(const char *subsysnqn, 551 504 const char *hostnqn, u16 cntlid, 552 505 struct nvmet_req *req); ··· 810 761 static inline const char 
*nvmet_dhchap_dhgroup_name(u8 dhgid) { return NULL; } 811 762 #endif 812 763 764 + int nvmet_pr_init_ns(struct nvmet_ns *ns); 765 + u16 nvmet_parse_pr_cmd(struct nvmet_req *req); 766 + u16 nvmet_pr_check_cmd_access(struct nvmet_req *req); 767 + int nvmet_ctrl_init_pr(struct nvmet_ctrl *ctrl); 768 + void nvmet_ctrl_destroy_pr(struct nvmet_ctrl *ctrl); 769 + void nvmet_pr_exit_ns(struct nvmet_ns *ns); 770 + void nvmet_execute_get_log_page_resv(struct nvmet_req *req); 771 + u16 nvmet_set_feat_resv_notif_mask(struct nvmet_req *req, u32 mask); 772 + u16 nvmet_get_feat_resv_notif_mask(struct nvmet_req *req); 773 + u16 nvmet_pr_get_ns_pc_ref(struct nvmet_req *req); 774 + static inline void nvmet_pr_put_ns_pc_ref(struct nvmet_pr_per_ctrl_ref *pc_ref) 775 + { 776 + percpu_ref_put(&pc_ref->ref); 777 + } 813 778 #endif /* _NVMET_H */
+1156
drivers/nvme/target/pr.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * NVMe over Fabrics Persist Reservation. 4 + * Copyright (c) 2024 Guixin Liu, Alibaba Group. 5 + * All rights reserved. 6 + */ 7 + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 8 + #include <linux/unaligned.h> 9 + #include "nvmet.h" 10 + 11 + #define NVMET_PR_NOTIFI_MASK_ALL \ 12 + (1 << NVME_PR_NOTIFY_BIT_REG_PREEMPTED | \ 13 + 1 << NVME_PR_NOTIFY_BIT_RESV_RELEASED | \ 14 + 1 << NVME_PR_NOTIFY_BIT_RESV_PREEMPTED) 15 + 16 + static inline bool nvmet_pr_parse_ignore_key(u32 cdw10) 17 + { 18 + /* Ignore existing key, bit 03. */ 19 + return (cdw10 >> 3) & 1; 20 + } 21 + 22 + static inline struct nvmet_ns *nvmet_pr_to_ns(struct nvmet_pr *pr) 23 + { 24 + return container_of(pr, struct nvmet_ns, pr); 25 + } 26 + 27 + static struct nvmet_pr_registrant * 28 + nvmet_pr_find_registrant(struct nvmet_pr *pr, uuid_t *hostid) 29 + { 30 + struct nvmet_pr_registrant *reg; 31 + 32 + list_for_each_entry_rcu(reg, &pr->registrant_list, entry) { 33 + if (uuid_equal(&reg->hostid, hostid)) 34 + return reg; 35 + } 36 + return NULL; 37 + } 38 + 39 + u16 nvmet_set_feat_resv_notif_mask(struct nvmet_req *req, u32 mask) 40 + { 41 + u32 nsid = le32_to_cpu(req->cmd->common.nsid); 42 + struct nvmet_ctrl *ctrl = req->sq->ctrl; 43 + struct nvmet_ns *ns; 44 + unsigned long idx; 45 + u16 status; 46 + 47 + if (mask & ~(NVMET_PR_NOTIFI_MASK_ALL)) { 48 + req->error_loc = offsetof(struct nvme_common_command, cdw11); 49 + return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR; 50 + } 51 + 52 + if (nsid != U32_MAX) { 53 + status = nvmet_req_find_ns(req); 54 + if (status) 55 + return status; 56 + if (!req->ns->pr.enable) 57 + return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR; 58 + 59 + WRITE_ONCE(req->ns->pr.notify_mask, mask); 60 + goto success; 61 + } 62 + 63 + xa_for_each(&ctrl->subsys->namespaces, idx, ns) { 64 + if (ns->pr.enable) 65 + WRITE_ONCE(ns->pr.notify_mask, mask); 66 + } 67 + 68 + success: 69 + nvmet_set_result(req, mask); 70 + return NVME_SC_SUCCESS; 
71 + } 72 + 73 + u16 nvmet_get_feat_resv_notif_mask(struct nvmet_req *req) 74 + { 75 + u16 status; 76 + 77 + status = nvmet_req_find_ns(req); 78 + if (status) 79 + return status; 80 + 81 + if (!req->ns->pr.enable) 82 + return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR; 83 + 84 + nvmet_set_result(req, READ_ONCE(req->ns->pr.notify_mask)); 85 + return status; 86 + } 87 + 88 + void nvmet_execute_get_log_page_resv(struct nvmet_req *req) 89 + { 90 + struct nvmet_pr_log_mgr *log_mgr = &req->sq->ctrl->pr_log_mgr; 91 + struct nvme_pr_log next_log = {0}; 92 + struct nvme_pr_log log = {0}; 93 + u16 status = NVME_SC_SUCCESS; 94 + u64 lost_count; 95 + u64 cur_count; 96 + u64 next_count; 97 + 98 + mutex_lock(&log_mgr->lock); 99 + if (!kfifo_get(&log_mgr->log_queue, &log)) 100 + goto out; 101 + 102 + /* 103 + * We can't get the last in kfifo. 104 + * Utilize the current count and the count from the next log to 105 + * calculate the number of lost logs, while also addressing cases 106 + * of overflow. If there is no subsequent log, the number of lost 107 + * logs is equal to the lost_count within the nvmet_pr_log_mgr. 108 + */ 109 + cur_count = le64_to_cpu(log.count); 110 + if (kfifo_peek(&log_mgr->log_queue, &next_log)) { 111 + next_count = le64_to_cpu(next_log.count); 112 + if (next_count > cur_count) 113 + lost_count = next_count - cur_count - 1; 114 + else 115 + lost_count = U64_MAX - cur_count + next_count - 1; 116 + } else { 117 + lost_count = log_mgr->lost_count; 118 + } 119 + 120 + log.count = cpu_to_le64((cur_count + lost_count) == 0 ? 
121 + 1 : (cur_count + lost_count)); 122 + log_mgr->lost_count -= lost_count; 123 + 124 + log.nr_pages = kfifo_len(&log_mgr->log_queue); 125 + 126 + out: 127 + status = nvmet_copy_to_sgl(req, 0, &log, sizeof(log)); 128 + mutex_unlock(&log_mgr->lock); 129 + nvmet_req_complete(req, status); 130 + } 131 + 132 + static void nvmet_pr_add_resv_log(struct nvmet_ctrl *ctrl, u8 log_type, 133 + u32 nsid) 134 + { 135 + struct nvmet_pr_log_mgr *log_mgr = &ctrl->pr_log_mgr; 136 + struct nvme_pr_log log = {0}; 137 + 138 + mutex_lock(&log_mgr->lock); 139 + log_mgr->counter++; 140 + if (log_mgr->counter == 0) 141 + log_mgr->counter = 1; 142 + 143 + log.count = cpu_to_le64(log_mgr->counter); 144 + log.type = log_type; 145 + log.nsid = cpu_to_le32(nsid); 146 + 147 + if (!kfifo_put(&log_mgr->log_queue, log)) { 148 + pr_info("a reservation log lost, cntlid:%d, log_type:%d, nsid:%d\n", 149 + ctrl->cntlid, log_type, nsid); 150 + log_mgr->lost_count++; 151 + } 152 + 153 + mutex_unlock(&log_mgr->lock); 154 + } 155 + 156 + static void nvmet_pr_resv_released(struct nvmet_pr *pr, uuid_t *hostid) 157 + { 158 + struct nvmet_ns *ns = nvmet_pr_to_ns(pr); 159 + struct nvmet_subsys *subsys = ns->subsys; 160 + struct nvmet_ctrl *ctrl; 161 + 162 + if (test_bit(NVME_PR_NOTIFY_BIT_RESV_RELEASED, &pr->notify_mask)) 163 + return; 164 + 165 + mutex_lock(&subsys->lock); 166 + list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { 167 + if (!uuid_equal(&ctrl->hostid, hostid) && 168 + nvmet_pr_find_registrant(pr, &ctrl->hostid)) { 169 + nvmet_pr_add_resv_log(ctrl, 170 + NVME_PR_LOG_RESERVATION_RELEASED, ns->nsid); 171 + nvmet_add_async_event(ctrl, NVME_AER_CSS, 172 + NVME_AEN_RESV_LOG_PAGE_AVALIABLE, 173 + NVME_LOG_RESERVATION); 174 + } 175 + } 176 + mutex_unlock(&subsys->lock); 177 + } 178 + 179 + static void nvmet_pr_send_event_to_host(struct nvmet_pr *pr, uuid_t *hostid, 180 + u8 log_type) 181 + { 182 + struct nvmet_ns *ns = nvmet_pr_to_ns(pr); 183 + struct nvmet_subsys *subsys = ns->subsys; 184 + 
struct nvmet_ctrl *ctrl; 185 + 186 + mutex_lock(&subsys->lock); 187 + list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { 188 + if (uuid_equal(hostid, &ctrl->hostid)) { 189 + nvmet_pr_add_resv_log(ctrl, log_type, ns->nsid); 190 + nvmet_add_async_event(ctrl, NVME_AER_CSS, 191 + NVME_AEN_RESV_LOG_PAGE_AVALIABLE, 192 + NVME_LOG_RESERVATION); 193 + } 194 + } 195 + mutex_unlock(&subsys->lock); 196 + } 197 + 198 + static void nvmet_pr_resv_preempted(struct nvmet_pr *pr, uuid_t *hostid) 199 + { 200 + if (test_bit(NVME_PR_NOTIFY_BIT_RESV_PREEMPTED, &pr->notify_mask)) 201 + return; 202 + 203 + nvmet_pr_send_event_to_host(pr, hostid, 204 + NVME_PR_LOG_RESERVATOIN_PREEMPTED); 205 + } 206 + 207 + static void nvmet_pr_registration_preempted(struct nvmet_pr *pr, 208 + uuid_t *hostid) 209 + { 210 + if (test_bit(NVME_PR_NOTIFY_BIT_REG_PREEMPTED, &pr->notify_mask)) 211 + return; 212 + 213 + nvmet_pr_send_event_to_host(pr, hostid, 214 + NVME_PR_LOG_REGISTRATION_PREEMPTED); 215 + } 216 + 217 + static inline void nvmet_pr_set_new_holder(struct nvmet_pr *pr, u8 new_rtype, 218 + struct nvmet_pr_registrant *reg) 219 + { 220 + reg->rtype = new_rtype; 221 + rcu_assign_pointer(pr->holder, reg); 222 + } 223 + 224 + static u16 nvmet_pr_register(struct nvmet_req *req, 225 + struct nvmet_pr_register_data *d) 226 + { 227 + struct nvmet_ctrl *ctrl = req->sq->ctrl; 228 + struct nvmet_pr_registrant *new, *reg; 229 + struct nvmet_pr *pr = &req->ns->pr; 230 + u16 status = NVME_SC_SUCCESS; 231 + u64 nrkey = le64_to_cpu(d->nrkey); 232 + 233 + new = kmalloc(sizeof(*new), GFP_KERNEL); 234 + if (!new) 235 + return NVME_SC_INTERNAL; 236 + 237 + down(&pr->pr_sem); 238 + reg = nvmet_pr_find_registrant(pr, &ctrl->hostid); 239 + if (reg) { 240 + if (reg->rkey != nrkey) 241 + status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR; 242 + kfree(new); 243 + goto out; 244 + } 245 + 246 + memset(new, 0, sizeof(*new)); 247 + INIT_LIST_HEAD(&new->entry); 248 + new->rkey = nrkey; 249 + uuid_copy(&new->hostid, 
&ctrl->hostid); 250 + list_add_tail_rcu(&new->entry, &pr->registrant_list); 251 + 252 + out: 253 + up(&pr->pr_sem); 254 + return status; 255 + } 256 + 257 + static void nvmet_pr_unregister_one(struct nvmet_pr *pr, 258 + struct nvmet_pr_registrant *reg) 259 + { 260 + struct nvmet_pr_registrant *first_reg; 261 + struct nvmet_pr_registrant *holder; 262 + u8 original_rtype; 263 + 264 + list_del_rcu(&reg->entry); 265 + 266 + holder = rcu_dereference_protected(pr->holder, 1); 267 + if (reg != holder) 268 + goto out; 269 + 270 + original_rtype = holder->rtype; 271 + if (original_rtype == NVME_PR_WRITE_EXCLUSIVE_ALL_REGS || 272 + original_rtype == NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS) { 273 + first_reg = list_first_or_null_rcu(&pr->registrant_list, 274 + struct nvmet_pr_registrant, entry); 275 + if (first_reg) 276 + first_reg->rtype = original_rtype; 277 + rcu_assign_pointer(pr->holder, first_reg); 278 + } else { 279 + rcu_assign_pointer(pr->holder, NULL); 280 + 281 + if (original_rtype == NVME_PR_WRITE_EXCLUSIVE_REG_ONLY || 282 + original_rtype == NVME_PR_EXCLUSIVE_ACCESS_REG_ONLY) 283 + nvmet_pr_resv_released(pr, &reg->hostid); 284 + } 285 + out: 286 + kfree_rcu(reg, rcu); 287 + } 288 + 289 + static u16 nvmet_pr_unregister(struct nvmet_req *req, 290 + struct nvmet_pr_register_data *d, 291 + bool ignore_key) 292 + { 293 + u16 status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR; 294 + struct nvmet_ctrl *ctrl = req->sq->ctrl; 295 + struct nvmet_pr *pr = &req->ns->pr; 296 + struct nvmet_pr_registrant *reg; 297 + 298 + down(&pr->pr_sem); 299 + list_for_each_entry_rcu(reg, &pr->registrant_list, entry) { 300 + if (uuid_equal(&reg->hostid, &ctrl->hostid)) { 301 + if (ignore_key || reg->rkey == le64_to_cpu(d->crkey)) { 302 + status = NVME_SC_SUCCESS; 303 + nvmet_pr_unregister_one(pr, reg); 304 + } 305 + break; 306 + } 307 + } 308 + up(&pr->pr_sem); 309 + 310 + return status; 311 + } 312 + 313 + static void nvmet_pr_update_reg_rkey(struct nvmet_pr_registrant *reg, 314 + void 
*attr) 315 + { 316 + reg->rkey = *(u64 *)attr; 317 + } 318 + 319 + static u16 nvmet_pr_update_reg_attr(struct nvmet_pr *pr, 320 + struct nvmet_pr_registrant *reg, 321 + void (*change_attr)(struct nvmet_pr_registrant *reg, 322 + void *attr), 323 + void *attr) 324 + { 325 + struct nvmet_pr_registrant *holder; 326 + struct nvmet_pr_registrant *new; 327 + 328 + holder = rcu_dereference_protected(pr->holder, 1); 329 + if (reg != holder) { 330 + change_attr(reg, attr); 331 + return NVME_SC_SUCCESS; 332 + } 333 + 334 + new = kmalloc(sizeof(*new), GFP_ATOMIC); 335 + if (!new) 336 + return NVME_SC_INTERNAL; 337 + 338 + new->rkey = holder->rkey; 339 + new->rtype = holder->rtype; 340 + uuid_copy(&new->hostid, &holder->hostid); 341 + INIT_LIST_HEAD(&new->entry); 342 + 343 + change_attr(new, attr); 344 + list_replace_rcu(&holder->entry, &new->entry); 345 + rcu_assign_pointer(pr->holder, new); 346 + kfree_rcu(holder, rcu); 347 + 348 + return NVME_SC_SUCCESS; 349 + } 350 + 351 + static u16 nvmet_pr_replace(struct nvmet_req *req, 352 + struct nvmet_pr_register_data *d, 353 + bool ignore_key) 354 + { 355 + u16 status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR; 356 + struct nvmet_ctrl *ctrl = req->sq->ctrl; 357 + struct nvmet_pr *pr = &req->ns->pr; 358 + struct nvmet_pr_registrant *reg; 359 + u64 nrkey = le64_to_cpu(d->nrkey); 360 + 361 + down(&pr->pr_sem); 362 + list_for_each_entry_rcu(reg, &pr->registrant_list, entry) { 363 + if (uuid_equal(&reg->hostid, &ctrl->hostid)) { 364 + if (ignore_key || reg->rkey == le64_to_cpu(d->crkey)) 365 + status = nvmet_pr_update_reg_attr(pr, reg, 366 + nvmet_pr_update_reg_rkey, 367 + &nrkey); 368 + break; 369 + } 370 + } 371 + up(&pr->pr_sem); 372 + return status; 373 + } 374 + 375 + static void nvmet_execute_pr_register(struct nvmet_req *req) 376 + { 377 + u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10); 378 + bool ignore_key = nvmet_pr_parse_ignore_key(cdw10); 379 + struct nvmet_pr_register_data *d; 380 + u8 reg_act = cdw10 & 0x07; /* 
Reservation Register Action, bit 02:00 */ 381 + u16 status; 382 + 383 + d = kmalloc(sizeof(*d), GFP_KERNEL); 384 + if (!d) { 385 + status = NVME_SC_INTERNAL; 386 + goto out; 387 + } 388 + 389 + status = nvmet_copy_from_sgl(req, 0, d, sizeof(*d)); 390 + if (status) 391 + goto free_data; 392 + 393 + switch (reg_act) { 394 + case NVME_PR_REGISTER_ACT_REG: 395 + status = nvmet_pr_register(req, d); 396 + break; 397 + case NVME_PR_REGISTER_ACT_UNREG: 398 + status = nvmet_pr_unregister(req, d, ignore_key); 399 + break; 400 + case NVME_PR_REGISTER_ACT_REPLACE: 401 + status = nvmet_pr_replace(req, d, ignore_key); 402 + break; 403 + default: 404 + req->error_loc = offsetof(struct nvme_common_command, cdw10); 405 + status = NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR; 406 + break; 407 + } 408 + free_data: 409 + kfree(d); 410 + out: 411 + if (!status) 412 + atomic_inc(&req->ns->pr.generation); 413 + nvmet_req_complete(req, status); 414 + } 415 + 416 + static u16 nvmet_pr_acquire(struct nvmet_req *req, 417 + struct nvmet_pr_registrant *reg, 418 + u8 rtype) 419 + { 420 + struct nvmet_pr *pr = &req->ns->pr; 421 + struct nvmet_pr_registrant *holder; 422 + 423 + holder = rcu_dereference_protected(pr->holder, 1); 424 + if (holder && reg != holder) 425 + return NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR; 426 + if (holder && reg == holder) { 427 + if (holder->rtype == rtype) 428 + return NVME_SC_SUCCESS; 429 + return NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR; 430 + } 431 + 432 + nvmet_pr_set_new_holder(pr, rtype, reg); 433 + return NVME_SC_SUCCESS; 434 + } 435 + 436 + static void nvmet_pr_confirm_ns_pc_ref(struct percpu_ref *ref) 437 + { 438 + struct nvmet_pr_per_ctrl_ref *pc_ref = 439 + container_of(ref, struct nvmet_pr_per_ctrl_ref, ref); 440 + 441 + complete(&pc_ref->confirm_done); 442 + } 443 + 444 + static void nvmet_pr_set_ctrl_to_abort(struct nvmet_req *req, uuid_t *hostid) 445 + { 446 + struct nvmet_pr_per_ctrl_ref *pc_ref; 447 + struct nvmet_ns *ns = req->ns; 448 + 
unsigned long idx; 449 + 450 + xa_for_each(&ns->pr_per_ctrl_refs, idx, pc_ref) { 451 + if (uuid_equal(&pc_ref->hostid, hostid)) { 452 + percpu_ref_kill_and_confirm(&pc_ref->ref, 453 + nvmet_pr_confirm_ns_pc_ref); 454 + wait_for_completion(&pc_ref->confirm_done); 455 + } 456 + } 457 + } 458 + 459 + static u16 nvmet_pr_unreg_all_host_by_prkey(struct nvmet_req *req, u64 prkey, 460 + uuid_t *send_hostid, 461 + bool abort) 462 + { 463 + u16 status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR; 464 + struct nvmet_pr_registrant *reg, *tmp; 465 + struct nvmet_pr *pr = &req->ns->pr; 466 + uuid_t hostid; 467 + 468 + list_for_each_entry_safe(reg, tmp, &pr->registrant_list, entry) { 469 + if (reg->rkey == prkey) { 470 + status = NVME_SC_SUCCESS; 471 + uuid_copy(&hostid, &reg->hostid); 472 + if (abort) 473 + nvmet_pr_set_ctrl_to_abort(req, &hostid); 474 + nvmet_pr_unregister_one(pr, reg); 475 + if (!uuid_equal(&hostid, send_hostid)) 476 + nvmet_pr_registration_preempted(pr, &hostid); 477 + } 478 + } 479 + return status; 480 + } 481 + 482 + static void nvmet_pr_unreg_all_others_by_prkey(struct nvmet_req *req, 483 + u64 prkey, 484 + uuid_t *send_hostid, 485 + bool abort) 486 + { 487 + struct nvmet_pr_registrant *reg, *tmp; 488 + struct nvmet_pr *pr = &req->ns->pr; 489 + uuid_t hostid; 490 + 491 + list_for_each_entry_safe(reg, tmp, &pr->registrant_list, entry) { 492 + if (reg->rkey == prkey && 493 + !uuid_equal(&reg->hostid, send_hostid)) { 494 + uuid_copy(&hostid, &reg->hostid); 495 + if (abort) 496 + nvmet_pr_set_ctrl_to_abort(req, &hostid); 497 + nvmet_pr_unregister_one(pr, reg); 498 + nvmet_pr_registration_preempted(pr, &hostid); 499 + } 500 + } 501 + } 502 + 503 + static void nvmet_pr_unreg_all_others(struct nvmet_req *req, 504 + uuid_t *send_hostid, 505 + bool abort) 506 + { 507 + struct nvmet_pr_registrant *reg, *tmp; 508 + struct nvmet_pr *pr = &req->ns->pr; 509 + uuid_t hostid; 510 + 511 + list_for_each_entry_safe(reg, tmp, &pr->registrant_list, entry) { 512 + if 
(!uuid_equal(&reg->hostid, send_hostid)) { 513 + uuid_copy(&hostid, &reg->hostid); 514 + if (abort) 515 + nvmet_pr_set_ctrl_to_abort(req, &hostid); 516 + nvmet_pr_unregister_one(pr, reg); 517 + nvmet_pr_registration_preempted(pr, &hostid); 518 + } 519 + } 520 + } 521 + 522 + static void nvmet_pr_update_holder_rtype(struct nvmet_pr_registrant *reg, 523 + void *attr) 524 + { 525 + u8 new_rtype = *(u8 *)attr; 526 + 527 + reg->rtype = new_rtype; 528 + } 529 + 530 + static u16 nvmet_pr_preempt(struct nvmet_req *req, 531 + struct nvmet_pr_registrant *reg, 532 + u8 rtype, 533 + struct nvmet_pr_acquire_data *d, 534 + bool abort) 535 + { 536 + struct nvmet_ctrl *ctrl = req->sq->ctrl; 537 + struct nvmet_pr *pr = &req->ns->pr; 538 + struct nvmet_pr_registrant *holder; 539 + enum nvme_pr_type original_rtype; 540 + u64 prkey = le64_to_cpu(d->prkey); 541 + u16 status; 542 + 543 + holder = rcu_dereference_protected(pr->holder, 1); 544 + if (!holder) 545 + return nvmet_pr_unreg_all_host_by_prkey(req, prkey, 546 + &ctrl->hostid, abort); 547 + 548 + original_rtype = holder->rtype; 549 + if (original_rtype == NVME_PR_WRITE_EXCLUSIVE_ALL_REGS || 550 + original_rtype == NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS) { 551 + if (!prkey) { 552 + /* 553 + * To prevent possible access from other hosts, and 554 + * avoid terminate the holder, set the new holder 555 + * first before unregistering. 556 + */ 557 + nvmet_pr_set_new_holder(pr, rtype, reg); 558 + nvmet_pr_unreg_all_others(req, &ctrl->hostid, abort); 559 + return NVME_SC_SUCCESS; 560 + } 561 + return nvmet_pr_unreg_all_host_by_prkey(req, prkey, 562 + &ctrl->hostid, abort); 563 + } 564 + 565 + if (holder == reg) { 566 + status = nvmet_pr_update_reg_attr(pr, holder, 567 + nvmet_pr_update_holder_rtype, &rtype); 568 + if (!status && original_rtype != rtype) 569 + nvmet_pr_resv_released(pr, &reg->hostid); 570 + return status; 571 + } 572 + 573 + if (prkey == holder->rkey) { 574 + /* 575 + * Same as before, set the new holder first. 
576 + */ 577 + nvmet_pr_set_new_holder(pr, rtype, reg); 578 + nvmet_pr_unreg_all_others_by_prkey(req, prkey, &ctrl->hostid, 579 + abort); 580 + if (original_rtype != rtype) 581 + nvmet_pr_resv_released(pr, &reg->hostid); 582 + return NVME_SC_SUCCESS; 583 + } 584 + 585 + if (prkey) 586 + return nvmet_pr_unreg_all_host_by_prkey(req, prkey, 587 + &ctrl->hostid, abort); 588 + return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR; 589 + } 590 + 591 + static void nvmet_pr_do_abort(struct work_struct *w) 592 + { 593 + struct nvmet_req *req = container_of(w, struct nvmet_req, r.abort_work); 594 + struct nvmet_pr_per_ctrl_ref *pc_ref; 595 + struct nvmet_ns *ns = req->ns; 596 + unsigned long idx; 597 + 598 + /* 599 + * The target does not support abort, just wait per-controller ref to 0. 600 + */ 601 + xa_for_each(&ns->pr_per_ctrl_refs, idx, pc_ref) { 602 + if (percpu_ref_is_dying(&pc_ref->ref)) { 603 + wait_for_completion(&pc_ref->free_done); 604 + reinit_completion(&pc_ref->confirm_done); 605 + reinit_completion(&pc_ref->free_done); 606 + percpu_ref_resurrect(&pc_ref->ref); 607 + } 608 + } 609 + 610 + up(&ns->pr.pr_sem); 611 + nvmet_req_complete(req, NVME_SC_SUCCESS); 612 + } 613 + 614 + static u16 __nvmet_execute_pr_acquire(struct nvmet_req *req, 615 + struct nvmet_pr_registrant *reg, 616 + u8 acquire_act, 617 + u8 rtype, 618 + struct nvmet_pr_acquire_data *d) 619 + { 620 + u16 status; 621 + 622 + switch (acquire_act) { 623 + case NVME_PR_ACQUIRE_ACT_ACQUIRE: 624 + status = nvmet_pr_acquire(req, reg, rtype); 625 + goto out; 626 + case NVME_PR_ACQUIRE_ACT_PREEMPT: 627 + status = nvmet_pr_preempt(req, reg, rtype, d, false); 628 + goto inc_gen; 629 + case NVME_PR_ACQUIRE_ACT_PREEMPT_AND_ABORT: 630 + status = nvmet_pr_preempt(req, reg, rtype, d, true); 631 + goto inc_gen; 632 + default: 633 + req->error_loc = offsetof(struct nvme_common_command, cdw10); 634 + status = NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR; 635 + goto out; 636 + } 637 + inc_gen: 638 + if (!status) 639 + 
atomic_inc(&req->ns->pr.generation); 640 + out: 641 + return status; 642 + } 643 + 644 + static void nvmet_execute_pr_acquire(struct nvmet_req *req) 645 + { 646 + u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10); 647 + bool ignore_key = nvmet_pr_parse_ignore_key(cdw10); 648 + /* Reservation type, bit 15:08 */ 649 + u8 rtype = (u8)((cdw10 >> 8) & 0xff); 650 + /* Reservation acquire action, bit 02:00 */ 651 + u8 acquire_act = cdw10 & 0x07; 652 + struct nvmet_ctrl *ctrl = req->sq->ctrl; 653 + struct nvmet_pr_acquire_data *d = NULL; 654 + struct nvmet_pr *pr = &req->ns->pr; 655 + struct nvmet_pr_registrant *reg; 656 + u16 status = NVME_SC_SUCCESS; 657 + 658 + if (ignore_key || 659 + rtype < NVME_PR_WRITE_EXCLUSIVE || 660 + rtype > NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS) { 661 + status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR; 662 + goto out; 663 + } 664 + 665 + d = kmalloc(sizeof(*d), GFP_KERNEL); 666 + if (!d) { 667 + status = NVME_SC_INTERNAL; 668 + goto out; 669 + } 670 + 671 + status = nvmet_copy_from_sgl(req, 0, d, sizeof(*d)); 672 + if (status) 673 + goto free_data; 674 + 675 + status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR; 676 + down(&pr->pr_sem); 677 + list_for_each_entry_rcu(reg, &pr->registrant_list, entry) { 678 + if (uuid_equal(&reg->hostid, &ctrl->hostid) && 679 + reg->rkey == le64_to_cpu(d->crkey)) { 680 + status = __nvmet_execute_pr_acquire(req, reg, 681 + acquire_act, rtype, d); 682 + break; 683 + } 684 + } 685 + 686 + if (!status && acquire_act == NVME_PR_ACQUIRE_ACT_PREEMPT_AND_ABORT) { 687 + kfree(d); 688 + INIT_WORK(&req->r.abort_work, nvmet_pr_do_abort); 689 + queue_work(nvmet_wq, &req->r.abort_work); 690 + return; 691 + } 692 + 693 + up(&pr->pr_sem); 694 + 695 + free_data: 696 + kfree(d); 697 + out: 698 + nvmet_req_complete(req, status); 699 + } 700 + 701 + static u16 nvmet_pr_release(struct nvmet_req *req, 702 + struct nvmet_pr_registrant *reg, 703 + u8 rtype) 704 + { 705 + struct nvmet_pr *pr = &req->ns->pr; 706 + struct nvmet_pr_registrant 
*holder; 707 + u8 original_rtype; 708 + 709 + holder = rcu_dereference_protected(pr->holder, 1); 710 + if (!holder || reg != holder) 711 + return NVME_SC_SUCCESS; 712 + 713 + original_rtype = holder->rtype; 714 + if (original_rtype != rtype) 715 + return NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR; 716 + 717 + rcu_assign_pointer(pr->holder, NULL); 718 + 719 + if (original_rtype != NVME_PR_WRITE_EXCLUSIVE && 720 + original_rtype != NVME_PR_EXCLUSIVE_ACCESS) 721 + nvmet_pr_resv_released(pr, &reg->hostid); 722 + 723 + return NVME_SC_SUCCESS; 724 + } 725 + 726 + static void nvmet_pr_clear(struct nvmet_req *req) 727 + { 728 + struct nvmet_pr_registrant *reg, *tmp; 729 + struct nvmet_pr *pr = &req->ns->pr; 730 + 731 + rcu_assign_pointer(pr->holder, NULL); 732 + 733 + list_for_each_entry_safe(reg, tmp, &pr->registrant_list, entry) { 734 + list_del_rcu(&reg->entry); 735 + if (!uuid_equal(&req->sq->ctrl->hostid, &reg->hostid)) 736 + nvmet_pr_resv_preempted(pr, &reg->hostid); 737 + kfree_rcu(reg, rcu); 738 + } 739 + 740 + atomic_inc(&pr->generation); 741 + } 742 + 743 + static u16 __nvmet_execute_pr_release(struct nvmet_req *req, 744 + struct nvmet_pr_registrant *reg, 745 + u8 release_act, u8 rtype) 746 + { 747 + switch (release_act) { 748 + case NVME_PR_RELEASE_ACT_RELEASE: 749 + return nvmet_pr_release(req, reg, rtype); 750 + case NVME_PR_RELEASE_ACT_CLEAR: 751 + nvmet_pr_clear(req); 752 + return NVME_SC_SUCCESS; 753 + default: 754 + req->error_loc = offsetof(struct nvme_common_command, cdw10); 755 + return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR; 756 + } 757 + } 758 + 759 + static void nvmet_execute_pr_release(struct nvmet_req *req) 760 + { 761 + u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10); 762 + bool ignore_key = nvmet_pr_parse_ignore_key(cdw10); 763 + u8 rtype = (u8)((cdw10 >> 8) & 0xff); /* Reservation type, bit 15:08 */ 764 + u8 release_act = cdw10 & 0x07; /* Reservation release action, bit 02:00 */ 765 + struct nvmet_ctrl *ctrl = req->sq->ctrl; 766 + struct 
nvmet_pr *pr = &req->ns->pr; 767 + struct nvmet_pr_release_data *d; 768 + struct nvmet_pr_registrant *reg; 769 + u16 status; 770 + 771 + if (ignore_key) { 772 + status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR; 773 + goto out; 774 + } 775 + 776 + d = kmalloc(sizeof(*d), GFP_KERNEL); 777 + if (!d) { 778 + status = NVME_SC_INTERNAL; 779 + goto out; 780 + } 781 + 782 + status = nvmet_copy_from_sgl(req, 0, d, sizeof(*d)); 783 + if (status) 784 + goto free_data; 785 + 786 + status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR; 787 + down(&pr->pr_sem); 788 + list_for_each_entry_rcu(reg, &pr->registrant_list, entry) { 789 + if (uuid_equal(&reg->hostid, &ctrl->hostid) && 790 + reg->rkey == le64_to_cpu(d->crkey)) { 791 + status = __nvmet_execute_pr_release(req, reg, 792 + release_act, rtype); 793 + break; 794 + } 795 + } 796 + up(&pr->pr_sem); 797 + free_data: 798 + kfree(d); 799 + out: 800 + nvmet_req_complete(req, status); 801 + } 802 + 803 + static void nvmet_execute_pr_report(struct nvmet_req *req) 804 + { 805 + u32 cdw11 = le32_to_cpu(req->cmd->common.cdw11); 806 + u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10); 807 + u32 num_bytes = 4 * (cdw10 + 1); /* cdw10 is number of dwords */ 808 + u8 eds = cdw11 & 1; /* Extended data structure, bit 00 */ 809 + struct nvme_registered_ctrl_ext *ctrl_eds; 810 + struct nvme_reservation_status_ext *data; 811 + struct nvmet_pr *pr = &req->ns->pr; 812 + struct nvmet_pr_registrant *holder; 813 + struct nvmet_pr_registrant *reg; 814 + u16 num_ctrls = 0; 815 + u16 status; 816 + u8 rtype; 817 + 818 + /* nvmet hostid(uuid_t) is 128 bit. 
*/ 819 + if (!eds) { 820 + req->error_loc = offsetof(struct nvme_common_command, cdw11); 821 + status = NVME_SC_HOST_ID_INCONSIST | NVME_STATUS_DNR; 822 + goto out; 823 + } 824 + 825 + if (num_bytes < sizeof(struct nvme_reservation_status_ext)) { 826 + req->error_loc = offsetof(struct nvme_common_command, cdw10); 827 + status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR; 828 + goto out; 829 + } 830 + 831 + data = kmalloc(num_bytes, GFP_KERNEL); 832 + if (!data) { 833 + status = NVME_SC_INTERNAL; 834 + goto out; 835 + } 836 + memset(data, 0, num_bytes); 837 + data->gen = cpu_to_le32(atomic_read(&pr->generation)); 838 + data->ptpls = 0; 839 + ctrl_eds = data->regctl_eds; 840 + 841 + rcu_read_lock(); 842 + holder = rcu_dereference(pr->holder); 843 + rtype = holder ? holder->rtype : 0; 844 + data->rtype = rtype; 845 + 846 + list_for_each_entry_rcu(reg, &pr->registrant_list, entry) { 847 + num_ctrls++; 848 + /* 849 + * continue to get the number of all registrans. 850 + */ 851 + if (((void *)ctrl_eds + sizeof(*ctrl_eds)) > 852 + ((void *)data + num_bytes)) 853 + continue; 854 + /* 855 + * Dynamic controller, set cntlid to 0xffff. 
856 + */ 857 + ctrl_eds->cntlid = cpu_to_le16(NVME_CNTLID_DYNAMIC); 858 + if (rtype == NVME_PR_WRITE_EXCLUSIVE_ALL_REGS || 859 + rtype == NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS) 860 + ctrl_eds->rcsts = 1; 861 + if (reg == holder) 862 + ctrl_eds->rcsts = 1; 863 + uuid_copy((uuid_t *)&ctrl_eds->hostid, &reg->hostid); 864 + ctrl_eds->rkey = cpu_to_le64(reg->rkey); 865 + ctrl_eds++; 866 + } 867 + rcu_read_unlock(); 868 + 869 + put_unaligned_le16(num_ctrls, data->regctl); 870 + status = nvmet_copy_to_sgl(req, 0, data, num_bytes); 871 + kfree(data); 872 + out: 873 + nvmet_req_complete(req, status); 874 + } 875 + 876 + u16 nvmet_parse_pr_cmd(struct nvmet_req *req) 877 + { 878 + struct nvme_command *cmd = req->cmd; 879 + 880 + switch (cmd->common.opcode) { 881 + case nvme_cmd_resv_register: 882 + req->execute = nvmet_execute_pr_register; 883 + break; 884 + case nvme_cmd_resv_acquire: 885 + req->execute = nvmet_execute_pr_acquire; 886 + break; 887 + case nvme_cmd_resv_release: 888 + req->execute = nvmet_execute_pr_release; 889 + break; 890 + case nvme_cmd_resv_report: 891 + req->execute = nvmet_execute_pr_report; 892 + break; 893 + default: 894 + return 1; 895 + } 896 + return NVME_SC_SUCCESS; 897 + } 898 + 899 + static bool nvmet_is_req_write_cmd_group(struct nvmet_req *req) 900 + { 901 + u8 opcode = req->cmd->common.opcode; 902 + 903 + if (req->sq->qid) { 904 + switch (opcode) { 905 + case nvme_cmd_flush: 906 + case nvme_cmd_write: 907 + case nvme_cmd_write_zeroes: 908 + case nvme_cmd_dsm: 909 + case nvme_cmd_zone_append: 910 + case nvme_cmd_zone_mgmt_send: 911 + return true; 912 + default: 913 + return false; 914 + } 915 + } 916 + return false; 917 + } 918 + 919 + static bool nvmet_is_req_read_cmd_group(struct nvmet_req *req) 920 + { 921 + u8 opcode = req->cmd->common.opcode; 922 + 923 + if (req->sq->qid) { 924 + switch (opcode) { 925 + case nvme_cmd_read: 926 + case nvme_cmd_zone_mgmt_recv: 927 + return true; 928 + default: 929 + return false; 930 + } 931 + } 932 + return 
false; 933 + } 934 + 935 + u16 nvmet_pr_check_cmd_access(struct nvmet_req *req) 936 + { 937 + struct nvmet_ctrl *ctrl = req->sq->ctrl; 938 + struct nvmet_pr_registrant *holder; 939 + struct nvmet_ns *ns = req->ns; 940 + struct nvmet_pr *pr = &ns->pr; 941 + u16 status = NVME_SC_SUCCESS; 942 + 943 + rcu_read_lock(); 944 + holder = rcu_dereference(pr->holder); 945 + if (!holder) 946 + goto unlock; 947 + if (uuid_equal(&ctrl->hostid, &holder->hostid)) 948 + goto unlock; 949 + 950 + /* 951 + * The Reservation command group is checked in executing, 952 + * allow it here. 953 + */ 954 + switch (holder->rtype) { 955 + case NVME_PR_WRITE_EXCLUSIVE: 956 + if (nvmet_is_req_write_cmd_group(req)) 957 + status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR; 958 + break; 959 + case NVME_PR_EXCLUSIVE_ACCESS: 960 + if (nvmet_is_req_read_cmd_group(req) || 961 + nvmet_is_req_write_cmd_group(req)) 962 + status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR; 963 + break; 964 + case NVME_PR_WRITE_EXCLUSIVE_REG_ONLY: 965 + case NVME_PR_WRITE_EXCLUSIVE_ALL_REGS: 966 + if ((nvmet_is_req_write_cmd_group(req)) && 967 + !nvmet_pr_find_registrant(pr, &ctrl->hostid)) 968 + status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR; 969 + break; 970 + case NVME_PR_EXCLUSIVE_ACCESS_REG_ONLY: 971 + case NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS: 972 + if ((nvmet_is_req_read_cmd_group(req) || 973 + nvmet_is_req_write_cmd_group(req)) && 974 + !nvmet_pr_find_registrant(pr, &ctrl->hostid)) 975 + status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR; 976 + break; 977 + default: 978 + pr_warn("the reservation type is set wrong, type:%d\n", 979 + holder->rtype); 980 + break; 981 + } 982 + 983 + unlock: 984 + rcu_read_unlock(); 985 + if (status) 986 + req->error_loc = offsetof(struct nvme_common_command, opcode); 987 + return status; 988 + } 989 + 990 + u16 nvmet_pr_get_ns_pc_ref(struct nvmet_req *req) 991 + { 992 + struct nvmet_pr_per_ctrl_ref *pc_ref; 993 + 994 + pc_ref = xa_load(&req->ns->pr_per_ctrl_refs, 
995 + req->sq->ctrl->cntlid); 996 + if (unlikely(!percpu_ref_tryget_live(&pc_ref->ref))) 997 + return NVME_SC_INTERNAL; 998 + req->pc_ref = pc_ref; 999 + return NVME_SC_SUCCESS; 1000 + } 1001 + 1002 + static void nvmet_pr_ctrl_ns_all_cmds_done(struct percpu_ref *ref) 1003 + { 1004 + struct nvmet_pr_per_ctrl_ref *pc_ref = 1005 + container_of(ref, struct nvmet_pr_per_ctrl_ref, ref); 1006 + 1007 + complete(&pc_ref->free_done); 1008 + } 1009 + 1010 + static int nvmet_pr_alloc_and_insert_pc_ref(struct nvmet_ns *ns, 1011 + unsigned long idx, 1012 + uuid_t *hostid) 1013 + { 1014 + struct nvmet_pr_per_ctrl_ref *pc_ref; 1015 + int ret; 1016 + 1017 + pc_ref = kmalloc(sizeof(*pc_ref), GFP_ATOMIC); 1018 + if (!pc_ref) 1019 + return -ENOMEM; 1020 + 1021 + ret = percpu_ref_init(&pc_ref->ref, nvmet_pr_ctrl_ns_all_cmds_done, 1022 + PERCPU_REF_ALLOW_REINIT, GFP_KERNEL); 1023 + if (ret) 1024 + goto free; 1025 + 1026 + init_completion(&pc_ref->free_done); 1027 + init_completion(&pc_ref->confirm_done); 1028 + uuid_copy(&pc_ref->hostid, hostid); 1029 + 1030 + ret = xa_insert(&ns->pr_per_ctrl_refs, idx, pc_ref, GFP_KERNEL); 1031 + if (ret) 1032 + goto exit; 1033 + return ret; 1034 + exit: 1035 + percpu_ref_exit(&pc_ref->ref); 1036 + free: 1037 + kfree(pc_ref); 1038 + return ret; 1039 + } 1040 + 1041 + int nvmet_ctrl_init_pr(struct nvmet_ctrl *ctrl) 1042 + { 1043 + struct nvmet_subsys *subsys = ctrl->subsys; 1044 + struct nvmet_pr_per_ctrl_ref *pc_ref; 1045 + struct nvmet_ns *ns = NULL; 1046 + unsigned long idx; 1047 + int ret; 1048 + 1049 + ctrl->pr_log_mgr.counter = 0; 1050 + ctrl->pr_log_mgr.lost_count = 0; 1051 + mutex_init(&ctrl->pr_log_mgr.lock); 1052 + INIT_KFIFO(ctrl->pr_log_mgr.log_queue); 1053 + 1054 + /* 1055 + * Here we are under subsys lock, if an ns not in subsys->namespaces, 1056 + * we can make sure that ns is not enabled, and not call 1057 + * nvmet_pr_init_ns(), see more details in nvmet_ns_enable(). 1058 + * So just check ns->pr.enable. 
1059 + */ 1060 + xa_for_each(&subsys->namespaces, idx, ns) { 1061 + if (ns->pr.enable) { 1062 + ret = nvmet_pr_alloc_and_insert_pc_ref(ns, ctrl->cntlid, 1063 + &ctrl->hostid); 1064 + if (ret) 1065 + goto free_per_ctrl_refs; 1066 + } 1067 + } 1068 + return 0; 1069 + 1070 + free_per_ctrl_refs: 1071 + xa_for_each(&subsys->namespaces, idx, ns) { 1072 + if (ns->pr.enable) { 1073 + pc_ref = xa_erase(&ns->pr_per_ctrl_refs, ctrl->cntlid); 1074 + if (pc_ref) 1075 + percpu_ref_exit(&pc_ref->ref); 1076 + kfree(pc_ref); 1077 + } 1078 + } 1079 + return ret; 1080 + } 1081 + 1082 + void nvmet_ctrl_destroy_pr(struct nvmet_ctrl *ctrl) 1083 + { 1084 + struct nvmet_pr_per_ctrl_ref *pc_ref; 1085 + struct nvmet_ns *ns; 1086 + unsigned long idx; 1087 + 1088 + kfifo_free(&ctrl->pr_log_mgr.log_queue); 1089 + mutex_destroy(&ctrl->pr_log_mgr.lock); 1090 + 1091 + xa_for_each(&ctrl->subsys->namespaces, idx, ns) { 1092 + if (ns->pr.enable) { 1093 + pc_ref = xa_erase(&ns->pr_per_ctrl_refs, ctrl->cntlid); 1094 + if (pc_ref) 1095 + percpu_ref_exit(&pc_ref->ref); 1096 + kfree(pc_ref); 1097 + } 1098 + } 1099 + } 1100 + 1101 + int nvmet_pr_init_ns(struct nvmet_ns *ns) 1102 + { 1103 + struct nvmet_subsys *subsys = ns->subsys; 1104 + struct nvmet_pr_per_ctrl_ref *pc_ref; 1105 + struct nvmet_ctrl *ctrl = NULL; 1106 + unsigned long idx; 1107 + int ret; 1108 + 1109 + ns->pr.holder = NULL; 1110 + atomic_set(&ns->pr.generation, 0); 1111 + sema_init(&ns->pr.pr_sem, 1); 1112 + INIT_LIST_HEAD(&ns->pr.registrant_list); 1113 + ns->pr.notify_mask = 0; 1114 + 1115 + xa_init(&ns->pr_per_ctrl_refs); 1116 + 1117 + list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { 1118 + ret = nvmet_pr_alloc_and_insert_pc_ref(ns, ctrl->cntlid, 1119 + &ctrl->hostid); 1120 + if (ret) 1121 + goto free_per_ctrl_refs; 1122 + } 1123 + return 0; 1124 + 1125 + free_per_ctrl_refs: 1126 + xa_for_each(&ns->pr_per_ctrl_refs, idx, pc_ref) { 1127 + xa_erase(&ns->pr_per_ctrl_refs, idx); 1128 + percpu_ref_exit(&pc_ref->ref); 1129 + 
kfree(pc_ref); 1130 + } 1131 + return ret; 1132 + } 1133 + 1134 + void nvmet_pr_exit_ns(struct nvmet_ns *ns) 1135 + { 1136 + struct nvmet_pr_registrant *reg, *tmp; 1137 + struct nvmet_pr_per_ctrl_ref *pc_ref; 1138 + struct nvmet_pr *pr = &ns->pr; 1139 + unsigned long idx; 1140 + 1141 + list_for_each_entry_safe(reg, tmp, &pr->registrant_list, entry) { 1142 + list_del(&reg->entry); 1143 + kfree(reg); 1144 + } 1145 + 1146 + xa_for_each(&ns->pr_per_ctrl_refs, idx, pc_ref) { 1147 + /* 1148 + * No command on ns here, we can safely free pc_ref. 1149 + */ 1150 + pc_ref = xa_erase(&ns->pr_per_ctrl_refs, idx); 1151 + percpu_ref_exit(&pc_ref->ref); 1152 + kfree(pc_ref); 1153 + } 1154 + 1155 + xa_destroy(&ns->pr_per_ctrl_refs); 1156 + }
+108
drivers/nvme/target/trace.c
··· 180 180 return ret; 181 181 } 182 182 183 + static const char *nvmet_trace_resv_reg(struct trace_seq *p, u8 *cdw10) 184 + { 185 + static const char * const rrega_strs[] = { 186 + [0x00] = "register", 187 + [0x01] = "unregister", 188 + [0x02] = "replace", 189 + }; 190 + const char *ret = trace_seq_buffer_ptr(p); 191 + u8 rrega = cdw10[0] & 0x7; 192 + u8 iekey = (cdw10[0] >> 3) & 0x1; 193 + u8 ptpl = (cdw10[3] >> 6) & 0x3; 194 + const char *rrega_str; 195 + 196 + if (rrega < ARRAY_SIZE(rrega_strs) && rrega_strs[rrega]) 197 + rrega_str = rrega_strs[rrega]; 198 + else 199 + rrega_str = "reserved"; 200 + 201 + trace_seq_printf(p, "rrega=%u:%s, iekey=%u, ptpl=%u", 202 + rrega, rrega_str, iekey, ptpl); 203 + trace_seq_putc(p, 0); 204 + 205 + return ret; 206 + } 207 + 208 + static const char * const rtype_strs[] = { 209 + [0x00] = "reserved", 210 + [0x01] = "write exclusive", 211 + [0x02] = "exclusive access", 212 + [0x03] = "write exclusive registrants only", 213 + [0x04] = "exclusive access registrants only", 214 + [0x05] = "write exclusive all registrants", 215 + [0x06] = "exclusive access all registrants", 216 + }; 217 + 218 + static const char *nvmet_trace_resv_acq(struct trace_seq *p, u8 *cdw10) 219 + { 220 + static const char * const racqa_strs[] = { 221 + [0x00] = "acquire", 222 + [0x01] = "preempt", 223 + [0x02] = "preempt and abort", 224 + }; 225 + const char *ret = trace_seq_buffer_ptr(p); 226 + u8 racqa = cdw10[0] & 0x7; 227 + u8 iekey = (cdw10[0] >> 3) & 0x1; 228 + u8 rtype = cdw10[1]; 229 + const char *racqa_str = "reserved"; 230 + const char *rtype_str = "reserved"; 231 + 232 + if (racqa < ARRAY_SIZE(racqa_strs) && racqa_strs[racqa]) 233 + racqa_str = racqa_strs[racqa]; 234 + 235 + if (rtype < ARRAY_SIZE(rtype_strs) && rtype_strs[rtype]) 236 + rtype_str = rtype_strs[rtype]; 237 + 238 + trace_seq_printf(p, "racqa=%u:%s, iekey=%u, rtype=%u:%s", 239 + racqa, racqa_str, iekey, rtype, rtype_str); 240 + trace_seq_putc(p, 0); 241 + 242 + return ret; 243 + } 244 
+ 245 + static const char *nvmet_trace_resv_rel(struct trace_seq *p, u8 *cdw10) 246 + { 247 + static const char * const rrela_strs[] = { 248 + [0x00] = "release", 249 + [0x01] = "clear", 250 + }; 251 + const char *ret = trace_seq_buffer_ptr(p); 252 + u8 rrela = cdw10[0] & 0x7; 253 + u8 iekey = (cdw10[0] >> 3) & 0x1; 254 + u8 rtype = cdw10[1]; 255 + const char *rrela_str = "reserved"; 256 + const char *rtype_str = "reserved"; 257 + 258 + if (rrela < ARRAY_SIZE(rrela_strs) && rrela_strs[rrela]) 259 + rrela_str = rrela_strs[rrela]; 260 + 261 + if (rtype < ARRAY_SIZE(rtype_strs) && rtype_strs[rtype]) 262 + rtype_str = rtype_strs[rtype]; 263 + 264 + trace_seq_printf(p, "rrela=%u:%s, iekey=%u, rtype=%u:%s", 265 + rrela, rrela_str, iekey, rtype, rtype_str); 266 + trace_seq_putc(p, 0); 267 + 268 + return ret; 269 + } 270 + 271 + static const char *nvmet_trace_resv_report(struct trace_seq *p, u8 *cdw10) 272 + { 273 + const char *ret = trace_seq_buffer_ptr(p); 274 + u32 numd = get_unaligned_le32(cdw10); 275 + u8 eds = cdw10[4] & 0x1; 276 + 277 + trace_seq_printf(p, "numd=%u, eds=%u", numd, eds); 278 + trace_seq_putc(p, 0); 279 + 280 + return ret; 281 + } 282 + 183 283 const char *nvmet_trace_parse_nvm_cmd(struct trace_seq *p, 184 284 u8 opcode, u8 *cdw10) 185 285 { ··· 295 195 return nvmet_trace_zone_mgmt_send(p, cdw10); 296 196 case nvme_cmd_zone_mgmt_recv: 297 197 return nvmet_trace_zone_mgmt_recv(p, cdw10); 198 + case nvme_cmd_resv_register: 199 + return nvmet_trace_resv_reg(p, cdw10); 200 + case nvme_cmd_resv_acquire: 201 + return nvmet_trace_resv_acq(p, cdw10); 202 + case nvme_cmd_resv_release: 203 + return nvmet_trace_resv_rel(p, cdw10); 204 + case nvme_cmd_resv_report: 205 + return nvmet_trace_resv_report(p, cdw10); 298 206 default: 299 207 return nvmet_trace_common(p, cdw10); 300 208 }
+132 -3
include/linux/nvme.h
··· 327 327 __le32 sanicap; 328 328 __le32 hmminds; 329 329 __le16 hmmaxd; 330 - __u8 rsvd338[4]; 330 + __le16 nvmsetidmax; 331 + __le16 endgidmax; 331 332 __u8 anatt; 332 333 __u8 anacap; 333 334 __le32 anagrpmax; ··· 523 522 NVME_ID_CNS_NS_DESC_LIST = 0x03, 524 523 NVME_ID_CNS_CS_NS = 0x05, 525 524 NVME_ID_CNS_CS_CTRL = 0x06, 525 + NVME_ID_CNS_NS_ACTIVE_LIST_CS = 0x07, 526 526 NVME_ID_CNS_NS_CS_INDEP = 0x08, 527 527 NVME_ID_CNS_NS_PRESENT_LIST = 0x10, 528 528 NVME_ID_CNS_NS_PRESENT = 0x11, ··· 532 530 NVME_ID_CNS_SCNDRY_CTRL_LIST = 0x15, 533 531 NVME_ID_CNS_NS_GRANULARITY = 0x16, 534 532 NVME_ID_CNS_UUID_LIST = 0x17, 533 + NVME_ID_CNS_ENDGRP_LIST = 0x19, 535 534 }; 536 535 537 536 enum { ··· 563 560 NVME_NS_FLBAS_LBA_SHIFT = 1, 564 561 NVME_NS_FLBAS_META_EXT = 0x10, 565 562 NVME_NS_NMIC_SHARED = 1 << 0, 563 + NVME_NS_ROTATIONAL = 1 << 4, 564 + NVME_NS_VWC_NOT_PRESENT = 1 << 5, 566 565 NVME_LBAF_RP_BEST = 0, 567 566 NVME_LBAF_RP_BETTER = 1, 568 567 NVME_LBAF_RP_GOOD = 2, ··· 620 615 NVME_NIDT_NGUID = 0x02, 621 616 NVME_NIDT_UUID = 0x03, 622 617 NVME_NIDT_CSI = 0x04, 618 + }; 619 + 620 + struct nvme_endurance_group_log { 621 + __u8 egcw; 622 + __u8 egfeat; 623 + __u8 rsvd2; 624 + __u8 avsp; 625 + __u8 avspt; 626 + __u8 pused; 627 + __le16 did; 628 + __u8 rsvd8[24]; 629 + __u8 ee[16]; 630 + __u8 dur[16]; 631 + __u8 duw[16]; 632 + __u8 muw[16]; 633 + __u8 hrc[16]; 634 + __u8 hwc[16]; 635 + __u8 mdie[16]; 636 + __u8 neile[16]; 637 + __u8 tegcap[16]; 638 + __u8 uegcap[16]; 639 + __u8 rsvd192[320]; 640 + }; 641 + 642 + struct nvme_rotational_media_log { 643 + __le16 endgid; 644 + __le16 numa; 645 + __le16 nrs; 646 + __u8 rsvd6[2]; 647 + __le32 spinc; 648 + __le32 fspinc; 649 + __le32 ldc; 650 + __le32 fldc; 651 + __u8 rsvd24[488]; 623 652 }; 624 653 625 654 struct nvme_smart_log { ··· 1283 1244 NVME_FEAT_WRITE_PROTECT = 0x84, 1284 1245 NVME_FEAT_VENDOR_START = 0xC0, 1285 1246 NVME_FEAT_VENDOR_END = 0xFF, 1247 + NVME_LOG_SUPPORTED = 0x00, 1286 1248 NVME_LOG_ERROR = 0x01, 
1287 1249 NVME_LOG_SMART = 0x02, 1288 1250 NVME_LOG_FW_SLOT = 0x03, ··· 1294 1254 NVME_LOG_TELEMETRY_CTRL = 0x08, 1295 1255 NVME_LOG_ENDURANCE_GROUP = 0x09, 1296 1256 NVME_LOG_ANA = 0x0c, 1257 + NVME_LOG_FEATURES = 0x12, 1258 + NVME_LOG_RMI = 0x16, 1297 1259 NVME_LOG_DISC = 0x70, 1298 1260 NVME_LOG_RESERVATION = 0x80, 1299 1261 NVME_FWACT_REPL = (0 << 3), 1300 1262 NVME_FWACT_REPL_ACTV = (1 << 3), 1301 1263 NVME_FWACT_ACTV = (2 << 3), 1264 + }; 1265 + 1266 + struct nvme_supported_log { 1267 + __le32 lids[256]; 1268 + }; 1269 + 1270 + enum { 1271 + NVME_LIDS_LSUPP = 1 << 0, 1272 + }; 1273 + 1274 + struct nvme_supported_features_log { 1275 + __le32 fis[256]; 1276 + }; 1277 + 1278 + enum { 1279 + NVME_FIS_FSUPP = 1 << 0, 1280 + NVME_FIS_NSCPE = 1 << 20, 1281 + NVME_FIS_CSCPE = 1 << 21, 1302 1282 }; 1303 1283 1304 1284 /* NVMe Namespace Write Protect State */ ··· 1341 1281 __u8 cns; 1342 1282 __u8 rsvd3; 1343 1283 __le16 ctrlid; 1344 - __u8 rsvd11[3]; 1284 + __le16 cnssid; 1285 + __u8 rsvd11; 1345 1286 __u8 csi; 1346 1287 __u32 rsvd12[4]; 1347 1288 }; ··· 1450 1389 __u8 lsp; /* upper 4 bits reserved */ 1451 1390 __le16 numdl; 1452 1391 __le16 numdu; 1453 - __u16 rsvd11; 1392 + __le16 lsi; 1454 1393 union { 1455 1394 struct { 1456 1395 __le32 lpol; ··· 2097 2036 #define NVME_MAJOR(ver) ((ver) >> 16) 2098 2037 #define NVME_MINOR(ver) (((ver) >> 8) & 0xff) 2099 2038 #define NVME_TERTIARY(ver) ((ver) & 0xff) 2039 + 2040 + enum { 2041 + NVME_AEN_RESV_LOG_PAGE_AVALIABLE = 0x00, 2042 + }; 2043 + 2044 + enum { 2045 + NVME_PR_LOG_EMPTY_LOG_PAGE = 0x00, 2046 + NVME_PR_LOG_REGISTRATION_PREEMPTED = 0x01, 2047 + NVME_PR_LOG_RESERVATION_RELEASED = 0x02, 2048 + NVME_PR_LOG_RESERVATOIN_PREEMPTED = 0x03, 2049 + }; 2050 + 2051 + enum { 2052 + NVME_PR_NOTIFY_BIT_REG_PREEMPTED = 1, 2053 + NVME_PR_NOTIFY_BIT_RESV_RELEASED = 2, 2054 + NVME_PR_NOTIFY_BIT_RESV_PREEMPTED = 3, 2055 + }; 2056 + 2057 + struct nvme_pr_log { 2058 + __le64 count; 2059 + __u8 type; 2060 + __u8 nr_pages; 2061 + __u8 
rsvd1[2]; 2062 + __le32 nsid; 2063 + __u8 rsvd2[48]; 2064 + }; 2065 + 2066 + struct nvmet_pr_register_data { 2067 + __le64 crkey; 2068 + __le64 nrkey; 2069 + }; 2070 + 2071 + struct nvmet_pr_acquire_data { 2072 + __le64 crkey; 2073 + __le64 prkey; 2074 + }; 2075 + 2076 + struct nvmet_pr_release_data { 2077 + __le64 crkey; 2078 + }; 2079 + 2080 + enum nvme_pr_capabilities { 2081 + NVME_PR_SUPPORT_PTPL = 1, 2082 + NVME_PR_SUPPORT_WRITE_EXCLUSIVE = 1 << 1, 2083 + NVME_PR_SUPPORT_EXCLUSIVE_ACCESS = 1 << 2, 2084 + NVME_PR_SUPPORT_WRITE_EXCLUSIVE_REG_ONLY = 1 << 3, 2085 + NVME_PR_SUPPORT_EXCLUSIVE_ACCESS_REG_ONLY = 1 << 4, 2086 + NVME_PR_SUPPORT_WRITE_EXCLUSIVE_ALL_REGS = 1 << 5, 2087 + NVME_PR_SUPPORT_EXCLUSIVE_ACCESS_ALL_REGS = 1 << 6, 2088 + NVME_PR_SUPPORT_IEKEY_VER_1_3_DEF = 1 << 7, 2089 + }; 2090 + 2091 + enum nvme_pr_register_action { 2092 + NVME_PR_REGISTER_ACT_REG = 0, 2093 + NVME_PR_REGISTER_ACT_UNREG = 1, 2094 + NVME_PR_REGISTER_ACT_REPLACE = 1 << 1, 2095 + }; 2096 + 2097 + enum nvme_pr_acquire_action { 2098 + NVME_PR_ACQUIRE_ACT_ACQUIRE = 0, 2099 + NVME_PR_ACQUIRE_ACT_PREEMPT = 1, 2100 + NVME_PR_ACQUIRE_ACT_PREEMPT_AND_ABORT = 1 << 1, 2101 + }; 2102 + 2103 + enum nvme_pr_release_action { 2104 + NVME_PR_RELEASE_ACT_RELEASE = 0, 2105 + NVME_PR_RELEASE_ACT_CLEAR = 1, 2106 + }; 2100 2107 2101 2108 #endif /* _LINUX_NVME_H */