Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'nvme-6.16-2025-05-20' of git://git.infradead.org/nvme into for-6.16/block

Pull NVMe updates from Christoph:

"nvme updates for Linux 6.16

- add per-node DMA pools and use them for PRP/SGL allocations
(Caleb Sander Mateos, Keith Busch)
- nvme-fcloop refcounting fixes (Daniel Wagner)
- support delayed removal of the multipath node and optionally support
the multipath node for private namespaces (Nilay Shroff)
- support shared CQs in the PCI endpoint target code (Wilfred Mallawa)
- support admin-queue only authentication (Hannes Reinecke)
- use the crc32c library instead of the crypto API (Eric Biggers)
- misc cleanups (Christoph Hellwig, Marcelo Moreira, Hannes Reinecke,
Leon Romanovsky, Gustavo A. R. Silva)"

* tag 'nvme-6.16-2025-05-20' of git://git.infradead.org/nvme: (42 commits)
nvme: rename nvme_mpath_shutdown_disk to nvme_mpath_remove_disk
nvme: introduce multipath_always_on module param
nvme-multipath: introduce delayed removal of the multipath head node
nvme-pci: derive and better document max segments limits
nvme-pci: use struct_size for allocation struct nvme_dev
nvme-pci: add a symbolic name for the small pool size
nvme-pci: use a better encoding for small prp pool allocations
nvme-pci: rename the descriptor pools
nvme-pci: remove struct nvme_descriptor
nvme-pci: store aborted state in flags variable
nvme-pci: don't try to use SGLs for metadata on the admin queue
nvme-pci: make PRP list DMA pools per-NUMA-node
nvme-pci: factor out a nvme_init_hctx_common() helper
dmapool: add NUMA affinity support
nvme-fc: do not reference lsrsp after failure
nvmet-fcloop: don't wait for lport cleanup
nvmet-fcloop: add missing fcloop_callback_host_done
nvmet-fc: take tgtport refs for portentry
nvmet-fc: free pending reqs on tgtport unregister
nvmet-fcloop: drop response if targetport is gone
...

Jens Axboe 39eb8101 496a3bc5

+1007 -532
+2 -13
drivers/nvme/common/auth.c
··· 242 242 { 243 243 const char *hmac_name; 244 244 struct crypto_shash *key_tfm; 245 - struct shash_desc *shash; 245 + SHASH_DESC_ON_STACK(shash, key_tfm); 246 246 struct nvme_dhchap_key *transformed_key; 247 247 int ret, key_len; 248 248 ··· 267 267 if (IS_ERR(key_tfm)) 268 268 return ERR_CAST(key_tfm); 269 269 270 - shash = kmalloc(sizeof(struct shash_desc) + 271 - crypto_shash_descsize(key_tfm), 272 - GFP_KERNEL); 273 - if (!shash) { 274 - ret = -ENOMEM; 275 - goto out_free_key; 276 - } 277 - 278 270 key_len = crypto_shash_digestsize(key_tfm); 279 271 transformed_key = nvme_auth_alloc_key(key_len, key->hash); 280 272 if (!transformed_key) { 281 273 ret = -ENOMEM; 282 - goto out_free_shash; 274 + goto out_free_key; 283 275 } 284 276 285 277 shash->tfm = key_tfm; ··· 291 299 if (ret < 0) 292 300 goto out_free_transformed_key; 293 301 294 - kfree(shash); 295 302 crypto_free_shash(key_tfm); 296 303 297 304 return transformed_key; 298 305 299 306 out_free_transformed_key: 300 307 nvme_auth_free_key(transformed_key); 301 - out_free_shash: 302 - kfree(shash); 303 308 out_free_key: 304 309 crypto_free_shash(key_tfm); 305 310
+22 -8
drivers/nvme/host/auth.c
··· 31 31 u32 s1; 32 32 u32 s2; 33 33 bool bi_directional; 34 + bool authenticated; 34 35 u16 transaction; 35 36 u8 status; 36 37 u8 dhgroup_id; ··· 683 682 static void nvme_auth_free_dhchap(struct nvme_dhchap_queue_context *chap) 684 683 { 685 684 nvme_auth_reset_dhchap(chap); 685 + chap->authenticated = false; 686 686 if (chap->shash_tfm) 687 687 crypto_free_shash(chap->shash_tfm); 688 688 if (chap->dh_tfm) ··· 932 930 } 933 931 if (!ret) { 934 932 chap->error = 0; 933 + chap->authenticated = true; 935 934 if (ctrl->opts->concat && 936 935 (ret = nvme_auth_secure_concat(ctrl, chap))) { 937 936 dev_warn(ctrl->device, 938 937 "%s: qid %d failed to enable secure concatenation\n", 939 938 __func__, chap->qid); 940 939 chap->error = ret; 940 + chap->authenticated = false; 941 941 } 942 942 return; 943 943 } ··· 1027 1023 return; 1028 1024 1029 1025 for (q = 1; q < ctrl->queue_count; q++) { 1030 - ret = nvme_auth_negotiate(ctrl, q); 1031 - if (ret) { 1032 - dev_warn(ctrl->device, 1033 - "qid %d: error %d setting up authentication\n", 1034 - q, ret); 1035 - break; 1036 - } 1026 + struct nvme_dhchap_queue_context *chap = 1027 + &ctrl->dhchap_ctxs[q]; 1028 + /* 1029 + * Skip re-authentication if the queue had 1030 + * not been authenticated initially. 1031 + */ 1032 + if (!chap->authenticated) 1033 + continue; 1034 + cancel_work_sync(&chap->auth_work); 1035 + queue_work(nvme_auth_wq, &chap->auth_work); 1037 1036 } 1038 1037 1039 1038 /* ··· 1044 1037 * the controller terminates the connection. 
1045 1038 */ 1046 1039 for (q = 1; q < ctrl->queue_count; q++) { 1047 - ret = nvme_auth_wait(ctrl, q); 1040 + struct nvme_dhchap_queue_context *chap = 1041 + &ctrl->dhchap_ctxs[q]; 1042 + if (!chap->authenticated) 1043 + continue; 1044 + flush_work(&chap->auth_work); 1045 + ret = chap->error; 1046 + nvme_auth_reset_dhchap(chap); 1048 1047 if (ret) 1049 1048 dev_warn(ctrl->device, 1050 1049 "qid %d: authentication failed\n", q); ··· 1089 1076 chap = &ctrl->dhchap_ctxs[i]; 1090 1077 chap->qid = i; 1091 1078 chap->ctrl = ctrl; 1079 + chap->authenticated = false; 1092 1080 INIT_WORK(&chap->auth_work, nvme_queue_auth_work); 1093 1081 } 1094 1082
+7 -5
drivers/nvme/host/core.c
··· 668 668 struct nvme_ns_head *head = 669 669 container_of(ref, struct nvme_ns_head, ref); 670 670 671 - nvme_mpath_remove_disk(head); 671 + nvme_mpath_put_disk(head); 672 672 ida_free(&head->subsys->ns_ida, head->instance); 673 673 cleanup_srcu_struct(&head->srcu); 674 674 nvme_put_subsystem(head->subsys); ··· 3743 3743 */ 3744 3744 if (h->ns_id != nsid || !nvme_is_unique_nsid(ctrl, h)) 3745 3745 continue; 3746 - if (!list_empty(&h->list) && nvme_tryget_ns_head(h)) 3746 + if (nvme_tryget_ns_head(h)) 3747 3747 return h; 3748 3748 } 3749 3749 ··· 3987 3987 } 3988 3988 } else { 3989 3989 ret = -EINVAL; 3990 - if (!info->is_shared || !head->shared) { 3990 + if ((!info->is_shared || !head->shared) && 3991 + !list_empty(&head->list)) { 3991 3992 dev_err(ctrl->device, 3992 3993 "Duplicate unshared namespace %d\n", 3993 3994 info->nsid); ··· 4192 4191 mutex_lock(&ns->ctrl->subsys->lock); 4193 4192 list_del_rcu(&ns->siblings); 4194 4193 if (list_empty(&ns->head->list)) { 4195 - list_del_init(&ns->head->entry); 4194 + if (!nvme_mpath_queue_if_no_path(ns->head)) 4195 + list_del_init(&ns->head->entry); 4196 4196 last_path = true; 4197 4197 } 4198 4198 mutex_unlock(&ns->ctrl->subsys->lock); ··· 4214 4212 synchronize_srcu(&ns->ctrl->srcu); 4215 4213 4216 4214 if (last_path) 4217 - nvme_mpath_shutdown_disk(ns->head); 4215 + nvme_mpath_remove_disk(ns->head); 4218 4216 nvme_put_ns(ns); 4219 4217 } 4220 4218
+10 -3
drivers/nvme/host/fc.c
··· 1410 1410 } 1411 1411 1412 1412 static void 1413 - nvme_fc_xmt_ls_rsp_done(struct nvmefc_ls_rsp *lsrsp) 1413 + nvme_fc_xmt_ls_rsp_free(struct nvmefc_ls_rcv_op *lsop) 1414 1414 { 1415 - struct nvmefc_ls_rcv_op *lsop = lsrsp->nvme_fc_private; 1416 1415 struct nvme_fc_rport *rport = lsop->rport; 1417 1416 struct nvme_fc_lport *lport = rport->lport; 1418 1417 unsigned long flags; ··· 1433 1434 } 1434 1435 1435 1436 static void 1437 + nvme_fc_xmt_ls_rsp_done(struct nvmefc_ls_rsp *lsrsp) 1438 + { 1439 + struct nvmefc_ls_rcv_op *lsop = lsrsp->nvme_fc_private; 1440 + 1441 + nvme_fc_xmt_ls_rsp_free(lsop); 1442 + } 1443 + 1444 + static void 1436 1445 nvme_fc_xmt_ls_rsp(struct nvmefc_ls_rcv_op *lsop) 1437 1446 { 1438 1447 struct nvme_fc_rport *rport = lsop->rport; ··· 1457 1450 dev_warn(lport->dev, 1458 1451 "LLDD rejected LS RSP xmt: LS %d status %d\n", 1459 1452 w0->ls_cmd, ret); 1460 - nvme_fc_xmt_ls_rsp_done(lsop->lsrsp); 1453 + nvme_fc_xmt_ls_rsp_free(lsop); 1461 1454 return; 1462 1455 } 1463 1456 }
+188 -18
drivers/nvme/host/multipath.c
··· 10 10 #include "nvme.h" 11 11 12 12 bool multipath = true; 13 - module_param(multipath, bool, 0444); 13 + static bool multipath_always_on; 14 + 15 + static int multipath_param_set(const char *val, const struct kernel_param *kp) 16 + { 17 + int ret; 18 + bool *arg = kp->arg; 19 + 20 + ret = param_set_bool(val, kp); 21 + if (ret) 22 + return ret; 23 + 24 + if (multipath_always_on && !*arg) { 25 + pr_err("Can't disable multipath when multipath_always_on is configured.\n"); 26 + *arg = true; 27 + return -EINVAL; 28 + } 29 + 30 + return 0; 31 + } 32 + 33 + static const struct kernel_param_ops multipath_param_ops = { 34 + .set = multipath_param_set, 35 + .get = param_get_bool, 36 + }; 37 + 38 + module_param_cb(multipath, &multipath_param_ops, &multipath, 0444); 14 39 MODULE_PARM_DESC(multipath, 15 40 "turn on native support for multiple controllers per subsystem"); 41 + 42 + static int multipath_always_on_set(const char *val, 43 + const struct kernel_param *kp) 44 + { 45 + int ret; 46 + bool *arg = kp->arg; 47 + 48 + ret = param_set_bool(val, kp); 49 + if (ret < 0) 50 + return ret; 51 + 52 + if (*arg) 53 + multipath = true; 54 + 55 + return 0; 56 + } 57 + 58 + static const struct kernel_param_ops multipath_always_on_ops = { 59 + .set = multipath_always_on_set, 60 + .get = param_get_bool, 61 + }; 62 + 63 + module_param_cb(multipath_always_on, &multipath_always_on_ops, 64 + &multipath_always_on, 0444); 65 + MODULE_PARM_DESC(multipath_always_on, 66 + "create multipath node always except for private namespace with non-unique nsid; note that this also implicitly enables native multipath support"); 16 67 17 68 static const char *nvme_iopolicy_names[] = { 18 69 [NVME_IOPOLICY_NUMA] = "numa", ··· 493 442 break; 494 443 } 495 444 } 496 - return false; 445 + 446 + /* 447 + * If "head->delayed_removal_secs" is configured (i.e., non-zero), do 448 + * not immediately fail I/O. 
Instead, requeue the I/O for the configured 449 + * duration, anticipating that if there's a transient link failure then 450 + * it may recover within this time window. This parameter is exported to 451 + * userspace via sysfs, and its default value is zero. It is internally 452 + * mapped to NVME_NSHEAD_QUEUE_IF_NO_PATH. When delayed_removal_secs is 453 + * non-zero, this flag is set to true. When zero, the flag is cleared. 454 + */ 455 + return nvme_mpath_queue_if_no_path(head); 497 456 } 498 457 499 458 static void nvme_ns_head_submit_bio(struct bio *bio) ··· 678 617 } 679 618 } 680 619 620 + static void nvme_remove_head(struct nvme_ns_head *head) 621 + { 622 + if (test_and_clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) { 623 + /* 624 + * requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared 625 + * to allow multipath to fail all I/O. 626 + */ 627 + kblockd_schedule_work(&head->requeue_work); 628 + 629 + nvme_cdev_del(&head->cdev, &head->cdev_device); 630 + synchronize_srcu(&head->srcu); 631 + del_gendisk(head->disk); 632 + nvme_put_ns_head(head); 633 + } 634 + } 635 + 636 + static void nvme_remove_head_work(struct work_struct *work) 637 + { 638 + struct nvme_ns_head *head = container_of(to_delayed_work(work), 639 + struct nvme_ns_head, remove_work); 640 + bool remove = false; 641 + 642 + mutex_lock(&head->subsys->lock); 643 + if (list_empty(&head->list)) { 644 + list_del_init(&head->entry); 645 + remove = true; 646 + } 647 + mutex_unlock(&head->subsys->lock); 648 + if (remove) 649 + nvme_remove_head(head); 650 + 651 + module_put(THIS_MODULE); 652 + } 653 + 681 654 int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) 682 655 { 683 656 struct queue_limits lim; ··· 721 626 spin_lock_init(&head->requeue_lock); 722 627 INIT_WORK(&head->requeue_work, nvme_requeue_work); 723 628 INIT_WORK(&head->partition_scan_work, nvme_partition_scan_work); 629 + INIT_DELAYED_WORK(&head->remove_work, nvme_remove_head_work); 630 + 
head->delayed_removal_secs = 0; 724 631 725 632 /* 726 - * Add a multipath node if the subsystems supports multiple controllers. 727 - * We also do this for private namespaces as the namespace sharing flag 728 - * could change after a rescan. 633 + * If "multipath_always_on" is enabled, a multipath node is added 634 + * regardless of whether the disk is single/multi ported, and whether 635 + * the namespace is shared or private. If "multipath_always_on" is not 636 + * enabled, a multipath node is added only if the subsystem supports 637 + * multiple controllers and the "multipath" option is configured. In 638 + * either case, for private namespaces, we ensure that the NSID is 639 + * unique. 729 640 */ 730 - if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || 731 - !nvme_is_unique_nsid(ctrl, head) || !multipath) 641 + if (!multipath_always_on) { 642 + if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || 643 + !multipath) 644 + return 0; 645 + } 646 + 647 + if (!nvme_is_unique_nsid(ctrl, head)) 732 648 return 0; 733 649 734 650 blk_set_stacking_limits(&lim); ··· 765 659 set_bit(GD_SUPPRESS_PART_SCAN, &head->disk->state); 766 660 sprintf(head->disk->disk_name, "nvme%dn%d", 767 661 ctrl->subsys->instance, head->instance); 662 + nvme_tryget_ns_head(head); 768 663 return 0; 769 664 } 770 665 ··· 1122 1015 } 1123 1016 DEVICE_ATTR_RO(numa_nodes); 1124 1017 1018 + static ssize_t delayed_removal_secs_show(struct device *dev, 1019 + struct device_attribute *attr, char *buf) 1020 + { 1021 + struct gendisk *disk = dev_to_disk(dev); 1022 + struct nvme_ns_head *head = disk->private_data; 1023 + int ret; 1024 + 1025 + mutex_lock(&head->subsys->lock); 1026 + ret = sysfs_emit(buf, "%u\n", head->delayed_removal_secs); 1027 + mutex_unlock(&head->subsys->lock); 1028 + return ret; 1029 + } 1030 + 1031 + static ssize_t delayed_removal_secs_store(struct device *dev, 1032 + struct device_attribute *attr, const char *buf, size_t count) 1033 + { 1034 + struct gendisk *disk = 
dev_to_disk(dev); 1035 + struct nvme_ns_head *head = disk->private_data; 1036 + unsigned int sec; 1037 + int ret; 1038 + 1039 + ret = kstrtouint(buf, 0, &sec); 1040 + if (ret < 0) 1041 + return ret; 1042 + 1043 + mutex_lock(&head->subsys->lock); 1044 + head->delayed_removal_secs = sec; 1045 + if (sec) 1046 + set_bit(NVME_NSHEAD_QUEUE_IF_NO_PATH, &head->flags); 1047 + else 1048 + clear_bit(NVME_NSHEAD_QUEUE_IF_NO_PATH, &head->flags); 1049 + mutex_unlock(&head->subsys->lock); 1050 + /* 1051 + * Ensure that update to NVME_NSHEAD_QUEUE_IF_NO_PATH is seen 1052 + * by its reader. 1053 + */ 1054 + synchronize_srcu(&head->srcu); 1055 + 1056 + return count; 1057 + } 1058 + 1059 + DEVICE_ATTR_RW(delayed_removal_secs); 1060 + 1125 1061 static int nvme_lookup_ana_group_desc(struct nvme_ctrl *ctrl, 1126 1062 struct nvme_ana_group_desc *desc, void *data) 1127 1063 { ··· 1286 1136 #endif 1287 1137 } 1288 1138 1289 - void nvme_mpath_shutdown_disk(struct nvme_ns_head *head) 1139 + void nvme_mpath_remove_disk(struct nvme_ns_head *head) 1290 1140 { 1291 - if (!head->disk) 1292 - return; 1293 - if (test_and_clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) { 1294 - nvme_cdev_del(&head->cdev, &head->cdev_device); 1141 + bool remove = false; 1142 + 1143 + mutex_lock(&head->subsys->lock); 1144 + /* 1145 + * We are called when all paths have been removed, and at that point 1146 + * head->list is expected to be empty. However, nvme_remove_ns() and 1147 + * nvme_init_ns_head() can run concurrently and so if head->delayed_ 1148 + * removal_secs is configured, it is possible that by the time we reach 1149 + * this point, head->list may no longer be empty. Therefore, we recheck 1150 + * head->list here. If it is no longer empty then we skip enqueuing the 1151 + * delayed head removal work. 
1152 + */ 1153 + if (!list_empty(&head->list)) 1154 + goto out; 1155 + 1156 + if (head->delayed_removal_secs) { 1295 1157 /* 1296 - * requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared 1297 - * to allow multipath to fail all I/O. 1158 + * Ensure that no one could remove this module while the head 1159 + * remove work is pending. 1298 1160 */ 1299 - synchronize_srcu(&head->srcu); 1300 - kblockd_schedule_work(&head->requeue_work); 1301 - del_gendisk(head->disk); 1161 + if (!try_module_get(THIS_MODULE)) 1162 + goto out; 1163 + queue_delayed_work(nvme_wq, &head->remove_work, 1164 + head->delayed_removal_secs * HZ); 1165 + } else { 1166 + list_del_init(&head->entry); 1167 + remove = true; 1302 1168 } 1169 + out: 1170 + mutex_unlock(&head->subsys->lock); 1171 + if (remove) 1172 + nvme_remove_head(head); 1303 1173 } 1304 1174 1305 - void nvme_mpath_remove_disk(struct nvme_ns_head *head) 1175 + void nvme_mpath_put_disk(struct nvme_ns_head *head) 1306 1176 { 1307 1177 if (!head->disk) 1308 1178 return;
+19 -5
drivers/nvme/host/nvme.h
··· 506 506 struct work_struct partition_scan_work; 507 507 struct mutex lock; 508 508 unsigned long flags; 509 - #define NVME_NSHEAD_DISK_LIVE 0 509 + struct delayed_work remove_work; 510 + unsigned int delayed_removal_secs; 511 + #define NVME_NSHEAD_DISK_LIVE 0 512 + #define NVME_NSHEAD_QUEUE_IF_NO_PATH 1 510 513 struct nvme_ns __rcu *current_path[]; 511 514 #endif 512 515 }; ··· 966 963 void nvme_mpath_add_sysfs_link(struct nvme_ns_head *ns); 967 964 void nvme_mpath_remove_sysfs_link(struct nvme_ns *ns); 968 965 void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid); 969 - void nvme_mpath_remove_disk(struct nvme_ns_head *head); 966 + void nvme_mpath_put_disk(struct nvme_ns_head *head); 970 967 int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id); 971 968 void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl); 972 969 void nvme_mpath_update(struct nvme_ctrl *ctrl); ··· 975 972 bool nvme_mpath_clear_current_path(struct nvme_ns *ns); 976 973 void nvme_mpath_revalidate_paths(struct nvme_ns *ns); 977 974 void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl); 978 - void nvme_mpath_shutdown_disk(struct nvme_ns_head *head); 975 + void nvme_mpath_remove_disk(struct nvme_ns_head *head); 979 976 void nvme_mpath_start_request(struct request *rq); 980 977 void nvme_mpath_end_request(struct request *rq); 981 978 ··· 992 989 extern struct device_attribute dev_attr_ana_state; 993 990 extern struct device_attribute dev_attr_queue_depth; 994 991 extern struct device_attribute dev_attr_numa_nodes; 992 + extern struct device_attribute dev_attr_delayed_removal_secs; 995 993 extern struct device_attribute subsys_attr_iopolicy; 996 994 997 995 static inline bool nvme_disk_is_ns_head(struct gendisk *disk) 998 996 { 999 997 return disk->fops == &nvme_ns_head_ops; 998 + } 999 + static inline bool nvme_mpath_queue_if_no_path(struct nvme_ns_head *head) 1000 + { 1001 + if (test_bit(NVME_NSHEAD_QUEUE_IF_NO_PATH, &head->flags)) 1002 + return true; 1003 + return 
false; 1000 1004 } 1001 1005 #else 1002 1006 #define multipath false ··· 1025 1015 static inline void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid) 1026 1016 { 1027 1017 } 1028 - static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head) 1018 + static inline void nvme_mpath_put_disk(struct nvme_ns_head *head) 1029 1019 { 1030 1020 } 1031 1021 static inline void nvme_mpath_add_sysfs_link(struct nvme_ns *ns) ··· 1044 1034 static inline void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl) 1045 1035 { 1046 1036 } 1047 - static inline void nvme_mpath_shutdown_disk(struct nvme_ns_head *head) 1037 + static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head) 1048 1038 { 1049 1039 } 1050 1040 static inline void nvme_trace_bio_complete(struct request *req) ··· 1089 1079 { 1090 1080 } 1091 1081 static inline bool nvme_disk_is_ns_head(struct gendisk *disk) 1082 + { 1083 + return false; 1084 + } 1085 + static inline bool nvme_mpath_queue_if_no_path(struct nvme_ns_head *head) 1092 1086 { 1093 1087 return false; 1094 1088 }
+167 -133
drivers/nvme/host/pci.c
··· 18 18 #include <linux/mm.h> 19 19 #include <linux/module.h> 20 20 #include <linux/mutex.h> 21 + #include <linux/nodemask.h> 21 22 #include <linux/once.h> 22 23 #include <linux/pci.h> 23 24 #include <linux/suspend.h> ··· 35 34 #define SQ_SIZE(q) ((q)->q_depth << (q)->sqes) 36 35 #define CQ_SIZE(q) ((q)->q_depth * sizeof(struct nvme_completion)) 37 36 38 - #define SGES_PER_PAGE (NVME_CTRL_PAGE_SIZE / sizeof(struct nvme_sgl_desc)) 37 + /* Optimisation for I/Os between 4k and 128k */ 38 + #define NVME_SMALL_POOL_SIZE 256 39 39 40 40 /* 41 41 * These can be higher, but we need to ensure that any command doesn't 42 42 * require an sg allocation that needs more than a page of data. 43 43 */ 44 44 #define NVME_MAX_KB_SZ 8192 45 - #define NVME_MAX_SEGS 128 46 - #define NVME_MAX_META_SEGS 15 47 - #define NVME_MAX_NR_ALLOCATIONS 5 45 + #define NVME_MAX_NR_DESCRIPTORS 5 46 + 47 + /* 48 + * For data SGLs we support a single descriptors worth of SGL entries, but for 49 + * now we also limit it to avoid an allocation larger than PAGE_SIZE for the 50 + * scatterlist. 51 + */ 52 + #define NVME_MAX_SEGS \ 53 + min(NVME_CTRL_PAGE_SIZE / sizeof(struct nvme_sgl_desc), \ 54 + (PAGE_SIZE / sizeof(struct scatterlist))) 55 + 56 + /* 57 + * For metadata SGLs, only the small descriptor is supported, and the first 58 + * entry is the segment descriptor, which for the data pointer sits in the SQE. 59 + */ 60 + #define NVME_MAX_META_SEGS \ 61 + ((NVME_SMALL_POOL_SIZE / sizeof(struct nvme_sgl_desc)) - 1) 48 62 49 63 static int use_threaded_interrupts; 50 64 module_param(use_threaded_interrupts, int, 0444); ··· 128 112 static void nvme_delete_io_queues(struct nvme_dev *dev); 129 113 static void nvme_update_attrs(struct nvme_dev *dev); 130 114 115 + struct nvme_descriptor_pools { 116 + struct dma_pool *large; 117 + struct dma_pool *small; 118 + }; 119 + 131 120 /* 132 121 * Represents an NVM Express device. Each nvme_dev is a PCI function. 
133 122 */ ··· 142 121 struct blk_mq_tag_set admin_tagset; 143 122 u32 __iomem *dbs; 144 123 struct device *dev; 145 - struct dma_pool *prp_page_pool; 146 - struct dma_pool *prp_small_pool; 147 124 unsigned online_queues; 148 125 unsigned max_qid; 149 126 unsigned io_queues[HCTX_MAX_TYPES]; ··· 181 162 unsigned int nr_allocated_queues; 182 163 unsigned int nr_write_queues; 183 164 unsigned int nr_poll_queues; 165 + struct nvme_descriptor_pools descriptor_pools[]; 184 166 }; 185 167 186 168 static int io_queue_depth_set(const char *val, const struct kernel_param *kp) ··· 211 191 */ 212 192 struct nvme_queue { 213 193 struct nvme_dev *dev; 194 + struct nvme_descriptor_pools descriptor_pools; 214 195 spinlock_t sq_lock; 215 196 void *sq_cmds; 216 197 /* only used for poll queues: */ ··· 240 219 struct completion delete_done; 241 220 }; 242 221 243 - union nvme_descriptor { 244 - struct nvme_sgl_desc *sg_list; 245 - __le64 *prp_list; 222 + /* bits for iod->flags */ 223 + enum nvme_iod_flags { 224 + /* this command has been aborted by the timeout handler */ 225 + IOD_ABORTED = 1U << 0, 226 + 227 + /* uses the small descriptor pool */ 228 + IOD_SMALL_DESCRIPTOR = 1U << 1, 246 229 }; 247 230 248 231 /* 249 232 * The nvme_iod describes the data in an I/O. 250 - * 251 - * The sg pointer contains the list of PRP/SGL chunk allocations in addition 252 - * to the actual struct scatterlist. 253 233 */ 254 234 struct nvme_iod { 255 235 struct nvme_request req; 256 236 struct nvme_command cmd; 257 - bool aborted; 258 - s8 nr_allocations; /* PRP list pool allocations. 
0 means small 259 - pool in use */ 237 + u8 flags; 238 + u8 nr_descriptors; 260 239 unsigned int dma_len; /* length of single DMA segment mapping */ 261 240 dma_addr_t first_dma; 262 241 dma_addr_t meta_dma; 263 242 struct sg_table sgt; 264 243 struct sg_table meta_sgt; 265 - union nvme_descriptor meta_list; 266 - union nvme_descriptor list[NVME_MAX_NR_ALLOCATIONS]; 244 + struct nvme_sgl_desc *meta_descriptor; 245 + void *descriptors[NVME_MAX_NR_DESCRIPTORS]; 267 246 }; 268 247 269 248 static inline unsigned int nvme_dbbuf_size(struct nvme_dev *dev) ··· 418 397 return DIV_ROUND_UP(8 * nprps, NVME_CTRL_PAGE_SIZE - 8); 419 398 } 420 399 421 - static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, 422 - unsigned int hctx_idx) 400 + static struct nvme_descriptor_pools * 401 + nvme_setup_descriptor_pools(struct nvme_dev *dev, unsigned numa_node) 402 + { 403 + struct nvme_descriptor_pools *pools = &dev->descriptor_pools[numa_node]; 404 + size_t small_align = NVME_SMALL_POOL_SIZE; 405 + 406 + if (pools->small) 407 + return pools; /* already initialized */ 408 + 409 + pools->large = dma_pool_create_node("nvme descriptor page", dev->dev, 410 + NVME_CTRL_PAGE_SIZE, NVME_CTRL_PAGE_SIZE, 0, numa_node); 411 + if (!pools->large) 412 + return ERR_PTR(-ENOMEM); 413 + 414 + if (dev->ctrl.quirks & NVME_QUIRK_DMAPOOL_ALIGN_512) 415 + small_align = 512; 416 + 417 + pools->small = dma_pool_create_node("nvme descriptor small", dev->dev, 418 + NVME_SMALL_POOL_SIZE, small_align, 0, numa_node); 419 + if (!pools->small) { 420 + dma_pool_destroy(pools->large); 421 + pools->large = NULL; 422 + return ERR_PTR(-ENOMEM); 423 + } 424 + 425 + return pools; 426 + } 427 + 428 + static void nvme_release_descriptor_pools(struct nvme_dev *dev) 429 + { 430 + unsigned i; 431 + 432 + for (i = 0; i < nr_node_ids; i++) { 433 + struct nvme_descriptor_pools *pools = &dev->descriptor_pools[i]; 434 + 435 + dma_pool_destroy(pools->large); 436 + dma_pool_destroy(pools->small); 437 + } 438 + } 439 
+ 440 + static int nvme_init_hctx_common(struct blk_mq_hw_ctx *hctx, void *data, 441 + unsigned qid) 423 442 { 424 443 struct nvme_dev *dev = to_nvme_dev(data); 425 - struct nvme_queue *nvmeq = &dev->queues[0]; 444 + struct nvme_queue *nvmeq = &dev->queues[qid]; 445 + struct nvme_descriptor_pools *pools; 446 + struct blk_mq_tags *tags; 426 447 427 - WARN_ON(hctx_idx != 0); 428 - WARN_ON(dev->admin_tagset.tags[0] != hctx->tags); 448 + tags = qid ? dev->tagset.tags[qid - 1] : dev->admin_tagset.tags[0]; 449 + WARN_ON(tags != hctx->tags); 450 + pools = nvme_setup_descriptor_pools(dev, hctx->numa_node); 451 + if (IS_ERR(pools)) 452 + return PTR_ERR(pools); 429 453 454 + nvmeq->descriptor_pools = *pools; 430 455 hctx->driver_data = nvmeq; 431 456 return 0; 432 457 } 433 458 434 - static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, 435 - unsigned int hctx_idx) 459 + static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, 460 + unsigned int hctx_idx) 436 461 { 437 - struct nvme_dev *dev = to_nvme_dev(data); 438 - struct nvme_queue *nvmeq = &dev->queues[hctx_idx + 1]; 462 + WARN_ON(hctx_idx != 0); 463 + return nvme_init_hctx_common(hctx, data, 0); 464 + } 439 465 440 - WARN_ON(dev->tagset.tags[hctx_idx] != hctx->tags); 441 - hctx->driver_data = nvmeq; 442 - return 0; 466 + static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, 467 + unsigned int hctx_idx) 468 + { 469 + return nvme_init_hctx_common(hctx, data, hctx_idx + 1); 443 470 } 444 471 445 472 static int nvme_pci_init_request(struct blk_mq_tag_set *set, ··· 606 537 return true; 607 538 } 608 539 609 - static void nvme_free_prps(struct nvme_dev *dev, struct request *req) 540 + static inline struct dma_pool *nvme_dma_pool(struct nvme_queue *nvmeq, 541 + struct nvme_iod *iod) 542 + { 543 + if (iod->flags & IOD_SMALL_DESCRIPTOR) 544 + return nvmeq->descriptor_pools.small; 545 + return nvmeq->descriptor_pools.large; 546 + } 547 + 548 + static void nvme_free_descriptors(struct 
nvme_queue *nvmeq, struct request *req) 610 549 { 611 550 const int last_prp = NVME_CTRL_PAGE_SIZE / sizeof(__le64) - 1; 612 551 struct nvme_iod *iod = blk_mq_rq_to_pdu(req); 613 552 dma_addr_t dma_addr = iod->first_dma; 614 553 int i; 615 554 616 - for (i = 0; i < iod->nr_allocations; i++) { 617 - __le64 *prp_list = iod->list[i].prp_list; 555 + if (iod->nr_descriptors == 1) { 556 + dma_pool_free(nvme_dma_pool(nvmeq, iod), iod->descriptors[0], 557 + dma_addr); 558 + return; 559 + } 560 + 561 + for (i = 0; i < iod->nr_descriptors; i++) { 562 + __le64 *prp_list = iod->descriptors[i]; 618 563 dma_addr_t next_dma_addr = le64_to_cpu(prp_list[last_prp]); 619 564 620 - dma_pool_free(dev->prp_page_pool, prp_list, dma_addr); 565 + dma_pool_free(nvmeq->descriptor_pools.large, prp_list, 566 + dma_addr); 621 567 dma_addr = next_dma_addr; 622 568 } 623 569 } 624 570 625 - static void nvme_unmap_data(struct nvme_dev *dev, struct request *req) 571 + static void nvme_unmap_data(struct nvme_dev *dev, struct nvme_queue *nvmeq, 572 + struct request *req) 626 573 { 627 574 struct nvme_iod *iod = blk_mq_rq_to_pdu(req); 628 575 ··· 651 566 WARN_ON_ONCE(!iod->sgt.nents); 652 567 653 568 dma_unmap_sgtable(dev->dev, &iod->sgt, rq_dma_dir(req), 0); 654 - 655 - if (iod->nr_allocations == 0) 656 - dma_pool_free(dev->prp_small_pool, iod->list[0].sg_list, 657 - iod->first_dma); 658 - else if (iod->nr_allocations == 1) 659 - dma_pool_free(dev->prp_page_pool, iod->list[0].sg_list, 660 - iod->first_dma); 661 - else 662 - nvme_free_prps(dev, req); 569 + nvme_free_descriptors(nvmeq, req); 663 570 mempool_free(iod->sgt.sgl, dev->iod_mempool); 664 571 } 665 572 ··· 669 592 } 670 593 } 671 594 672 - static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev, 595 + static blk_status_t nvme_pci_setup_prps(struct nvme_queue *nvmeq, 673 596 struct request *req, struct nvme_rw_command *cmnd) 674 597 { 675 598 struct nvme_iod *iod = blk_mq_rq_to_pdu(req); 676 - struct dma_pool *pool; 677 599 int length = 
blk_rq_payload_bytes(req); 678 600 struct scatterlist *sg = iod->sgt.sgl; 679 601 int dma_len = sg_dma_len(sg); ··· 680 604 int offset = dma_addr & (NVME_CTRL_PAGE_SIZE - 1); 681 605 __le64 *prp_list; 682 606 dma_addr_t prp_dma; 683 - int nprps, i; 607 + int i; 684 608 685 609 length -= (NVME_CTRL_PAGE_SIZE - offset); 686 610 if (length <= 0) { ··· 702 626 goto done; 703 627 } 704 628 705 - nprps = DIV_ROUND_UP(length, NVME_CTRL_PAGE_SIZE); 706 - if (nprps <= (256 / 8)) { 707 - pool = dev->prp_small_pool; 708 - iod->nr_allocations = 0; 709 - } else { 710 - pool = dev->prp_page_pool; 711 - iod->nr_allocations = 1; 712 - } 629 + if (DIV_ROUND_UP(length, NVME_CTRL_PAGE_SIZE) <= 630 + NVME_SMALL_POOL_SIZE / sizeof(__le64)) 631 + iod->flags |= IOD_SMALL_DESCRIPTOR; 713 632 714 - prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma); 715 - if (!prp_list) { 716 - iod->nr_allocations = -1; 633 + prp_list = dma_pool_alloc(nvme_dma_pool(nvmeq, iod), GFP_ATOMIC, 634 + &prp_dma); 635 + if (!prp_list) 717 636 return BLK_STS_RESOURCE; 718 - } 719 - iod->list[0].prp_list = prp_list; 637 + iod->descriptors[iod->nr_descriptors++] = prp_list; 720 638 iod->first_dma = prp_dma; 721 639 i = 0; 722 640 for (;;) { 723 641 if (i == NVME_CTRL_PAGE_SIZE >> 3) { 724 642 __le64 *old_prp_list = prp_list; 725 - prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma); 643 + 644 + prp_list = dma_pool_alloc(nvmeq->descriptor_pools.large, 645 + GFP_ATOMIC, &prp_dma); 726 646 if (!prp_list) 727 647 goto free_prps; 728 - iod->list[iod->nr_allocations++].prp_list = prp_list; 648 + iod->descriptors[iod->nr_descriptors++] = prp_list; 729 649 prp_list[0] = old_prp_list[i - 1]; 730 650 old_prp_list[i - 1] = cpu_to_le64(prp_dma); 731 651 i = 1; ··· 745 673 cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma); 746 674 return BLK_STS_OK; 747 675 free_prps: 748 - nvme_free_prps(dev, req); 676 + nvme_free_descriptors(nvmeq, req); 749 677 return BLK_STS_RESOURCE; 750 678 bad_sgl: 751 679 WARN(DO_ONCE(nvme_print_sgl, 
iod->sgt.sgl, iod->sgt.nents), ··· 770 698 sge->type = NVME_SGL_FMT_LAST_SEG_DESC << 4; 771 699 } 772 700 773 - static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev, 701 + static blk_status_t nvme_pci_setup_sgls(struct nvme_queue *nvmeq, 774 702 struct request *req, struct nvme_rw_command *cmd) 775 703 { 776 704 struct nvme_iod *iod = blk_mq_rq_to_pdu(req); 777 - struct dma_pool *pool; 778 705 struct nvme_sgl_desc *sg_list; 779 706 struct scatterlist *sg = iod->sgt.sgl; 780 707 unsigned int entries = iod->sgt.nents; ··· 788 717 return BLK_STS_OK; 789 718 } 790 719 791 - if (entries <= (256 / sizeof(struct nvme_sgl_desc))) { 792 - pool = dev->prp_small_pool; 793 - iod->nr_allocations = 0; 794 - } else { 795 - pool = dev->prp_page_pool; 796 - iod->nr_allocations = 1; 797 - } 720 + if (entries <= NVME_SMALL_POOL_SIZE / sizeof(*sg_list)) 721 + iod->flags |= IOD_SMALL_DESCRIPTOR; 798 722 799 - sg_list = dma_pool_alloc(pool, GFP_ATOMIC, &sgl_dma); 800 - if (!sg_list) { 801 - iod->nr_allocations = -1; 723 + sg_list = dma_pool_alloc(nvme_dma_pool(nvmeq, iod), GFP_ATOMIC, 724 + &sgl_dma); 725 + if (!sg_list) 802 726 return BLK_STS_RESOURCE; 803 - } 804 - 805 - iod->list[0].sg_list = sg_list; 727 + iod->descriptors[iod->nr_descriptors++] = sg_list; 806 728 iod->first_dma = sgl_dma; 807 729 808 730 nvme_pci_sgl_set_seg(&cmd->dptr.sgl, sgl_dma, entries); ··· 849 785 static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, 850 786 struct nvme_command *cmnd) 851 787 { 788 + struct nvme_queue *nvmeq = req->mq_hctx->driver_data; 852 789 struct nvme_iod *iod = blk_mq_rq_to_pdu(req); 853 790 blk_status_t ret = BLK_STS_RESOURCE; 854 791 int rc; 855 792 856 793 if (blk_rq_nr_phys_segments(req) == 1) { 857 - struct nvme_queue *nvmeq = req->mq_hctx->driver_data; 858 794 struct bio_vec bv = req_bvec(req); 859 795 860 796 if (!is_pci_p2pdma_page(bv.bv_page)) { ··· 889 825 } 890 826 891 827 if (nvme_pci_use_sgls(dev, req, iod->sgt.nents)) 892 - ret = 
nvme_pci_setup_sgls(dev, req, &cmnd->rw); 828 + ret = nvme_pci_setup_sgls(nvmeq, req, &cmnd->rw); 893 829 else 894 - ret = nvme_pci_setup_prps(dev, req, &cmnd->rw); 830 + ret = nvme_pci_setup_prps(nvmeq, req, &cmnd->rw); 895 831 if (ret != BLK_STS_OK) 896 832 goto out_unmap_sg; 897 833 return BLK_STS_OK; ··· 906 842 static blk_status_t nvme_pci_setup_meta_sgls(struct nvme_dev *dev, 907 843 struct request *req) 908 844 { 845 + struct nvme_queue *nvmeq = req->mq_hctx->driver_data; 909 846 struct nvme_iod *iod = blk_mq_rq_to_pdu(req); 910 847 struct nvme_rw_command *cmnd = &iod->cmd.rw; 911 848 struct nvme_sgl_desc *sg_list; ··· 930 865 if (rc) 931 866 goto out_free_sg; 932 867 933 - sg_list = dma_pool_alloc(dev->prp_small_pool, GFP_ATOMIC, &sgl_dma); 868 + sg_list = dma_pool_alloc(nvmeq->descriptor_pools.small, GFP_ATOMIC, 869 + &sgl_dma); 934 870 if (!sg_list) 935 871 goto out_unmap_sg; 936 872 937 873 entries = iod->meta_sgt.nents; 938 - iod->meta_list.sg_list = sg_list; 874 + iod->meta_descriptor = sg_list; 939 875 iod->meta_dma = sgl_dma; 940 876 941 877 cmnd->flags = NVME_CMD_SGL_METASEG; ··· 978 912 979 913 static blk_status_t nvme_map_metadata(struct nvme_dev *dev, struct request *req) 980 914 { 981 - if (nvme_pci_metadata_use_sgls(dev, req)) 915 + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); 916 + 917 + if ((iod->cmd.common.flags & NVME_CMD_SGL_METABUF) && 918 + nvme_pci_metadata_use_sgls(dev, req)) 982 919 return nvme_pci_setup_meta_sgls(dev, req); 983 920 return nvme_pci_setup_meta_mptr(dev, req); 984 921 } ··· 991 922 struct nvme_iod *iod = blk_mq_rq_to_pdu(req); 992 923 blk_status_t ret; 993 924 994 - iod->aborted = false; 995 - iod->nr_allocations = -1; 925 + iod->flags = 0; 926 + iod->nr_descriptors = 0; 996 927 iod->sgt.nents = 0; 997 928 iod->meta_sgt.nents = 0; 998 929 ··· 1016 947 return BLK_STS_OK; 1017 948 out_unmap_data: 1018 949 if (blk_rq_nr_phys_segments(req)) 1019 - nvme_unmap_data(dev, req); 950 + nvme_unmap_data(dev, 
req->mq_hctx->driver_data, req); 1020 951 out_free_cmd: 1021 952 nvme_cleanup_cmd(req); 1022 953 return ret; ··· 1106 1037 } 1107 1038 1108 1039 static __always_inline void nvme_unmap_metadata(struct nvme_dev *dev, 1040 + struct nvme_queue *nvmeq, 1109 1041 struct request *req) 1110 1042 { 1111 1043 struct nvme_iod *iod = blk_mq_rq_to_pdu(req); ··· 1118 1048 return; 1119 1049 } 1120 1050 1121 - dma_pool_free(dev->prp_small_pool, iod->meta_list.sg_list, 1122 - iod->meta_dma); 1051 + dma_pool_free(nvmeq->descriptor_pools.small, iod->meta_descriptor, 1052 + iod->meta_dma); 1123 1053 dma_unmap_sgtable(dev->dev, &iod->meta_sgt, rq_dma_dir(req), 0); 1124 1054 mempool_free(iod->meta_sgt.sgl, dev->iod_meta_mempool); 1125 1055 } ··· 1130 1060 struct nvme_dev *dev = nvmeq->dev; 1131 1061 1132 1062 if (blk_integrity_rq(req)) 1133 - nvme_unmap_metadata(dev, req); 1063 + nvme_unmap_metadata(dev, nvmeq, req); 1134 1064 1135 1065 if (blk_rq_nr_phys_segments(req)) 1136 - nvme_unmap_data(dev, req); 1066 + nvme_unmap_data(dev, nvmeq, req); 1137 1067 } 1138 1068 1139 1069 static void nvme_pci_complete_rq(struct request *req) ··· 1558 1488 * returned to the driver, or if this is the admin queue. 
1559 1489 */ 1560 1490 opcode = nvme_req(req)->cmd->common.opcode; 1561 - if (!nvmeq->qid || iod->aborted) { 1491 + if (!nvmeq->qid || (iod->flags & IOD_ABORTED)) { 1562 1492 dev_warn(dev->ctrl.device, 1563 1493 "I/O tag %d (%04x) opcode %#x (%s) QID %d timeout, reset controller\n", 1564 1494 req->tag, nvme_cid(req), opcode, ··· 1571 1501 atomic_inc(&dev->ctrl.abort_limit); 1572 1502 return BLK_EH_RESET_TIMER; 1573 1503 } 1574 - iod->aborted = true; 1504 + iod->flags |= IOD_ABORTED; 1575 1505 1576 1506 cmd.abort.opcode = nvme_admin_abort_cmd; 1577 1507 cmd.abort.cid = nvme_cid(req); ··· 2910 2840 return 0; 2911 2841 } 2912 2842 2913 - static int nvme_setup_prp_pools(struct nvme_dev *dev) 2914 - { 2915 - size_t small_align = 256; 2916 - 2917 - dev->prp_page_pool = dma_pool_create("prp list page", dev->dev, 2918 - NVME_CTRL_PAGE_SIZE, 2919 - NVME_CTRL_PAGE_SIZE, 0); 2920 - if (!dev->prp_page_pool) 2921 - return -ENOMEM; 2922 - 2923 - if (dev->ctrl.quirks & NVME_QUIRK_DMAPOOL_ALIGN_512) 2924 - small_align = 512; 2925 - 2926 - /* Optimisation for I/Os between 4k and 128k */ 2927 - dev->prp_small_pool = dma_pool_create("prp list 256", dev->dev, 2928 - 256, small_align, 0); 2929 - if (!dev->prp_small_pool) { 2930 - dma_pool_destroy(dev->prp_page_pool); 2931 - return -ENOMEM; 2932 - } 2933 - return 0; 2934 - } 2935 - 2936 - static void nvme_release_prp_pools(struct nvme_dev *dev) 2937 - { 2938 - dma_pool_destroy(dev->prp_page_pool); 2939 - dma_pool_destroy(dev->prp_small_pool); 2940 - } 2941 - 2942 2843 static int nvme_pci_alloc_iod_mempool(struct nvme_dev *dev) 2943 2844 { 2944 2845 size_t meta_size = sizeof(struct scatterlist) * (NVME_MAX_META_SEGS + 1); ··· 3224 3183 struct nvme_dev *dev; 3225 3184 int ret = -ENOMEM; 3226 3185 3227 - dev = kzalloc_node(sizeof(*dev), GFP_KERNEL, node); 3186 + dev = kzalloc_node(struct_size(dev, descriptor_pools, nr_node_ids), 3187 + GFP_KERNEL, node); 3228 3188 if (!dev) 3229 3189 return ERR_PTR(-ENOMEM); 3230 3190 
INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work); ··· 3300 3258 if (result) 3301 3259 goto out_uninit_ctrl; 3302 3260 3303 - result = nvme_setup_prp_pools(dev); 3304 - if (result) 3305 - goto out_dev_unmap; 3306 - 3307 3261 result = nvme_pci_alloc_iod_mempool(dev); 3308 3262 if (result) 3309 - goto out_release_prp_pools; 3263 + goto out_dev_unmap; 3310 3264 3311 3265 dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev)); 3312 3266 ··· 3378 3340 out_release_iod_mempool: 3379 3341 mempool_destroy(dev->iod_mempool); 3380 3342 mempool_destroy(dev->iod_meta_mempool); 3381 - out_release_prp_pools: 3382 - nvme_release_prp_pools(dev); 3383 3343 out_dev_unmap: 3384 3344 nvme_dev_unmap(dev); 3385 3345 out_uninit_ctrl: ··· 3442 3406 nvme_free_queues(dev, 0); 3443 3407 mempool_destroy(dev->iod_mempool); 3444 3408 mempool_destroy(dev->iod_meta_mempool); 3445 - nvme_release_prp_pools(dev); 3409 + nvme_release_descriptor_pools(dev); 3446 3410 nvme_dev_unmap(dev); 3447 3411 nvme_uninit_ctrl(&dev->ctrl); 3448 3412 } ··· 3841 3805 BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64); 3842 3806 BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64); 3843 3807 BUILD_BUG_ON(IRQ_AFFINITY_MAX_SETS < 2); 3844 - BUILD_BUG_ON(NVME_MAX_SEGS > SGES_PER_PAGE); 3845 - BUILD_BUG_ON(sizeof(struct scatterlist) * NVME_MAX_SEGS > PAGE_SIZE); 3846 - BUILD_BUG_ON(nvme_pci_npages_prp() > NVME_MAX_NR_ALLOCATIONS); 3808 + BUILD_BUG_ON(nvme_pci_npages_prp() > NVME_MAX_NR_DESCRIPTORS); 3847 3809 3848 3810 return pci_register_driver(&nvme_driver); 3849 3811 }
+7
drivers/nvme/host/sysfs.c
··· 260 260 &dev_attr_ana_state.attr, 261 261 &dev_attr_queue_depth.attr, 262 262 &dev_attr_numa_nodes.attr, 263 + &dev_attr_delayed_removal_secs.attr, 263 264 #endif 264 265 &dev_attr_io_passthru_err_log_enabled.attr, 265 266 NULL, ··· 295 294 } 296 295 if (a == &dev_attr_queue_depth.attr || a == &dev_attr_numa_nodes.attr) { 297 296 if (nvme_disk_is_ns_head(dev_to_disk(dev))) 297 + return 0; 298 + } 299 + if (a == &dev_attr_delayed_removal_secs.attr) { 300 + struct gendisk *disk = dev_to_disk(dev); 301 + 302 + if (!nvme_disk_is_ns_head(disk)) 298 303 return 0; 299 304 } 300 305 #endif
+8 -6
drivers/nvme/host/tcp.c
··· 403 403 } 404 404 405 405 static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, 406 - bool sync, bool last) 406 + bool last) 407 407 { 408 408 struct nvme_tcp_queue *queue = req->queue; 409 409 bool empty; ··· 417 417 * are on the same cpu, so we don't introduce contention. 418 418 */ 419 419 if (queue->io_cpu == raw_smp_processor_id() && 420 - sync && empty && mutex_trylock(&queue->send_mutex)) { 420 + empty && mutex_trylock(&queue->send_mutex)) { 421 421 nvme_tcp_send_all(queue); 422 422 mutex_unlock(&queue->send_mutex); 423 423 } ··· 770 770 req->ttag = pdu->ttag; 771 771 772 772 nvme_tcp_setup_h2c_data_pdu(req); 773 - nvme_tcp_queue_request(req, false, true); 773 + 774 + llist_add(&req->lentry, &queue->req_list); 775 + queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); 774 776 775 777 return 0; 776 778 } ··· 2387 2385 if (ret) 2388 2386 return ret; 2389 2387 2390 - if (ctrl->opts && ctrl->opts->concat && !ctrl->tls_pskid) { 2388 + if (ctrl->opts->concat && !ctrl->tls_pskid) { 2391 2389 /* See comments for nvme_tcp_key_revoke_needed() */ 2392 2390 dev_dbg(ctrl->device, "restart admin queue for secure concatenation\n"); 2393 2391 nvme_stop_keep_alive(ctrl); ··· 2639 2637 ctrl->async_req.curr_bio = NULL; 2640 2638 ctrl->async_req.data_len = 0; 2641 2639 2642 - nvme_tcp_queue_request(&ctrl->async_req, true, true); 2640 + nvme_tcp_queue_request(&ctrl->async_req, true); 2643 2641 } 2644 2642 2645 2643 static void nvme_tcp_complete_timed_out(struct request *rq) ··· 2791 2789 2792 2790 nvme_start_request(rq); 2793 2791 2794 - nvme_tcp_queue_request(req, true, bd->last); 2792 + nvme_tcp_queue_request(req, bd->last); 2795 2793 2796 2794 return BLK_STS_OK; 2797 2795 }
+11 -20
drivers/nvme/target/admin-cmd.c
··· 63 63 if (status != NVME_SC_SUCCESS) 64 64 goto complete; 65 65 66 - /* 67 - * Note: The NVMe specification allows multiple SQs to use the same CQ. 68 - * However, the target code does not really support that. So for now, 69 - * prevent this and fail the command if sqid and cqid are different. 70 - */ 71 - if (!cqid || cqid != sqid) { 72 - pr_err("SQ %u: Unsupported CQID %u\n", sqid, cqid); 73 - status = NVME_SC_CQ_INVALID | NVME_STATUS_DNR; 66 + status = nvmet_check_io_cqid(ctrl, cqid, false); 67 + if (status != NVME_SC_SUCCESS) { 68 + pr_err("SQ %u: Invalid CQID %u\n", sqid, cqid); 74 69 goto complete; 75 70 } 76 71 ··· 74 79 goto complete; 75 80 } 76 81 77 - status = ctrl->ops->create_sq(ctrl, sqid, sq_flags, qsize, prp1); 82 + status = ctrl->ops->create_sq(ctrl, sqid, cqid, sq_flags, qsize, prp1); 78 83 79 84 complete: 80 85 nvmet_req_complete(req, status); ··· 91 96 goto complete; 92 97 } 93 98 94 - if (!cqid) { 99 + status = nvmet_check_io_cqid(ctrl, cqid, false); 100 + if (status != NVME_SC_SUCCESS) 101 + goto complete; 102 + 103 + if (!ctrl->cqs[cqid] || nvmet_cq_in_use(ctrl->cqs[cqid])) { 104 + /* Some SQs are still using this CQ */ 95 105 status = NVME_SC_QID_INVALID | NVME_STATUS_DNR; 96 106 goto complete; 97 107 } 98 - 99 - status = nvmet_check_cqid(ctrl, cqid); 100 - if (status != NVME_SC_SUCCESS) 101 - goto complete; 102 108 103 109 status = ctrl->ops->delete_cq(ctrl, cqid); 104 110 ··· 123 127 goto complete; 124 128 } 125 129 126 - if (!cqid) { 127 - status = NVME_SC_QID_INVALID | NVME_STATUS_DNR; 128 - goto complete; 129 - } 130 - 131 - status = nvmet_check_cqid(ctrl, cqid); 130 + status = nvmet_check_io_cqid(ctrl, cqid, true); 132 131 if (status != NVME_SC_SUCCESS) 133 132 goto complete; 134 133
+8 -13
drivers/nvme/target/auth.c
··· 280 280 281 281 bool nvmet_check_auth_status(struct nvmet_req *req) 282 282 { 283 - if (req->sq->ctrl->host_key && 284 - !req->sq->authenticated) 285 - return false; 283 + if (req->sq->ctrl->host_key) { 284 + if (req->sq->qid > 0) 285 + return true; 286 + if (!req->sq->authenticated) 287 + return false; 288 + } 286 289 return true; 287 290 } 288 291 ··· 293 290 unsigned int shash_len) 294 291 { 295 292 struct crypto_shash *shash_tfm; 296 - struct shash_desc *shash; 293 + SHASH_DESC_ON_STACK(shash, shash_tfm); 297 294 struct nvmet_ctrl *ctrl = req->sq->ctrl; 298 295 const char *hash_name; 299 296 u8 *challenge = req->sq->dhchap_c1; ··· 345 342 req->sq->dhchap_c1, 346 343 challenge, shash_len); 347 344 if (ret) 348 - goto out_free_challenge; 345 + goto out; 349 346 } 350 347 351 348 pr_debug("ctrl %d qid %d host response seq %u transaction %d\n", 352 349 ctrl->cntlid, req->sq->qid, req->sq->dhchap_s1, 353 350 req->sq->dhchap_tid); 354 351 355 - shash = kzalloc(sizeof(*shash) + crypto_shash_descsize(shash_tfm), 356 - GFP_KERNEL); 357 - if (!shash) { 358 - ret = -ENOMEM; 359 - goto out_free_challenge; 360 - } 361 352 shash->tfm = shash_tfm; 362 353 ret = crypto_shash_init(shash); 363 354 if (ret) ··· 386 389 goto out; 387 390 ret = crypto_shash_final(shash, response); 388 391 out: 389 - kfree(shash); 390 - out_free_challenge: 391 392 if (challenge != req->sq->dhchap_c1) 392 393 kfree(challenge); 393 394 out_free_response:
+75 -19
drivers/nvme/target/core.c
··· 813 813 } 814 814 EXPORT_SYMBOL_GPL(nvmet_req_complete); 815 815 816 + void nvmet_cq_init(struct nvmet_cq *cq) 817 + { 818 + refcount_set(&cq->ref, 1); 819 + } 820 + EXPORT_SYMBOL_GPL(nvmet_cq_init); 821 + 822 + bool nvmet_cq_get(struct nvmet_cq *cq) 823 + { 824 + return refcount_inc_not_zero(&cq->ref); 825 + } 826 + EXPORT_SYMBOL_GPL(nvmet_cq_get); 827 + 828 + void nvmet_cq_put(struct nvmet_cq *cq) 829 + { 830 + if (refcount_dec_and_test(&cq->ref)) 831 + nvmet_cq_destroy(cq); 832 + } 833 + EXPORT_SYMBOL_GPL(nvmet_cq_put); 834 + 816 835 void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq, 817 836 u16 qid, u16 size) 818 837 { 819 838 cq->qid = qid; 820 839 cq->size = size; 840 + 841 + ctrl->cqs[qid] = cq; 842 + } 843 + 844 + void nvmet_cq_destroy(struct nvmet_cq *cq) 845 + { 846 + struct nvmet_ctrl *ctrl = cq->ctrl; 847 + 848 + if (ctrl) { 849 + ctrl->cqs[cq->qid] = NULL; 850 + nvmet_ctrl_put(cq->ctrl); 851 + cq->ctrl = NULL; 852 + } 821 853 } 822 854 823 855 void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, ··· 869 837 complete(&sq->confirm_done); 870 838 } 871 839 872 - u16 nvmet_check_cqid(struct nvmet_ctrl *ctrl, u16 cqid) 840 + u16 nvmet_check_cqid(struct nvmet_ctrl *ctrl, u16 cqid, bool create) 873 841 { 874 - if (!ctrl->sqs) 842 + if (!ctrl->cqs) 875 843 return NVME_SC_INTERNAL | NVME_STATUS_DNR; 876 844 877 845 if (cqid > ctrl->subsys->max_qid) 878 846 return NVME_SC_QID_INVALID | NVME_STATUS_DNR; 879 847 880 - /* 881 - * Note: For PCI controllers, the NVMe specifications allows multiple 882 - * SQs to share a single CQ. However, we do not support this yet, so 883 - * check that there is no SQ defined for a CQ. If one exist, then the 884 - * CQ ID is invalid for creation as well as when the CQ is being 885 - * deleted (as that would mean that the SQ was not deleted before the 886 - * CQ). 
887 - */ 888 - if (ctrl->sqs[cqid]) 848 + if ((create && ctrl->cqs[cqid]) || (!create && !ctrl->cqs[cqid])) 889 849 return NVME_SC_QID_INVALID | NVME_STATUS_DNR; 890 850 891 851 return NVME_SC_SUCCESS; 892 852 } 853 + 854 + u16 nvmet_check_io_cqid(struct nvmet_ctrl *ctrl, u16 cqid, bool create) 855 + { 856 + if (!cqid) 857 + return NVME_SC_QID_INVALID | NVME_STATUS_DNR; 858 + return nvmet_check_cqid(ctrl, cqid, create); 859 + } 860 + 861 + bool nvmet_cq_in_use(struct nvmet_cq *cq) 862 + { 863 + return refcount_read(&cq->ref) > 1; 864 + } 865 + EXPORT_SYMBOL_GPL(nvmet_cq_in_use); 893 866 894 867 u16 nvmet_cq_create(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq, 895 868 u16 qid, u16 size) 896 869 { 897 870 u16 status; 898 871 899 - status = nvmet_check_cqid(ctrl, qid); 872 + status = nvmet_check_cqid(ctrl, qid, true); 900 873 if (status != NVME_SC_SUCCESS) 901 874 return status; 902 875 876 + if (!kref_get_unless_zero(&ctrl->ref)) 877 + return NVME_SC_INTERNAL | NVME_STATUS_DNR; 878 + cq->ctrl = ctrl; 879 + 880 + nvmet_cq_init(cq); 903 881 nvmet_cq_setup(ctrl, cq, qid, size); 904 882 905 883 return NVME_SC_SUCCESS; ··· 933 891 } 934 892 935 893 u16 nvmet_sq_create(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, 936 - u16 sqid, u16 size) 894 + struct nvmet_cq *cq, u16 sqid, u16 size) 937 895 { 938 896 u16 status; 939 897 int ret; ··· 945 903 if (status != NVME_SC_SUCCESS) 946 904 return status; 947 905 948 - ret = nvmet_sq_init(sq); 906 + ret = nvmet_sq_init(sq, cq); 949 907 if (ret) { 950 908 status = NVME_SC_INTERNAL | NVME_STATUS_DNR; 951 909 goto ctrl_put; ··· 977 935 wait_for_completion(&sq->free_done); 978 936 percpu_ref_exit(&sq->ref); 979 937 nvmet_auth_sq_free(sq); 938 + nvmet_cq_put(sq->cq); 980 939 981 940 /* 982 941 * we must reference the ctrl again after waiting for inflight IO ··· 1010 967 complete(&sq->free_done); 1011 968 } 1012 969 1013 - int nvmet_sq_init(struct nvmet_sq *sq) 970 + int nvmet_sq_init(struct nvmet_sq *sq, struct nvmet_cq *cq) 1014 971 { 
1015 972 int ret; 973 + 974 + if (!nvmet_cq_get(cq)) 975 + return -EINVAL; 1016 976 1017 977 ret = percpu_ref_init(&sq->ref, nvmet_sq_free, 0, GFP_KERNEL); 1018 978 if (ret) { 1019 979 pr_err("percpu_ref init failed!\n"); 980 + nvmet_cq_put(cq); 1020 981 return ret; 1021 982 } 1022 983 init_completion(&sq->free_done); 1023 984 init_completion(&sq->confirm_done); 1024 985 nvmet_auth_sq_init(sq); 986 + sq->cq = cq; 1025 987 1026 988 return 0; 1027 989 } ··· 1156 1108 return ret; 1157 1109 } 1158 1110 1159 - bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, 1160 - struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops) 1111 + bool nvmet_req_init(struct nvmet_req *req, struct nvmet_sq *sq, 1112 + const struct nvmet_fabrics_ops *ops) 1161 1113 { 1162 1114 u8 flags = req->cmd->common.flags; 1163 1115 u16 status; 1164 1116 1165 - req->cq = cq; 1117 + req->cq = sq->cq; 1166 1118 req->sq = sq; 1167 1119 req->ops = ops; 1168 1120 req->sg = NULL; ··· 1660 1612 if (!ctrl->sqs) 1661 1613 goto out_free_changed_ns_list; 1662 1614 1615 + ctrl->cqs = kcalloc(subsys->max_qid + 1, sizeof(struct nvmet_cq *), 1616 + GFP_KERNEL); 1617 + if (!ctrl->cqs) 1618 + goto out_free_sqs; 1619 + 1663 1620 ret = ida_alloc_range(&cntlid_ida, 1664 1621 subsys->cntlid_min, subsys->cntlid_max, 1665 1622 GFP_KERNEL); 1666 1623 if (ret < 0) { 1667 1624 args->status = NVME_SC_CONNECT_CTRL_BUSY | NVME_STATUS_DNR; 1668 - goto out_free_sqs; 1625 + goto out_free_cqs; 1669 1626 } 1670 1627 ctrl->cntlid = ret; 1671 1628 ··· 1729 1676 mutex_unlock(&subsys->lock); 1730 1677 nvmet_stop_keep_alive_timer(ctrl); 1731 1678 ida_free(&cntlid_ida, ctrl->cntlid); 1679 + out_free_cqs: 1680 + kfree(ctrl->cqs); 1732 1681 out_free_sqs: 1733 1682 kfree(ctrl->sqs); 1734 1683 out_free_changed_ns_list: ··· 1767 1712 1768 1713 nvmet_async_events_free(ctrl); 1769 1714 kfree(ctrl->sqs); 1715 + kfree(ctrl->cqs); 1770 1716 kfree(ctrl->changed_ns_list); 1771 1717 kfree(ctrl); 1772 1718
+1 -1
drivers/nvme/target/discovery.c
··· 119 119 memcpy(e->trsvcid, port->disc_addr.trsvcid, NVMF_TRSVCID_SIZE); 120 120 memcpy(e->traddr, traddr, NVMF_TRADDR_SIZE); 121 121 memcpy(e->tsas.common, port->disc_addr.tsas.common, NVMF_TSAS_SIZE); 122 - strncpy(e->subnqn, subsys_nqn, NVMF_NQN_SIZE); 122 + strscpy(e->subnqn, subsys_nqn, NVMF_NQN_SIZE); 123 123 } 124 124 125 125 /*
+10 -2
drivers/nvme/target/fabrics-cmd.c
··· 208 208 return NVME_SC_CONNECT_CTRL_BUSY | NVME_STATUS_DNR; 209 209 } 210 210 211 + kref_get(&ctrl->ref); 212 + old = cmpxchg(&req->cq->ctrl, NULL, ctrl); 213 + if (old) { 214 + pr_warn("queue already connected!\n"); 215 + req->error_loc = offsetof(struct nvmf_connect_command, opcode); 216 + return NVME_SC_CONNECT_CTRL_BUSY | NVME_STATUS_DNR; 217 + } 218 + 211 219 /* note: convert queue size from 0's-based value to 1's-based value */ 212 220 nvmet_cq_setup(ctrl, req->cq, qid, sqsize + 1); 213 221 nvmet_sq_setup(ctrl, req->sq, qid, sqsize + 1); ··· 247 239 bool needs_auth = nvmet_has_auth(ctrl, sq); 248 240 key_serial_t keyid = nvmet_queue_tls_keyid(sq); 249 241 250 - /* Do not authenticate I/O queues for secure concatenation */ 251 - if (ctrl->concat && sq->qid) 242 + /* Do not authenticate I/O queues */ 243 + if (sq->qid) 252 244 needs_auth = false; 253 245 254 246 if (keyid)
+79 -17
drivers/nvme/target/fc.c
··· 816 816 817 817 nvmet_fc_prep_fcp_iodlist(assoc->tgtport, queue); 818 818 819 - ret = nvmet_sq_init(&queue->nvme_sq); 819 + nvmet_cq_init(&queue->nvme_cq); 820 + ret = nvmet_sq_init(&queue->nvme_sq, &queue->nvme_cq); 820 821 if (ret) 821 822 goto out_fail_iodlist; 822 823 ··· 827 826 return queue; 828 827 829 828 out_fail_iodlist: 829 + nvmet_cq_put(&queue->nvme_cq); 830 830 nvmet_fc_destroy_fcp_iodlist(assoc->tgtport, queue); 831 831 destroy_workqueue(queue->work_q); 832 832 out_free_queue: ··· 936 934 flush_workqueue(queue->work_q); 937 935 938 936 nvmet_sq_destroy(&queue->nvme_sq); 937 + nvmet_cq_put(&queue->nvme_cq); 939 938 940 939 nvmet_fc_tgt_q_put(queue); 941 940 } ··· 1257 1254 { 1258 1255 lockdep_assert_held(&nvmet_fc_tgtlock); 1259 1256 1257 + nvmet_fc_tgtport_get(tgtport); 1260 1258 pe->tgtport = tgtport; 1261 1259 tgtport->pe = pe; 1262 1260 ··· 1277 1273 unsigned long flags; 1278 1274 1279 1275 spin_lock_irqsave(&nvmet_fc_tgtlock, flags); 1280 - if (pe->tgtport) 1276 + if (pe->tgtport) { 1277 + nvmet_fc_tgtport_put(pe->tgtport); 1281 1278 pe->tgtport->pe = NULL; 1279 + } 1282 1280 list_del(&pe->pe_list); 1283 1281 spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); 1284 1282 } ··· 1298 1292 1299 1293 spin_lock_irqsave(&nvmet_fc_tgtlock, flags); 1300 1294 pe = tgtport->pe; 1301 - if (pe) 1295 + if (pe) { 1296 + nvmet_fc_tgtport_put(pe->tgtport); 1302 1297 pe->tgtport = NULL; 1298 + } 1303 1299 tgtport->pe = NULL; 1304 1300 spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); 1305 1301 } ··· 1324 1316 list_for_each_entry(pe, &nvmet_fc_portentry_list, pe_list) { 1325 1317 if (tgtport->fc_target_port.node_name == pe->node_name && 1326 1318 tgtport->fc_target_port.port_name == pe->port_name) { 1319 + if (!nvmet_fc_tgtport_get(tgtport)) 1320 + continue; 1321 + 1327 1322 WARN_ON(pe->tgtport); 1328 1323 tgtport->pe = pe; 1329 1324 pe->tgtport = tgtport; ··· 1591 1580 spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); 1592 1581 } 1593 1582 1583 + static void 
1584 + nvmet_fc_free_pending_reqs(struct nvmet_fc_tgtport *tgtport) 1585 + { 1586 + struct nvmet_fc_ls_req_op *lsop; 1587 + struct nvmefc_ls_req *lsreq; 1588 + struct nvmet_fc_ls_iod *iod; 1589 + int i; 1590 + 1591 + iod = tgtport->iod; 1592 + for (i = 0; i < NVMET_LS_CTX_COUNT; iod++, i++) 1593 + cancel_work(&iod->work); 1594 + 1595 + /* 1596 + * After this point the connection is lost and thus any pending 1597 + * request can't be processed by the normal completion path. This 1598 + * is likely a request from nvmet_fc_send_ls_req_async. 1599 + */ 1600 + while ((lsop = list_first_entry_or_null(&tgtport->ls_req_list, 1601 + struct nvmet_fc_ls_req_op, lsreq_list))) { 1602 + list_del(&lsop->lsreq_list); 1603 + 1604 + if (!lsop->req_queued) 1605 + continue; 1606 + 1607 + lsreq = &lsop->ls_req; 1608 + fc_dma_unmap_single(tgtport->dev, lsreq->rqstdma, 1609 + (lsreq->rqstlen + lsreq->rsplen), 1610 + DMA_BIDIRECTIONAL); 1611 + nvmet_fc_tgtport_put(tgtport); 1612 + kfree(lsop); 1613 + } 1614 + } 1615 + 1594 1616 /** 1595 1617 * nvmet_fc_unregister_targetport - transport entry point called by an 1596 1618 * LLDD to deregister/remove a previously ··· 1652 1608 1653 1609 flush_workqueue(nvmet_wq); 1654 1610 1655 - /* 1656 - * should terminate LS's as well. However, LS's will be generated 1657 - * at the tail end of association termination, so they likely don't 1658 - * exist yet. And even if they did, it's worthwhile to just let 1659 - * them finish and targetport ref counting will clean things up. 
1660 - */ 1661 - 1611 + nvmet_fc_free_pending_reqs(tgtport); 1662 1612 nvmet_fc_tgtport_put(tgtport); 1663 1613 1664 1614 return 0; ··· 2569 2531 fod->data_sg = NULL; 2570 2532 fod->data_sg_cnt = 0; 2571 2533 2572 - ret = nvmet_req_init(&fod->req, 2573 - &fod->queue->nvme_cq, 2574 - &fod->queue->nvme_sq, 2575 - &nvmet_fc_tgt_fcp_ops); 2534 + ret = nvmet_req_init(&fod->req, &fod->queue->nvme_sq, 2535 + &nvmet_fc_tgt_fcp_ops); 2576 2536 if (!ret) { 2577 2537 /* bad SQE content or invalid ctrl state */ 2578 2538 /* nvmet layer has already called op done to send rsp. */ ··· 2896 2860 list_for_each_entry(tgtport, &nvmet_fc_target_list, tgt_list) { 2897 2861 if ((tgtport->fc_target_port.node_name == traddr.nn) && 2898 2862 (tgtport->fc_target_port.port_name == traddr.pn)) { 2863 + if (!nvmet_fc_tgtport_get(tgtport)) 2864 + continue; 2865 + 2899 2866 /* a FC port can only be 1 nvmet port id */ 2900 2867 if (!tgtport->pe) { 2901 2868 nvmet_fc_portentry_bind(tgtport, pe, port); 2902 2869 ret = 0; 2903 2870 } else 2904 2871 ret = -EALREADY; 2872 + 2873 + nvmet_fc_tgtport_put(tgtport); 2905 2874 break; 2906 2875 } 2907 2876 } ··· 2922 2881 nvmet_fc_remove_port(struct nvmet_port *port) 2923 2882 { 2924 2883 struct nvmet_fc_port_entry *pe = port->priv; 2884 + struct nvmet_fc_tgtport *tgtport = NULL; 2885 + unsigned long flags; 2886 + 2887 + spin_lock_irqsave(&nvmet_fc_tgtlock, flags); 2888 + if (pe->tgtport && nvmet_fc_tgtport_get(pe->tgtport)) 2889 + tgtport = pe->tgtport; 2890 + spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); 2925 2891 2926 2892 nvmet_fc_portentry_unbind(pe); 2927 2893 2928 - /* terminate any outstanding associations */ 2929 - __nvmet_fc_free_assocs(pe->tgtport); 2894 + if (tgtport) { 2895 + /* terminate any outstanding associations */ 2896 + __nvmet_fc_free_assocs(tgtport); 2897 + nvmet_fc_tgtport_put(tgtport); 2898 + } 2930 2899 2931 2900 kfree(pe); 2932 2901 } ··· 2945 2894 nvmet_fc_discovery_chg(struct nvmet_port *port) 2946 2895 { 2947 2896 struct 
nvmet_fc_port_entry *pe = port->priv; 2948 - struct nvmet_fc_tgtport *tgtport = pe->tgtport; 2897 + struct nvmet_fc_tgtport *tgtport = NULL; 2898 + unsigned long flags; 2899 + 2900 + spin_lock_irqsave(&nvmet_fc_tgtlock, flags); 2901 + if (pe->tgtport && nvmet_fc_tgtport_get(pe->tgtport)) 2902 + tgtport = pe->tgtport; 2903 + spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); 2904 + 2905 + if (!tgtport) 2906 + return; 2949 2907 2950 2908 if (tgtport && tgtport->ops->discovery_event) 2951 2909 tgtport->ops->discovery_event(&tgtport->fc_target_port); 2910 + 2911 + nvmet_fc_tgtport_put(tgtport); 2952 2912 } 2953 2913 2954 2914 static ssize_t
+287 -164
drivers/nvme/target/fcloop.c
··· 207 207 struct fcloop_lport { 208 208 struct nvme_fc_local_port *localport; 209 209 struct list_head lport_list; 210 - struct completion unreg_done; 211 210 refcount_t ref; 212 211 }; 213 212 214 213 struct fcloop_lport_priv { 215 214 struct fcloop_lport *lport; 216 215 }; 216 + 217 + /* The port is already being removed, avoid double free */ 218 + #define PORT_DELETED 0 217 219 218 220 struct fcloop_rport { 219 221 struct nvme_fc_remote_port *remoteport; ··· 225 223 spinlock_t lock; 226 224 struct list_head ls_list; 227 225 struct work_struct ls_work; 226 + unsigned long flags; 228 227 }; 229 228 230 229 struct fcloop_tport { ··· 236 233 spinlock_t lock; 237 234 struct list_head ls_list; 238 235 struct work_struct ls_work; 236 + unsigned long flags; 239 237 }; 240 238 241 239 struct fcloop_nport { ··· 292 288 spinlock_t inilock; 293 289 }; 294 290 291 + /* SLAB cache for fcloop_lsreq structures */ 292 + static struct kmem_cache *lsreq_cache; 293 + 295 294 static inline struct fcloop_lsreq * 296 295 ls_rsp_to_lsreq(struct nvmefc_ls_rsp *lsrsp) 297 296 { ··· 345 338 * callee may free memory containing tls_req. 346 339 * do not reference lsreq after this. 
347 340 */ 341 + kmem_cache_free(lsreq_cache, tls_req); 348 342 349 343 spin_lock(&rport->lock); 350 344 } ··· 357 349 struct nvme_fc_remote_port *remoteport, 358 350 struct nvmefc_ls_req *lsreq) 359 351 { 360 - struct fcloop_lsreq *tls_req = lsreq->private; 361 352 struct fcloop_rport *rport = remoteport->private; 353 + struct fcloop_lsreq *tls_req; 362 354 int ret = 0; 363 355 356 + tls_req = kmem_cache_alloc(lsreq_cache, GFP_KERNEL); 357 + if (!tls_req) 358 + return -ENOMEM; 364 359 tls_req->lsreq = lsreq; 365 360 INIT_LIST_HEAD(&tls_req->ls_list); 366 361 ··· 400 389 401 390 lsrsp->done(lsrsp); 402 391 403 - if (remoteport) { 404 - rport = remoteport->private; 405 - spin_lock(&rport->lock); 406 - list_add_tail(&tls_req->ls_list, &rport->ls_list); 407 - spin_unlock(&rport->lock); 408 - queue_work(nvmet_wq, &rport->ls_work); 392 + if (!remoteport) { 393 + kmem_cache_free(lsreq_cache, tls_req); 394 + return 0; 409 395 } 396 + 397 + rport = remoteport->private; 398 + spin_lock(&rport->lock); 399 + list_add_tail(&tls_req->ls_list, &rport->ls_list); 400 + spin_unlock(&rport->lock); 401 + queue_work(nvmet_wq, &rport->ls_work); 410 402 411 403 return 0; 412 404 } ··· 436 422 * callee may free memory containing tls_req. 437 423 * do not reference lsreq after this. 
438 424 */ 425 + kmem_cache_free(lsreq_cache, tls_req); 439 426 440 427 spin_lock(&tport->lock); 441 428 } ··· 447 432 fcloop_t2h_ls_req(struct nvmet_fc_target_port *targetport, void *hosthandle, 448 433 struct nvmefc_ls_req *lsreq) 449 434 { 450 - struct fcloop_lsreq *tls_req = lsreq->private; 451 435 struct fcloop_tport *tport = targetport->private; 436 + struct fcloop_lsreq *tls_req; 452 437 int ret = 0; 453 438 454 439 /* ··· 456 441 * hosthandle ignored as fcloop currently is 457 442 * 1:1 tgtport vs remoteport 458 443 */ 444 + 445 + tls_req = kmem_cache_alloc(lsreq_cache, GFP_KERNEL); 446 + if (!tls_req) 447 + return -ENOMEM; 459 448 tls_req->lsreq = lsreq; 460 449 INIT_LIST_HEAD(&tls_req->ls_list); 461 450 ··· 476 457 ret = nvme_fc_rcv_ls_req(tport->remoteport, &tls_req->ls_rsp, 477 458 lsreq->rqstaddr, lsreq->rqstlen); 478 459 460 + if (ret) 461 + kmem_cache_free(lsreq_cache, tls_req); 462 + 479 463 return ret; 480 464 } 481 465 ··· 493 471 struct nvmet_fc_target_port *targetport = rport->targetport; 494 472 struct fcloop_tport *tport; 495 473 474 + if (!targetport) { 475 + /* 476 + * The target port is gone. The target doesn't expect any 477 + * response anymore and the ->done call is not valid 478 + * because the resources have been freed by 479 + * nvmet_fc_free_pending_reqs. 480 + * 481 + * We end up here from delete association exchange: 482 + * nvmet_fc_xmt_disconnect_assoc sends an async request. 483 + */ 484 + kmem_cache_free(lsreq_cache, tls_req); 485 + return 0; 486 + } 487 + 496 488 memcpy(lsreq->rspaddr, lsrsp->rspbuf, 497 489 ((lsreq->rsplen < lsrsp->rsplen) ? 
498 490 lsreq->rsplen : lsrsp->rsplen)); 499 491 lsrsp->done(lsrsp); 500 492 501 - if (targetport) { 502 - tport = targetport->private; 503 - spin_lock(&tport->lock); 504 - list_add_tail(&tls_req->ls_list, &tport->ls_list); 505 - spin_unlock(&tport->lock); 506 - queue_work(nvmet_wq, &tport->ls_work); 507 - } 493 + tport = targetport->private; 494 + spin_lock(&tport->lock); 495 + list_add_tail(&tls_req->ls_list, &tport->ls_list); 496 + spin_unlock(&tport->lock); 497 + queue_work(nvmet_wq, &tport->ls_work); 508 498 509 499 return 0; 510 500 } ··· 600 566 } 601 567 602 568 /* release original io reference on tgt struct */ 603 - fcloop_tfcp_req_put(tfcp_req); 569 + if (tfcp_req) 570 + fcloop_tfcp_req_put(tfcp_req); 604 571 } 605 572 606 573 static bool drop_fabric_opcode; ··· 653 618 { 654 619 struct fcloop_fcpreq *tfcp_req = 655 620 container_of(work, struct fcloop_fcpreq, fcp_rcv_work); 656 - struct nvmefc_fcp_req *fcpreq = tfcp_req->fcpreq; 621 + struct nvmefc_fcp_req *fcpreq; 657 622 unsigned long flags; 658 623 int ret = 0; 659 624 bool aborted = false; 660 625 661 626 spin_lock_irqsave(&tfcp_req->reqlock, flags); 627 + fcpreq = tfcp_req->fcpreq; 662 628 switch (tfcp_req->inistate) { 663 629 case INI_IO_START: 664 630 tfcp_req->inistate = INI_IO_ACTIVE; ··· 674 638 } 675 639 spin_unlock_irqrestore(&tfcp_req->reqlock, flags); 676 640 677 - if (unlikely(aborted)) 678 - ret = -ECANCELED; 679 - else { 680 - if (likely(!check_for_drop(tfcp_req))) 681 - ret = nvmet_fc_rcv_fcp_req(tfcp_req->tport->targetport, 682 - &tfcp_req->tgt_fcp_req, 683 - fcpreq->cmdaddr, fcpreq->cmdlen); 684 - else 685 - pr_info("%s: dropped command ********\n", __func__); 641 + if (unlikely(aborted)) { 642 + /* the abort handler will call fcloop_call_host_done */ 643 + return; 686 644 } 645 + 646 + if (unlikely(check_for_drop(tfcp_req))) { 647 + pr_info("%s: dropped command ********\n", __func__); 648 + return; 649 + } 650 + 651 + ret = nvmet_fc_rcv_fcp_req(tfcp_req->tport->targetport, 652 + 
&tfcp_req->tgt_fcp_req, 653 + fcpreq->cmdaddr, fcpreq->cmdlen); 687 654 if (ret) 688 655 fcloop_call_host_done(fcpreq, tfcp_req, ret); 689 656 } ··· 701 662 unsigned long flags; 702 663 703 664 spin_lock_irqsave(&tfcp_req->reqlock, flags); 704 - fcpreq = tfcp_req->fcpreq; 705 665 switch (tfcp_req->inistate) { 706 666 case INI_IO_ABORTED: 667 + fcpreq = tfcp_req->fcpreq; 668 + tfcp_req->fcpreq = NULL; 707 669 break; 708 670 case INI_IO_COMPLETED: 709 671 completed = true; 710 672 break; 711 673 default: 712 674 spin_unlock_irqrestore(&tfcp_req->reqlock, flags); 675 + fcloop_tfcp_req_put(tfcp_req); 713 676 WARN_ON(1); 714 677 return; 715 678 } ··· 726 685 if (tfcp_req->tport->targetport) 727 686 nvmet_fc_rcv_fcp_abort(tfcp_req->tport->targetport, 728 687 &tfcp_req->tgt_fcp_req); 729 - 730 - spin_lock_irqsave(&tfcp_req->reqlock, flags); 731 - tfcp_req->fcpreq = NULL; 732 - spin_unlock_irqrestore(&tfcp_req->reqlock, flags); 733 688 734 689 fcloop_call_host_done(fcpreq, tfcp_req, -ECANCELED); 735 690 /* call_host_done releases reference for abort downcall */ ··· 995 958 996 959 spin_lock(&inireq->inilock); 997 960 tfcp_req = inireq->tfcp_req; 998 - if (tfcp_req) 999 - fcloop_tfcp_req_get(tfcp_req); 961 + if (tfcp_req) { 962 + if (!fcloop_tfcp_req_get(tfcp_req)) 963 + tfcp_req = NULL; 964 + } 1000 965 spin_unlock(&inireq->inilock); 1001 966 1002 - if (!tfcp_req) 967 + if (!tfcp_req) { 1003 968 /* abort has already been called */ 1004 - return; 969 + goto out_host_done; 970 + } 1005 971 1006 972 /* break initiator/target relationship for io */ 1007 973 spin_lock_irqsave(&tfcp_req->reqlock, flags); ··· 1019 979 default: 1020 980 spin_unlock_irqrestore(&tfcp_req->reqlock, flags); 1021 981 WARN_ON(1); 1022 - return; 982 + goto out_host_done; 1023 983 } 1024 984 spin_unlock_irqrestore(&tfcp_req->reqlock, flags); 1025 985 ··· 1033 993 */ 1034 994 fcloop_tfcp_req_put(tfcp_req); 1035 995 } 996 + 997 + return; 998 + 999 + out_host_done: 1000 + fcloop_call_host_done(fcpreq, 
tfcp_req, -ECANCELED); 1036 1001 } 1037 1002 1038 1003 static void ··· 1064 1019 static void 1065 1020 fcloop_nport_put(struct fcloop_nport *nport) 1066 1021 { 1022 + unsigned long flags; 1023 + 1067 1024 if (!refcount_dec_and_test(&nport->ref)) 1068 1025 return; 1026 + 1027 + spin_lock_irqsave(&fcloop_lock, flags); 1028 + list_del(&nport->nport_list); 1029 + spin_unlock_irqrestore(&fcloop_lock, flags); 1030 + 1031 + if (nport->lport) 1032 + fcloop_lport_put(nport->lport); 1069 1033 1070 1034 kfree(nport); 1071 1035 } ··· 1091 1037 struct fcloop_lport_priv *lport_priv = localport->private; 1092 1038 struct fcloop_lport *lport = lport_priv->lport; 1093 1039 1094 - /* release any threads waiting for the unreg to complete */ 1095 - complete(&lport->unreg_done); 1096 - 1097 1040 fcloop_lport_put(lport); 1098 1041 } 1099 1042 ··· 1098 1047 fcloop_remoteport_delete(struct nvme_fc_remote_port *remoteport) 1099 1048 { 1100 1049 struct fcloop_rport *rport = remoteport->private; 1050 + bool put_port = false; 1051 + unsigned long flags; 1101 1052 1102 1053 flush_work(&rport->ls_work); 1103 - fcloop_nport_put(rport->nport); 1054 + 1055 + spin_lock_irqsave(&fcloop_lock, flags); 1056 + if (!test_and_set_bit(PORT_DELETED, &rport->flags)) 1057 + put_port = true; 1058 + rport->nport->rport = NULL; 1059 + spin_unlock_irqrestore(&fcloop_lock, flags); 1060 + 1061 + if (put_port) 1062 + fcloop_nport_put(rport->nport); 1104 1063 } 1105 1064 1106 1065 static void 1107 1066 fcloop_targetport_delete(struct nvmet_fc_target_port *targetport) 1108 1067 { 1109 1068 struct fcloop_tport *tport = targetport->private; 1069 + bool put_port = false; 1070 + unsigned long flags; 1110 1071 1111 1072 flush_work(&tport->ls_work); 1112 - fcloop_nport_put(tport->nport); 1073 + 1074 + spin_lock_irqsave(&fcloop_lock, flags); 1075 + if (!test_and_set_bit(PORT_DELETED, &tport->flags)) 1076 + put_port = true; 1077 + tport->nport->tport = NULL; 1078 + spin_unlock_irqrestore(&fcloop_lock, flags); 1079 + 1080 + if 
(put_port) 1081 + fcloop_nport_put(tport->nport); 1113 1082 } 1114 1083 1115 1084 #define FCLOOP_HW_QUEUES 4 ··· 1153 1082 /* sizes of additional private data for data structures */ 1154 1083 .local_priv_sz = sizeof(struct fcloop_lport_priv), 1155 1084 .remote_priv_sz = sizeof(struct fcloop_rport), 1156 - .lsrqst_priv_sz = sizeof(struct fcloop_lsreq), 1157 1085 .fcprqst_priv_sz = sizeof(struct fcloop_ini_fcpreq), 1158 1086 }; 1159 1087 ··· 1175 1105 .target_features = 0, 1176 1106 /* sizes of additional private data for data structures */ 1177 1107 .target_priv_sz = sizeof(struct fcloop_tport), 1178 - .lsrqst_priv_sz = sizeof(struct fcloop_lsreq), 1179 1108 }; 1180 1109 1181 1110 static ssize_t ··· 1239 1170 } 1240 1171 1241 1172 static int 1242 - __wait_localport_unreg(struct fcloop_lport *lport) 1173 + __localport_unreg(struct fcloop_lport *lport) 1243 1174 { 1244 - int ret; 1245 - 1246 - init_completion(&lport->unreg_done); 1247 - 1248 - ret = nvme_fc_unregister_localport(lport->localport); 1249 - 1250 - if (!ret) 1251 - wait_for_completion(&lport->unreg_done); 1252 - 1253 - return ret; 1175 + return nvme_fc_unregister_localport(lport->localport); 1254 1176 } 1255 1177 1178 + static struct fcloop_nport * 1179 + __fcloop_nport_lookup(u64 node_name, u64 port_name) 1180 + { 1181 + struct fcloop_nport *nport; 1182 + 1183 + list_for_each_entry(nport, &fcloop_nports, nport_list) { 1184 + if (nport->node_name != node_name || 1185 + nport->port_name != port_name) 1186 + continue; 1187 + 1188 + if (fcloop_nport_get(nport)) 1189 + return nport; 1190 + 1191 + break; 1192 + } 1193 + 1194 + return NULL; 1195 + } 1196 + 1197 + static struct fcloop_nport * 1198 + fcloop_nport_lookup(u64 node_name, u64 port_name) 1199 + { 1200 + struct fcloop_nport *nport; 1201 + unsigned long flags; 1202 + 1203 + spin_lock_irqsave(&fcloop_lock, flags); 1204 + nport = __fcloop_nport_lookup(node_name, port_name); 1205 + spin_unlock_irqrestore(&fcloop_lock, flags); 1206 + 1207 + return nport; 
1208 + } 1209 + 1210 + static struct fcloop_lport * 1211 + __fcloop_lport_lookup(u64 node_name, u64 port_name) 1212 + { 1213 + struct fcloop_lport *lport; 1214 + 1215 + list_for_each_entry(lport, &fcloop_lports, lport_list) { 1216 + if (lport->localport->node_name != node_name || 1217 + lport->localport->port_name != port_name) 1218 + continue; 1219 + 1220 + if (fcloop_lport_get(lport)) 1221 + return lport; 1222 + 1223 + break; 1224 + } 1225 + 1226 + return NULL; 1227 + } 1228 + 1229 + static struct fcloop_lport * 1230 + fcloop_lport_lookup(u64 node_name, u64 port_name) 1231 + { 1232 + struct fcloop_lport *lport; 1233 + unsigned long flags; 1234 + 1235 + spin_lock_irqsave(&fcloop_lock, flags); 1236 + lport = __fcloop_lport_lookup(node_name, port_name); 1237 + spin_unlock_irqrestore(&fcloop_lock, flags); 1238 + 1239 + return lport; 1240 + } 1256 1241 1257 1242 static ssize_t 1258 1243 fcloop_delete_local_port(struct device *dev, struct device_attribute *attr, 1259 1244 const char *buf, size_t count) 1260 1245 { 1261 - struct fcloop_lport *tlport, *lport = NULL; 1246 + struct fcloop_lport *lport; 1262 1247 u64 nodename, portname; 1263 - unsigned long flags; 1264 1248 int ret; 1265 1249 1266 1250 ret = fcloop_parse_nm_options(dev, &nodename, &portname, buf); 1267 1251 if (ret) 1268 1252 return ret; 1269 1253 1270 - spin_lock_irqsave(&fcloop_lock, flags); 1271 - 1272 - list_for_each_entry(tlport, &fcloop_lports, lport_list) { 1273 - if (tlport->localport->node_name == nodename && 1274 - tlport->localport->port_name == portname) { 1275 - if (!fcloop_lport_get(tlport)) 1276 - break; 1277 - lport = tlport; 1278 - break; 1279 - } 1280 - } 1281 - spin_unlock_irqrestore(&fcloop_lock, flags); 1282 - 1254 + lport = fcloop_lport_lookup(nodename, portname); 1283 1255 if (!lport) 1284 1256 return -ENOENT; 1285 1257 1286 - ret = __wait_localport_unreg(lport); 1258 + ret = __localport_unreg(lport); 1287 1259 fcloop_lport_put(lport); 1288 1260 1289 1261 return ret ? 
ret : count; ··· 1333 1223 static struct fcloop_nport * 1334 1224 fcloop_alloc_nport(const char *buf, size_t count, bool remoteport) 1335 1225 { 1336 - struct fcloop_nport *newnport, *nport = NULL; 1337 - struct fcloop_lport *tmplport, *lport = NULL; 1226 + struct fcloop_nport *newnport, *nport; 1227 + struct fcloop_lport *lport; 1338 1228 struct fcloop_ctrl_options *opts; 1339 1229 unsigned long flags; 1340 1230 u32 opts_mask = (remoteport) ? RPORT_OPTS : TGTPORT_OPTS; ··· 1349 1239 goto out_free_opts; 1350 1240 1351 1241 /* everything there ? */ 1352 - if ((opts->mask & opts_mask) != opts_mask) { 1353 - ret = -EINVAL; 1242 + if ((opts->mask & opts_mask) != opts_mask) 1354 1243 goto out_free_opts; 1355 - } 1356 1244 1357 1245 newnport = kzalloc(sizeof(*newnport), GFP_KERNEL); 1358 1246 if (!newnport) ··· 1366 1258 refcount_set(&newnport->ref, 1); 1367 1259 1368 1260 spin_lock_irqsave(&fcloop_lock, flags); 1369 - 1370 - list_for_each_entry(tmplport, &fcloop_lports, lport_list) { 1371 - if (tmplport->localport->node_name == opts->wwnn && 1372 - tmplport->localport->port_name == opts->wwpn) 1373 - goto out_invalid_opts; 1374 - 1375 - if (tmplport->localport->node_name == opts->lpwwnn && 1376 - tmplport->localport->port_name == opts->lpwwpn) 1377 - lport = tmplport; 1261 + lport = __fcloop_lport_lookup(opts->wwnn, opts->wwpn); 1262 + if (lport) { 1263 + /* invalid configuration */ 1264 + fcloop_lport_put(lport); 1265 + goto out_free_newnport; 1378 1266 } 1379 1267 1380 1268 if (remoteport) { 1381 - if (!lport) 1382 - goto out_invalid_opts; 1383 - newnport->lport = lport; 1384 - } 1385 - 1386 - list_for_each_entry(nport, &fcloop_nports, nport_list) { 1387 - if (nport->node_name == opts->wwnn && 1388 - nport->port_name == opts->wwpn) { 1389 - if ((remoteport && nport->rport) || 1390 - (!remoteport && nport->tport)) { 1391 - nport = NULL; 1392 - goto out_invalid_opts; 1393 - } 1394 - 1395 - fcloop_nport_get(nport); 1396 - 1397 - spin_unlock_irqrestore(&fcloop_lock, 
flags); 1398 - 1399 - if (remoteport) 1400 - nport->lport = lport; 1401 - if (opts->mask & NVMF_OPT_ROLES) 1402 - nport->port_role = opts->roles; 1403 - if (opts->mask & NVMF_OPT_FCADDR) 1404 - nport->port_id = opts->fcaddr; 1269 + lport = __fcloop_lport_lookup(opts->lpwwnn, opts->lpwwpn); 1270 + if (!lport) { 1271 + /* invalid configuration */ 1405 1272 goto out_free_newnport; 1406 1273 } 1407 1274 } 1408 1275 1409 - list_add_tail(&newnport->nport_list, &fcloop_nports); 1276 + nport = __fcloop_nport_lookup(opts->wwnn, opts->wwpn); 1277 + if (nport) { 1278 + if ((remoteport && nport->rport) || 1279 + (!remoteport && nport->tport)) { 1280 + /* invalid configuration */ 1281 + goto out_put_nport; 1282 + } 1410 1283 1284 + /* found existing nport, discard the new nport */ 1285 + kfree(newnport); 1286 + } else { 1287 + list_add_tail(&newnport->nport_list, &fcloop_nports); 1288 + nport = newnport; 1289 + } 1290 + 1291 + if (opts->mask & NVMF_OPT_ROLES) 1292 + nport->port_role = opts->roles; 1293 + if (opts->mask & NVMF_OPT_FCADDR) 1294 + nport->port_id = opts->fcaddr; 1295 + if (lport) { 1296 + if (!nport->lport) 1297 + nport->lport = lport; 1298 + else 1299 + fcloop_lport_put(lport); 1300 + } 1411 1301 spin_unlock_irqrestore(&fcloop_lock, flags); 1412 1302 1413 1303 kfree(opts); 1414 - return newnport; 1304 + return nport; 1415 1305 1416 - out_invalid_opts: 1417 - spin_unlock_irqrestore(&fcloop_lock, flags); 1306 + out_put_nport: 1307 + if (lport) 1308 + fcloop_lport_put(lport); 1309 + fcloop_nport_put(nport); 1418 1310 out_free_newnport: 1311 + spin_unlock_irqrestore(&fcloop_lock, flags); 1419 1312 kfree(newnport); 1420 1313 out_free_opts: 1421 1314 kfree(opts); 1422 - return nport; 1315 + return NULL; 1423 1316 } 1424 1317 1425 1318 static ssize_t ··· 1461 1352 rport->nport = nport; 1462 1353 rport->lport = nport->lport; 1463 1354 nport->rport = rport; 1355 + rport->flags = 0; 1464 1356 spin_lock_init(&rport->lock); 1465 1357 INIT_WORK(&rport->ls_work, 
fcloop_rport_lsrqst_work); 1466 1358 INIT_LIST_HEAD(&rport->ls_list); ··· 1475 1365 { 1476 1366 struct fcloop_rport *rport = nport->rport; 1477 1367 1368 + lockdep_assert_held(&fcloop_lock); 1369 + 1478 1370 if (rport && nport->tport) 1479 1371 nport->tport->remoteport = NULL; 1480 1372 nport->rport = NULL; 1481 - 1482 - list_del(&nport->nport_list); 1483 1373 1484 1374 return rport; 1485 1375 } ··· 1487 1377 static int 1488 1378 __remoteport_unreg(struct fcloop_nport *nport, struct fcloop_rport *rport) 1489 1379 { 1490 - if (!rport) 1491 - return -EALREADY; 1492 - 1493 1380 return nvme_fc_unregister_remoteport(rport->remoteport); 1494 1381 } 1495 1382 ··· 1494 1387 fcloop_delete_remote_port(struct device *dev, struct device_attribute *attr, 1495 1388 const char *buf, size_t count) 1496 1389 { 1497 - struct fcloop_nport *nport = NULL, *tmpport; 1498 - static struct fcloop_rport *rport; 1390 + struct fcloop_nport *nport; 1391 + struct fcloop_rport *rport; 1499 1392 u64 nodename, portname; 1500 1393 unsigned long flags; 1501 1394 int ret; ··· 1504 1397 if (ret) 1505 1398 return ret; 1506 1399 1507 - spin_lock_irqsave(&fcloop_lock, flags); 1508 - 1509 - list_for_each_entry(tmpport, &fcloop_nports, nport_list) { 1510 - if (tmpport->node_name == nodename && 1511 - tmpport->port_name == portname && tmpport->rport) { 1512 - nport = tmpport; 1513 - rport = __unlink_remote_port(nport); 1514 - break; 1515 - } 1516 - } 1517 - 1518 - spin_unlock_irqrestore(&fcloop_lock, flags); 1519 - 1400 + nport = fcloop_nport_lookup(nodename, portname); 1520 1401 if (!nport) 1521 1402 return -ENOENT; 1522 1403 1404 + spin_lock_irqsave(&fcloop_lock, flags); 1405 + rport = __unlink_remote_port(nport); 1406 + spin_unlock_irqrestore(&fcloop_lock, flags); 1407 + 1408 + if (!rport) { 1409 + ret = -ENOENT; 1410 + goto out_nport_put; 1411 + } 1412 + 1523 1413 ret = __remoteport_unreg(nport, rport); 1414 + 1415 + out_nport_put: 1416 + fcloop_nport_put(nport); 1524 1417 1525 1418 return ret ? 
ret : count; 1526 1419 } ··· 1559 1452 tport->nport = nport; 1560 1453 tport->lport = nport->lport; 1561 1454 nport->tport = tport; 1455 + tport->flags = 0; 1562 1456 spin_lock_init(&tport->lock); 1563 1457 INIT_WORK(&tport->ls_work, fcloop_tport_lsrqst_work); 1564 1458 INIT_LIST_HEAD(&tport->ls_list); ··· 1573 1465 { 1574 1466 struct fcloop_tport *tport = nport->tport; 1575 1467 1468 + lockdep_assert_held(&fcloop_lock); 1469 + 1576 1470 if (tport && nport->rport) 1577 1471 nport->rport->targetport = NULL; 1578 1472 nport->tport = NULL; ··· 1585 1475 static int 1586 1476 __targetport_unreg(struct fcloop_nport *nport, struct fcloop_tport *tport) 1587 1477 { 1588 - if (!tport) 1589 - return -EALREADY; 1590 - 1591 1478 return nvmet_fc_unregister_targetport(tport->targetport); 1592 1479 } 1593 1480 ··· 1592 1485 fcloop_delete_target_port(struct device *dev, struct device_attribute *attr, 1593 1486 const char *buf, size_t count) 1594 1487 { 1595 - struct fcloop_nport *nport = NULL, *tmpport; 1596 - struct fcloop_tport *tport = NULL; 1488 + struct fcloop_nport *nport; 1489 + struct fcloop_tport *tport; 1597 1490 u64 nodename, portname; 1598 1491 unsigned long flags; 1599 1492 int ret; ··· 1602 1495 if (ret) 1603 1496 return ret; 1604 1497 1605 - spin_lock_irqsave(&fcloop_lock, flags); 1606 - 1607 - list_for_each_entry(tmpport, &fcloop_nports, nport_list) { 1608 - if (tmpport->node_name == nodename && 1609 - tmpport->port_name == portname && tmpport->tport) { 1610 - nport = tmpport; 1611 - tport = __unlink_target_port(nport); 1612 - break; 1613 - } 1614 - } 1615 - 1616 - spin_unlock_irqrestore(&fcloop_lock, flags); 1617 - 1498 + nport = fcloop_nport_lookup(nodename, portname); 1618 1499 if (!nport) 1619 1500 return -ENOENT; 1620 1501 1502 + spin_lock_irqsave(&fcloop_lock, flags); 1503 + tport = __unlink_target_port(nport); 1504 + spin_unlock_irqrestore(&fcloop_lock, flags); 1505 + 1506 + if (!tport) { 1507 + ret = -ENOENT; 1508 + goto out_nport_put; 1509 + } 1510 + 1621 
1511 ret = __targetport_unreg(nport, tport); 1512 + 1513 + out_nport_put: 1514 + fcloop_nport_put(nport); 1622 1515 1623 1516 return ret ? ret : count; 1624 1517 } ··· 1685 1578 }; 1686 1579 static struct device *fcloop_device; 1687 1580 1688 - 1689 1581 static int __init fcloop_init(void) 1690 1582 { 1691 1583 int ret; 1692 1584 1585 + lsreq_cache = kmem_cache_create("lsreq_cache", 1586 + sizeof(struct fcloop_lsreq), 0, 1587 + 0, NULL); 1588 + if (!lsreq_cache) 1589 + return -ENOMEM; 1590 + 1693 1591 ret = class_register(&fcloop_class); 1694 1592 if (ret) { 1695 1593 pr_err("couldn't register class fcloop\n"); 1696 - return ret; 1594 + goto out_destroy_cache; 1697 1595 } 1698 1596 1699 1597 fcloop_device = device_create_with_groups( ··· 1716 1604 1717 1605 out_destroy_class: 1718 1606 class_unregister(&fcloop_class); 1607 + out_destroy_cache: 1608 + kmem_cache_destroy(lsreq_cache); 1719 1609 return ret; 1720 1610 } 1721 1611 1722 1612 static void __exit fcloop_exit(void) 1723 1613 { 1724 - struct fcloop_lport *lport = NULL; 1725 - struct fcloop_nport *nport = NULL; 1614 + struct fcloop_lport *lport; 1615 + struct fcloop_nport *nport; 1726 1616 struct fcloop_tport *tport; 1727 1617 struct fcloop_rport *rport; 1728 1618 unsigned long flags; ··· 1735 1621 for (;;) { 1736 1622 nport = list_first_entry_or_null(&fcloop_nports, 1737 1623 typeof(*nport), nport_list); 1738 - if (!nport) 1624 + if (!nport || !fcloop_nport_get(nport)) 1739 1625 break; 1740 1626 1741 1627 tport = __unlink_target_port(nport); ··· 1743 1629 1744 1630 spin_unlock_irqrestore(&fcloop_lock, flags); 1745 1631 1746 - ret = __targetport_unreg(nport, tport); 1747 - if (ret) 1748 - pr_warn("%s: Failed deleting target port\n", __func__); 1632 + if (tport) { 1633 + ret = __targetport_unreg(nport, tport); 1634 + if (ret) 1635 + pr_warn("%s: Failed deleting target port\n", 1636 + __func__); 1637 + } 1749 1638 1750 - ret = __remoteport_unreg(nport, rport); 1751 - if (ret) 1752 - pr_warn("%s: Failed deleting 
remote port\n", __func__); 1639 + if (rport) { 1640 + ret = __remoteport_unreg(nport, rport); 1641 + if (ret) 1642 + pr_warn("%s: Failed deleting remote port\n", 1643 + __func__); 1644 + } 1645 + 1646 + fcloop_nport_put(nport); 1753 1647 1754 1648 spin_lock_irqsave(&fcloop_lock, flags); 1755 1649 } ··· 1770 1648 1771 1649 spin_unlock_irqrestore(&fcloop_lock, flags); 1772 1650 1773 - ret = __wait_localport_unreg(lport); 1651 + ret = __localport_unreg(lport); 1774 1652 if (ret) 1775 1653 pr_warn("%s: Failed deleting local port\n", __func__); 1776 1654 ··· 1785 1663 1786 1664 device_destroy(&fcloop_class, MKDEV(0, 0)); 1787 1665 class_unregister(&fcloop_class); 1666 + kmem_cache_destroy(lsreq_cache); 1788 1667 } 1789 1668 1790 1669 module_init(fcloop_init);
+20 -9
drivers/nvme/target/loop.c
··· 33 33 34 34 struct list_head list; 35 35 struct blk_mq_tag_set tag_set; 36 - struct nvme_loop_iod async_event_iod; 37 36 struct nvme_ctrl ctrl; 38 37 39 38 struct nvmet_port *port; 39 + 40 + /* Must be last --ends in a flexible-array member. */ 41 + struct nvme_loop_iod async_event_iod; 40 42 }; 41 43 42 44 static inline struct nvme_loop_ctrl *to_loop_ctrl(struct nvme_ctrl *ctrl) ··· 150 148 nvme_start_request(req); 151 149 iod->cmd.common.flags |= NVME_CMD_SGL_METABUF; 152 150 iod->req.port = queue->ctrl->port; 153 - if (!nvmet_req_init(&iod->req, &queue->nvme_cq, 154 - &queue->nvme_sq, &nvme_loop_ops)) 151 + if (!nvmet_req_init(&iod->req, &queue->nvme_sq, &nvme_loop_ops)) 155 152 return BLK_STS_OK; 156 153 157 154 if (blk_rq_nr_phys_segments(req)) { ··· 182 181 iod->cmd.common.command_id = NVME_AQ_BLK_MQ_DEPTH; 183 182 iod->cmd.common.flags |= NVME_CMD_SGL_METABUF; 184 183 185 - if (!nvmet_req_init(&iod->req, &queue->nvme_cq, &queue->nvme_sq, 186 - &nvme_loop_ops)) { 184 + if (!nvmet_req_init(&iod->req, &queue->nvme_sq, &nvme_loop_ops)) { 187 185 dev_err(ctrl->ctrl.device, "failed async event work\n"); 188 186 return; 189 187 } ··· 273 273 nvme_unquiesce_admin_queue(&ctrl->ctrl); 274 274 275 275 nvmet_sq_destroy(&ctrl->queues[0].nvme_sq); 276 + nvmet_cq_put(&ctrl->queues[0].nvme_cq); 276 277 nvme_remove_admin_tag_set(&ctrl->ctrl); 277 278 } 278 279 ··· 303 302 for (i = 1; i < ctrl->ctrl.queue_count; i++) { 304 303 clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[i].flags); 305 304 nvmet_sq_destroy(&ctrl->queues[i].nvme_sq); 305 + nvmet_cq_put(&ctrl->queues[i].nvme_cq); 306 306 } 307 307 ctrl->ctrl.queue_count = 1; 308 308 /* ··· 329 327 330 328 for (i = 1; i <= nr_io_queues; i++) { 331 329 ctrl->queues[i].ctrl = ctrl; 332 - ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq); 333 - if (ret) 330 + nvmet_cq_init(&ctrl->queues[i].nvme_cq); 331 + ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq, 332 + &ctrl->queues[i].nvme_cq); 333 + if (ret) { 334 + 
nvmet_cq_put(&ctrl->queues[i].nvme_cq); 334 335 goto out_destroy_queues; 336 + } 335 337 336 338 ctrl->ctrl.queue_count++; 337 339 } ··· 366 360 int error; 367 361 368 362 ctrl->queues[0].ctrl = ctrl; 369 - error = nvmet_sq_init(&ctrl->queues[0].nvme_sq); 370 - if (error) 363 + nvmet_cq_init(&ctrl->queues[0].nvme_cq); 364 + error = nvmet_sq_init(&ctrl->queues[0].nvme_sq, 365 + &ctrl->queues[0].nvme_cq); 366 + if (error) { 367 + nvmet_cq_put(&ctrl->queues[0].nvme_cq); 371 368 return error; 369 + } 372 370 ctrl->ctrl.queue_count = 1; 373 371 374 372 error = nvme_alloc_admin_tag_set(&ctrl->ctrl, &ctrl->admin_tag_set, ··· 411 401 nvme_remove_admin_tag_set(&ctrl->ctrl); 412 402 out_free_sq: 413 403 nvmet_sq_destroy(&ctrl->queues[0].nvme_sq); 404 + nvmet_cq_put(&ctrl->queues[0].nvme_cq); 414 405 return error; 415 406 } 416 407
+17 -7
drivers/nvme/target/nvmet.h
··· 141 141 } 142 142 143 143 struct nvmet_cq { 144 + struct nvmet_ctrl *ctrl; 144 145 u16 qid; 145 146 u16 size; 147 + refcount_t ref; 146 148 }; 147 149 148 150 struct nvmet_sq { 149 151 struct nvmet_ctrl *ctrl; 150 152 struct percpu_ref ref; 153 + struct nvmet_cq *cq; 151 154 u16 qid; 152 155 u16 size; 153 156 u32 sqhd; ··· 250 247 struct nvmet_ctrl { 251 248 struct nvmet_subsys *subsys; 252 249 struct nvmet_sq **sqs; 250 + struct nvmet_cq **cqs; 253 251 254 252 void *drvdata; 255 253 ··· 428 424 u16 (*get_max_queue_size)(const struct nvmet_ctrl *ctrl); 429 425 430 426 /* Operations mandatory for PCI target controllers */ 431 - u16 (*create_sq)(struct nvmet_ctrl *ctrl, u16 sqid, u16 flags, 427 + u16 (*create_sq)(struct nvmet_ctrl *ctrl, u16 sqid, u16 cqid, u16 flags, 432 428 u16 qsize, u64 prp1); 433 429 u16 (*delete_sq)(struct nvmet_ctrl *ctrl, u16 sqid); 434 430 u16 (*create_cq)(struct nvmet_ctrl *ctrl, u16 cqid, u16 flags, ··· 561 557 u16 nvmet_parse_fabrics_io_cmd(struct nvmet_req *req); 562 558 u32 nvmet_fabrics_io_cmd_data_len(struct nvmet_req *req); 563 559 564 - bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, 565 - struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops); 560 + bool nvmet_req_init(struct nvmet_req *req, struct nvmet_sq *sq, 561 + const struct nvmet_fabrics_ops *ops); 566 562 void nvmet_req_uninit(struct nvmet_req *req); 567 563 size_t nvmet_req_transfer_len(struct nvmet_req *req); 568 564 bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len); ··· 575 571 void nvmet_execute_get_features(struct nvmet_req *req); 576 572 void nvmet_execute_keep_alive(struct nvmet_req *req); 577 573 578 - u16 nvmet_check_cqid(struct nvmet_ctrl *ctrl, u16 cqid); 574 + u16 nvmet_check_cqid(struct nvmet_ctrl *ctrl, u16 cqid, bool create); 575 + u16 nvmet_check_io_cqid(struct nvmet_ctrl *ctrl, u16 cqid, bool create); 576 + void nvmet_cq_init(struct nvmet_cq *cq); 579 577 void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq, 
u16 qid, 580 578 u16 size); 581 579 u16 nvmet_cq_create(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq, u16 qid, 582 580 u16 size); 581 + void nvmet_cq_destroy(struct nvmet_cq *cq); 582 + bool nvmet_cq_get(struct nvmet_cq *cq); 583 + void nvmet_cq_put(struct nvmet_cq *cq); 584 + bool nvmet_cq_in_use(struct nvmet_cq *cq); 583 585 u16 nvmet_check_sqid(struct nvmet_ctrl *ctrl, u16 sqid, bool create); 584 586 void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, u16 qid, 585 587 u16 size); 586 - u16 nvmet_sq_create(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, u16 qid, 587 - u16 size); 588 + u16 nvmet_sq_create(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, 589 + struct nvmet_cq *cq, u16 qid, u16 size); 588 590 void nvmet_sq_destroy(struct nvmet_sq *sq); 589 - int nvmet_sq_init(struct nvmet_sq *sq); 591 + int nvmet_sq_init(struct nvmet_sq *sq, struct nvmet_cq *cq); 590 592 591 593 void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl); 592 594
+8 -6
drivers/nvme/target/pci-epf.c
··· 1346 1346 nvmet_pci_epf_drain_queue(cq); 1347 1347 nvmet_pci_epf_remove_irq_vector(ctrl, cq->vector); 1348 1348 nvmet_pci_epf_mem_unmap(ctrl->nvme_epf, &cq->pci_map); 1349 + nvmet_cq_put(&cq->nvme_cq); 1349 1350 1350 1351 return NVME_SC_SUCCESS; 1351 1352 } 1352 1353 1353 1354 static u16 nvmet_pci_epf_create_sq(struct nvmet_ctrl *tctrl, 1354 - u16 sqid, u16 flags, u16 qsize, u64 pci_addr) 1355 + u16 sqid, u16 cqid, u16 flags, u16 qsize, u64 pci_addr) 1355 1356 { 1356 1357 struct nvmet_pci_epf_ctrl *ctrl = tctrl->drvdata; 1357 1358 struct nvmet_pci_epf_queue *sq = &ctrl->sq[sqid]; 1359 + struct nvmet_pci_epf_queue *cq = &ctrl->cq[cqid]; 1358 1360 u16 status; 1359 1361 1360 1362 if (test_bit(NVMET_PCI_EPF_Q_LIVE, &sq->flags)) ··· 1379 1377 sq->qes = ctrl->io_sqes; 1380 1378 sq->pci_size = sq->qes * sq->depth; 1381 1379 1382 - status = nvmet_sq_create(tctrl, &sq->nvme_sq, sqid, sq->depth); 1380 + status = nvmet_sq_create(tctrl, &sq->nvme_sq, &cq->nvme_cq, sqid, 1381 + sq->depth); 1383 1382 if (status != NVME_SC_SUCCESS) 1384 1383 return status; 1385 1384 ··· 1597 1594 goto complete; 1598 1595 } 1599 1596 1600 - if (!nvmet_req_init(req, &iod->cq->nvme_cq, &iod->sq->nvme_sq, 1601 - &nvmet_pci_epf_fabrics_ops)) 1597 + if (!nvmet_req_init(req, &iod->sq->nvme_sq, &nvmet_pci_epf_fabrics_ops)) 1602 1598 goto complete; 1603 1599 1604 1600 iod->data_len = nvmet_req_transfer_len(req); ··· 1874 1872 1875 1873 qsize = aqa & 0x00000fff; 1876 1874 pci_addr = asq & GENMASK_ULL(63, 12); 1877 - status = nvmet_pci_epf_create_sq(ctrl->tctrl, 0, NVME_QUEUE_PHYS_CONTIG, 1878 - qsize, pci_addr); 1875 + status = nvmet_pci_epf_create_sq(ctrl->tctrl, 0, 0, 1876 + NVME_QUEUE_PHYS_CONTIG, qsize, pci_addr); 1879 1877 if (status != NVME_SC_SUCCESS) { 1880 1878 dev_err(ctrl->dev, "Failed to create admin submission queue\n"); 1881 1879 nvmet_pci_epf_delete_cq(ctrl->tctrl, 0);
+5 -3
drivers/nvme/target/rdma.c
··· 976 976 cmd->send_sge.addr, cmd->send_sge.length, 977 977 DMA_TO_DEVICE); 978 978 979 - if (!nvmet_req_init(&cmd->req, &queue->nvme_cq, 980 - &queue->nvme_sq, &nvmet_rdma_ops)) 979 + if (!nvmet_req_init(&cmd->req, &queue->nvme_sq, &nvmet_rdma_ops)) 981 980 return; 982 981 983 982 status = nvmet_rdma_map_sgl(cmd); ··· 1352 1353 pr_debug("freeing queue %d\n", queue->idx); 1353 1354 1354 1355 nvmet_sq_destroy(&queue->nvme_sq); 1356 + nvmet_cq_put(&queue->nvme_cq); 1355 1357 1356 1358 nvmet_rdma_destroy_queue_ib(queue); 1357 1359 if (!queue->nsrq) { ··· 1436 1436 goto out_reject; 1437 1437 } 1438 1438 1439 - ret = nvmet_sq_init(&queue->nvme_sq); 1439 + nvmet_cq_init(&queue->nvme_cq); 1440 + ret = nvmet_sq_init(&queue->nvme_sq, &queue->nvme_cq); 1440 1441 if (ret) { 1441 1442 ret = NVME_RDMA_CM_NO_RSC; 1442 1443 goto out_free_queue; ··· 1518 1517 out_destroy_sq: 1519 1518 nvmet_sq_destroy(&queue->nvme_sq); 1520 1519 out_free_queue: 1520 + nvmet_cq_put(&queue->nvme_cq); 1521 1521 kfree(queue); 1522 1522 out_reject: 1523 1523 nvmet_rdma_cm_reject(cm_id, ret);
+31 -69
drivers/nvme/target/tcp.c
··· 7 7 #include <linux/module.h> 8 8 #include <linux/init.h> 9 9 #include <linux/slab.h> 10 + #include <linux/crc32c.h> 10 11 #include <linux/err.h> 11 12 #include <linux/nvme-tcp.h> 12 13 #include <linux/nvme-keyring.h> ··· 18 17 #include <net/handshake.h> 19 18 #include <linux/inet.h> 20 19 #include <linux/llist.h> 21 - #include <crypto/hash.h> 22 20 #include <trace/events/sock.h> 23 21 24 22 #include "nvmet.h" ··· 172 172 /* digest state */ 173 173 bool hdr_digest; 174 174 bool data_digest; 175 - struct ahash_request *snd_hash; 176 - struct ahash_request *rcv_hash; 177 175 178 176 /* TLS state */ 179 177 key_serial_t tls_pskid; ··· 292 294 return queue->data_digest ? NVME_TCP_DIGEST_LENGTH : 0; 293 295 } 294 296 295 - static inline void nvmet_tcp_hdgst(struct ahash_request *hash, 296 - void *pdu, size_t len) 297 + static inline void nvmet_tcp_hdgst(void *pdu, size_t len) 297 298 { 298 - struct scatterlist sg; 299 - 300 - sg_init_one(&sg, pdu, len); 301 - ahash_request_set_crypt(hash, &sg, pdu + len, len); 302 - crypto_ahash_digest(hash); 299 + put_unaligned_le32(~crc32c(~0, pdu, len), pdu + len); 303 300 } 304 301 305 302 static int nvmet_tcp_verify_hdgst(struct nvmet_tcp_queue *queue, ··· 311 318 } 312 319 313 320 recv_digest = *(__le32 *)(pdu + hdr->hlen); 314 - nvmet_tcp_hdgst(queue->rcv_hash, pdu, len); 321 + nvmet_tcp_hdgst(pdu, len); 315 322 exp_digest = *(__le32 *)(pdu + hdr->hlen); 316 323 if (recv_digest != exp_digest) { 317 324 pr_err("queue %d: header digest error: recv %#x expected %#x\n", ··· 434 441 return NVME_SC_INTERNAL; 435 442 } 436 443 437 - static void nvmet_tcp_calc_ddgst(struct ahash_request *hash, 438 - struct nvmet_tcp_cmd *cmd) 444 + static void nvmet_tcp_calc_ddgst(struct nvmet_tcp_cmd *cmd) 439 445 { 440 - ahash_request_set_crypt(hash, cmd->req.sg, 441 - (void *)&cmd->exp_ddgst, cmd->req.transfer_len); 442 - crypto_ahash_digest(hash); 446 + size_t total_len = cmd->req.transfer_len; 447 + struct scatterlist *sg = cmd->req.sg; 448 + 
u32 crc = ~0; 449 + 450 + while (total_len) { 451 + size_t len = min_t(size_t, total_len, sg->length); 452 + 453 + /* 454 + * Note that the scatterlist does not contain any highmem pages, 455 + * as it was allocated by sgl_alloc() with GFP_KERNEL. 456 + */ 457 + crc = crc32c(crc, sg_virt(sg), len); 458 + total_len -= len; 459 + sg = sg_next(sg); 460 + } 461 + cmd->exp_ddgst = cpu_to_le32(~crc); 443 462 } 444 463 445 464 static void nvmet_setup_c2h_data_pdu(struct nvmet_tcp_cmd *cmd) ··· 478 473 479 474 if (queue->data_digest) { 480 475 pdu->hdr.flags |= NVME_TCP_F_DDGST; 481 - nvmet_tcp_calc_ddgst(queue->snd_hash, cmd); 476 + nvmet_tcp_calc_ddgst(cmd); 482 477 } 483 478 484 479 if (cmd->queue->hdr_digest) { 485 480 pdu->hdr.flags |= NVME_TCP_F_HDGST; 486 - nvmet_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu)); 481 + nvmet_tcp_hdgst(pdu, sizeof(*pdu)); 487 482 } 488 483 } 489 484 490 485 static void nvmet_setup_r2t_pdu(struct nvmet_tcp_cmd *cmd) 491 486 { 492 487 struct nvme_tcp_r2t_pdu *pdu = cmd->r2t_pdu; 493 - struct nvmet_tcp_queue *queue = cmd->queue; 494 488 u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue); 495 489 496 490 cmd->offset = 0; ··· 507 503 pdu->r2t_offset = cpu_to_le32(cmd->rbytes_done); 508 504 if (cmd->queue->hdr_digest) { 509 505 pdu->hdr.flags |= NVME_TCP_F_HDGST; 510 - nvmet_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu)); 506 + nvmet_tcp_hdgst(pdu, sizeof(*pdu)); 511 507 } 512 508 } 513 509 514 510 static void nvmet_setup_response_pdu(struct nvmet_tcp_cmd *cmd) 515 511 { 516 512 struct nvme_tcp_rsp_pdu *pdu = cmd->rsp_pdu; 517 - struct nvmet_tcp_queue *queue = cmd->queue; 518 513 u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue); 519 514 520 515 cmd->offset = 0; ··· 526 523 pdu->hdr.plen = cpu_to_le32(pdu->hdr.hlen + hdgst); 527 524 if (cmd->queue->hdr_digest) { 528 525 pdu->hdr.flags |= NVME_TCP_F_HDGST; 529 - nvmet_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu)); 526 + nvmet_tcp_hdgst(pdu, sizeof(*pdu)); 530 527 } 531 528 } 532 529 ··· 860 857 
smp_store_release(&queue->rcv_state, NVMET_TCP_RECV_PDU); 861 858 } 862 859 863 - static void nvmet_tcp_free_crypto(struct nvmet_tcp_queue *queue) 864 - { 865 - struct crypto_ahash *tfm = crypto_ahash_reqtfm(queue->rcv_hash); 866 - 867 - ahash_request_free(queue->rcv_hash); 868 - ahash_request_free(queue->snd_hash); 869 - crypto_free_ahash(tfm); 870 - } 871 - 872 - static int nvmet_tcp_alloc_crypto(struct nvmet_tcp_queue *queue) 873 - { 874 - struct crypto_ahash *tfm; 875 - 876 - tfm = crypto_alloc_ahash("crc32c", 0, CRYPTO_ALG_ASYNC); 877 - if (IS_ERR(tfm)) 878 - return PTR_ERR(tfm); 879 - 880 - queue->snd_hash = ahash_request_alloc(tfm, GFP_KERNEL); 881 - if (!queue->snd_hash) 882 - goto free_tfm; 883 - ahash_request_set_callback(queue->snd_hash, 0, NULL, NULL); 884 - 885 - queue->rcv_hash = ahash_request_alloc(tfm, GFP_KERNEL); 886 - if (!queue->rcv_hash) 887 - goto free_snd_hash; 888 - ahash_request_set_callback(queue->rcv_hash, 0, NULL, NULL); 889 - 890 - return 0; 891 - free_snd_hash: 892 - ahash_request_free(queue->snd_hash); 893 - free_tfm: 894 - crypto_free_ahash(tfm); 895 - return -ENOMEM; 896 - } 897 - 898 - 899 860 static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue) 900 861 { 901 862 struct nvme_tcp_icreq_pdu *icreq = &queue->pdu.icreq; ··· 888 921 889 922 queue->hdr_digest = !!(icreq->digest & NVME_TCP_HDR_DIGEST_ENABLE); 890 923 queue->data_digest = !!(icreq->digest & NVME_TCP_DATA_DIGEST_ENABLE); 891 - if (queue->hdr_digest || queue->data_digest) { 892 - ret = nvmet_tcp_alloc_crypto(queue); 893 - if (ret) 894 - return ret; 895 - } 896 924 897 925 memset(icresp, 0, sizeof(*icresp)); 898 926 icresp->hdr.type = nvme_tcp_icresp; ··· 1039 1077 req = &queue->cmd->req; 1040 1078 memcpy(req->cmd, nvme_cmd, sizeof(*nvme_cmd)); 1041 1079 1042 - if (unlikely(!nvmet_req_init(req, &queue->nvme_cq, 1043 - &queue->nvme_sq, &nvmet_tcp_ops))) { 1080 + if (unlikely(!nvmet_req_init(req, &queue->nvme_sq, &nvmet_tcp_ops))) { 1044 1081 pr_err("failed cmd %p 
id %d opcode %d, data_len: %d, status: %04x\n", 1045 1082 req->cmd, req->cmd->common.command_id, 1046 1083 req->cmd->common.opcode, ··· 1208 1247 { 1209 1248 struct nvmet_tcp_queue *queue = cmd->queue; 1210 1249 1211 - nvmet_tcp_calc_ddgst(queue->rcv_hash, cmd); 1250 + nvmet_tcp_calc_ddgst(cmd); 1212 1251 queue->offset = 0; 1213 1252 queue->left = NVME_TCP_DIGEST_LENGTH; 1214 1253 queue->rcv_state = NVMET_TCP_RECV_DDGST; ··· 1576 1615 nvmet_sq_put_tls_key(&queue->nvme_sq); 1577 1616 nvmet_tcp_uninit_data_in_cmds(queue); 1578 1617 nvmet_sq_destroy(&queue->nvme_sq); 1618 + nvmet_cq_put(&queue->nvme_cq); 1579 1619 cancel_work_sync(&queue->io_work); 1580 1620 nvmet_tcp_free_cmd_data_in_buffers(queue); 1581 1621 /* ->sock will be released by fput() */ 1582 1622 fput(queue->sock->file); 1583 1623 nvmet_tcp_free_cmds(queue); 1584 - if (queue->hdr_digest || queue->data_digest) 1585 - nvmet_tcp_free_crypto(queue); 1586 1624 ida_free(&nvmet_tcp_queue_ida, queue->idx); 1587 1625 page_frag_cache_drain(&queue->pf_cache); 1588 1626 kfree(queue); ··· 1910 1950 if (ret) 1911 1951 goto out_ida_remove; 1912 1952 1913 - ret = nvmet_sq_init(&queue->nvme_sq); 1953 + nvmet_cq_init(&queue->nvme_cq); 1954 + ret = nvmet_sq_init(&queue->nvme_sq, &queue->nvme_cq); 1914 1955 if (ret) 1915 1956 goto out_free_connect; 1916 1957 ··· 1954 1993 mutex_unlock(&nvmet_tcp_queue_mutex); 1955 1994 nvmet_sq_destroy(&queue->nvme_sq); 1956 1995 out_free_connect: 1996 + nvmet_cq_put(&queue->nvme_cq); 1957 1997 nvmet_tcp_free_cmd(&queue->connect); 1958 1998 out_ida_remove: 1959 1999 ida_free(&nvmet_tcp_queue_ida, queue->idx);
+16 -5
include/linux/dmapool.h
··· 11 11 #ifndef LINUX_DMAPOOL_H 12 12 #define LINUX_DMAPOOL_H 13 13 14 + #include <linux/nodemask_types.h> 14 15 #include <linux/scatterlist.h> 15 16 #include <asm/io.h> 16 17 ··· 19 18 20 19 #ifdef CONFIG_HAS_DMA 21 20 22 - struct dma_pool *dma_pool_create(const char *name, struct device *dev, 23 - size_t size, size_t align, size_t allocation); 21 + struct dma_pool *dma_pool_create_node(const char *name, struct device *dev, 22 + size_t size, size_t align, size_t boundary, int node); 24 23 25 24 void dma_pool_destroy(struct dma_pool *pool); 26 25 ··· 36 35 void dmam_pool_destroy(struct dma_pool *pool); 37 36 38 37 #else /* !CONFIG_HAS_DMA */ 39 - static inline struct dma_pool *dma_pool_create(const char *name, 40 - struct device *dev, size_t size, size_t align, size_t allocation) 41 - { return NULL; } 38 + static inline struct dma_pool *dma_pool_create_node(const char *name, 39 + struct device *dev, size_t size, size_t align, size_t boundary, 40 + int node) 41 + { 42 + return NULL; 43 + } 42 44 static inline void dma_pool_destroy(struct dma_pool *pool) { } 43 45 static inline void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags, 44 46 dma_addr_t *handle) { return NULL; } ··· 52 48 { return NULL; } 53 49 static inline void dmam_pool_destroy(struct dma_pool *pool) { } 54 50 #endif /* !CONFIG_HAS_DMA */ 51 + 52 + static inline struct dma_pool *dma_pool_create(const char *name, 53 + struct device *dev, size_t size, size_t align, size_t boundary) 54 + { 55 + return dma_pool_create_node(name, dev, size, align, boundary, 56 + NUMA_NO_NODE); 57 + } 55 58 56 59 static inline void *dma_pool_zalloc(struct dma_pool *pool, gfp_t mem_flags, 57 60 dma_addr_t *handle)
+9 -6
mm/dmapool.c
··· 56 56 unsigned int size; 57 57 unsigned int allocation; 58 58 unsigned int boundary; 59 + int node; 59 60 char name[32]; 60 61 struct list_head pools; 61 62 }; ··· 200 199 201 200 202 201 /** 203 - * dma_pool_create - Creates a pool of consistent memory blocks, for dma. 202 + * dma_pool_create_node - Creates a pool of consistent memory blocks, for dma. 204 203 * @name: name of pool, for diagnostics 205 204 * @dev: device that will be doing the DMA 206 205 * @size: size of the blocks in this pool. 207 206 * @align: alignment requirement for blocks; must be a power of two 208 207 * @boundary: returned blocks won't cross this power of two boundary 208 + * @node: optional NUMA node to allocate structs 'dma_pool' and 'dma_page' on 209 209 * Context: not in_interrupt() 210 210 * 211 211 * Given one of these pools, dma_pool_alloc() ··· 223 221 * Return: a dma allocation pool with the requested characteristics, or 224 222 * %NULL if one can't be created. 225 223 */ 226 - struct dma_pool *dma_pool_create(const char *name, struct device *dev, 227 - size_t size, size_t align, size_t boundary) 224 + struct dma_pool *dma_pool_create_node(const char *name, struct device *dev, 225 + size_t size, size_t align, size_t boundary, int node) 228 226 { 229 227 struct dma_pool *retval; 230 228 size_t allocation; ··· 253 251 254 252 boundary = min(boundary, allocation); 255 253 256 - retval = kzalloc(sizeof(*retval), GFP_KERNEL); 254 + retval = kzalloc_node(sizeof(*retval), GFP_KERNEL, node); 257 255 if (!retval) 258 256 return retval; 259 257 ··· 266 264 retval->size = size; 267 265 retval->boundary = boundary; 268 266 retval->allocation = allocation; 267 + retval->node = node; 269 268 INIT_LIST_HEAD(&retval->pools); 270 269 271 270 /* ··· 298 295 mutex_unlock(&pools_reg_lock); 299 296 return retval; 300 297 } 301 - EXPORT_SYMBOL(dma_pool_create); 298 + EXPORT_SYMBOL(dma_pool_create_node); 302 299 303 300 static void pool_initialise_page(struct dma_pool *pool, struct dma_page *page) 
304 301 { ··· 338 335 { 339 336 struct dma_page *page; 340 337 341 - page = kmalloc(sizeof(*page), mem_flags); 338 + page = kmalloc_node(sizeof(*page), mem_flags, pool->node); 342 339 if (!page) 343 340 return NULL; 344 341