Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge branch 'nvme-4.15' of git://git.infradead.org/nvme into for-4.15/block

Pull NVMe changes from Christoph:

"Below are the currently queue nvme updates for Linux 4.15. There are
a few more things that could make it for this merge window, but I'd
like to get things into linux-next, especially for the unlikely case
that Linus decides to cut -rc8.

Highlights:
- support for SGLs in the PCIe driver (Chaitanya Kulkarni)
- disable I/O schedulers for the admin queue (Israel Rukshin)
- various Fibre Channel fixes and enhancements (James Smart)
- various refactoring for better code sharing between transports
(Sagi Grimberg and me)

as well as lots of little bits from various contributors."
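
One of the highlights above, disabling I/O schedulers for the admin queue, shows up in the transports as the BLK_MQ_F_NO_SCHED flag on the admin tag set (the fc.c hunk below adds it for FC). A minimal sketch of that setup for a hypothetical transport; only BLK_MQ_F_NO_SCHED and the blk-mq calls are taken from the series, everything else is a placeholder:

#include <linux/blk-mq.h>
#include <linux/string.h>
#include <linux/numa.h>

/*
 * Sketch: mark a transport's admin tag set so blk-mq never attaches an
 * I/O scheduler to the admin queue. Depth and ops are example values.
 */
static int example_setup_admin_tag_set(struct blk_mq_tag_set *set,
                                       const struct blk_mq_ops *ops,
                                       void *driver_data)
{
        memset(set, 0, sizeof(*set));
        set->ops = ops;
        set->queue_depth = 32;                  /* arbitrary example depth */
        set->nr_hw_queues = 1;                  /* single admin queue */
        set->numa_node = NUMA_NO_NODE;
        set->driver_data = driver_data;
        set->flags = BLK_MQ_F_NO_SCHED;         /* keep schedulers off admin I/O */

        return blk_mq_alloc_tag_set(set);
}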

+1048 -525
+5 -6
block/blk-mq-tag.c
··· 298 298 } 299 299 EXPORT_SYMBOL(blk_mq_tagset_busy_iter); 300 300 301 - int blk_mq_reinit_tagset(struct blk_mq_tag_set *set, 302 - int (reinit_request)(void *, struct request *)) 301 + int blk_mq_tagset_iter(struct blk_mq_tag_set *set, void *data, 302 + int (fn)(void *, struct request *)) 303 303 { 304 304 int i, j, ret = 0; 305 305 306 - if (WARN_ON_ONCE(!reinit_request)) 306 + if (WARN_ON_ONCE(!fn)) 307 307 goto out; 308 308 309 309 for (i = 0; i < set->nr_hw_queues; i++) { ··· 316 316 if (!tags->static_rqs[j]) 317 317 continue; 318 318 319 - ret = reinit_request(set->driver_data, 320 - tags->static_rqs[j]); 319 + ret = fn(data, tags->static_rqs[j]); 321 320 if (ret) 322 321 goto out; 323 322 } ··· 325 326 out: 326 327 return ret; 327 328 } 328 - EXPORT_SYMBOL_GPL(blk_mq_reinit_tagset); 329 + EXPORT_SYMBOL_GPL(blk_mq_tagset_iter); 329 330 330 331 void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, 331 332 void *priv)
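
The rename above turns the NVMe-specific reinit hook into a generic tag set iterator that takes an explicit data pointer and a callback. A sketch of a caller, assuming the matching declaration change in include/linux/blk-mq.h from this series; the callback and its driver data are hypothetical:

#include <linux/blk-mq.h>

/* Hypothetical per-request callback; a nonzero return stops the iteration. */
static int example_reinit_request(void *data, struct request *rq)
{
        /* re-initialize per-request driver state here */
        return 0;
}

static int example_reinit_all_requests(struct blk_mq_tag_set *set)
{
        return blk_mq_tagset_iter(set, set->driver_data,
                                  example_reinit_request);
}

This mirrors the nvme_reinit_tagset() wrapper added to core.c further down, which passes set->driver_data and the transport's reinit_request op.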
+4
drivers/nvme/Kconfig
··· 1 + menu "NVME Support" 2 + 1 3 source "drivers/nvme/host/Kconfig" 2 4 source "drivers/nvme/target/Kconfig" 5 + 6 + endmenu
+106 -152
drivers/nvme/host/core.c
··· 34 34 35 35 #define NVME_MINORS (1U << MINORBITS) 36 36 37 - unsigned char admin_timeout = 60; 38 - module_param(admin_timeout, byte, 0644); 37 + unsigned int admin_timeout = 60; 38 + module_param(admin_timeout, uint, 0644); 39 39 MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands"); 40 40 EXPORT_SYMBOL_GPL(admin_timeout); 41 41 42 - unsigned char nvme_io_timeout = 30; 43 - module_param_named(io_timeout, nvme_io_timeout, byte, 0644); 42 + unsigned int nvme_io_timeout = 30; 43 + module_param_named(io_timeout, nvme_io_timeout, uint, 0644); 44 44 MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O"); 45 45 EXPORT_SYMBOL_GPL(nvme_io_timeout); 46 46 ··· 51 51 static u8 nvme_max_retries = 5; 52 52 module_param_named(max_retries, nvme_max_retries, byte, 0644); 53 53 MODULE_PARM_DESC(max_retries, "max number of retries a command may have"); 54 - 55 - static int nvme_char_major; 56 - module_param(nvme_char_major, int, 0); 57 54 58 55 static unsigned long default_ps_max_latency_us = 100000; 59 56 module_param(default_ps_max_latency_us, ulong, 0644); ··· 68 71 struct workqueue_struct *nvme_wq; 69 72 EXPORT_SYMBOL_GPL(nvme_wq); 70 73 71 - static LIST_HEAD(nvme_ctrl_list); 72 - static DEFINE_SPINLOCK(dev_list_lock); 73 - 74 + static DEFINE_IDA(nvme_instance_ida); 75 + static dev_t nvme_chr_devt; 74 76 static struct class *nvme_class; 75 77 76 78 static __le32 nvme_get_log_dw10(u8 lid, size_t size) ··· 96 100 flush_work(&ctrl->reset_work); 97 101 return ret; 98 102 } 103 + 104 + static void nvme_delete_ctrl_work(struct work_struct *work) 105 + { 106 + struct nvme_ctrl *ctrl = 107 + container_of(work, struct nvme_ctrl, delete_work); 108 + 109 + flush_work(&ctrl->reset_work); 110 + nvme_stop_ctrl(ctrl); 111 + nvme_remove_namespaces(ctrl); 112 + ctrl->ops->delete_ctrl(ctrl); 113 + nvme_uninit_ctrl(ctrl); 114 + nvme_put_ctrl(ctrl); 115 + } 116 + 117 + int nvme_delete_ctrl(struct nvme_ctrl *ctrl) 118 + { 119 + if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING)) 120 + return -EBUSY; 121 + if (!queue_work(nvme_wq, &ctrl->delete_work)) 122 + return -EBUSY; 123 + return 0; 124 + } 125 + EXPORT_SYMBOL_GPL(nvme_delete_ctrl); 126 + 127 + int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl) 128 + { 129 + int ret = 0; 130 + 131 + /* 132 + * Keep a reference until the work is flushed since ->delete_ctrl 133 + * can free the controller. 
134 + */ 135 + nvme_get_ctrl(ctrl); 136 + ret = nvme_delete_ctrl(ctrl); 137 + if (!ret) 138 + flush_work(&ctrl->delete_work); 139 + nvme_put_ctrl(ctrl); 140 + return ret; 141 + } 142 + EXPORT_SYMBOL_GPL(nvme_delete_ctrl_sync); 99 143 100 144 static blk_status_t nvme_error_status(struct request *req) 101 145 { ··· 241 205 case NVME_CTRL_RECONNECTING: 242 206 switch (old_state) { 243 207 case NVME_CTRL_LIVE: 208 + case NVME_CTRL_RESETTING: 244 209 changed = true; 245 210 /* FALLTHRU */ 246 211 default: ··· 288 251 if (ns->ndev) 289 252 nvme_nvm_unregister(ns); 290 253 291 - if (ns->disk) { 292 - spin_lock(&dev_list_lock); 293 - ns->disk->private_data = NULL; 294 - spin_unlock(&dev_list_lock); 295 - } 296 - 297 254 put_disk(ns->disk); 298 255 ida_simple_remove(&ns->ctrl->ns_ida, ns->instance); 299 256 nvme_put_ctrl(ns->ctrl); ··· 297 266 static void nvme_put_ns(struct nvme_ns *ns) 298 267 { 299 268 kref_put(&ns->kref, nvme_free_ns); 300 - } 301 - 302 - static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk) 303 - { 304 - struct nvme_ns *ns; 305 - 306 - spin_lock(&dev_list_lock); 307 - ns = disk->private_data; 308 - if (ns) { 309 - if (!kref_get_unless_zero(&ns->kref)) 310 - goto fail; 311 - if (!try_module_get(ns->ctrl->ops->module)) 312 - goto fail_put_ns; 313 - } 314 - spin_unlock(&dev_list_lock); 315 - 316 - return ns; 317 - 318 - fail_put_ns: 319 - kref_put(&ns->kref, nvme_free_ns); 320 - fail: 321 - spin_unlock(&dev_list_lock); 322 - return NULL; 323 269 } 324 270 325 271 struct request *nvme_alloc_request(struct request_queue *q, ··· 1060 1052 } 1061 1053 } 1062 1054 1063 - #ifdef CONFIG_COMPAT 1064 - static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode, 1065 - unsigned int cmd, unsigned long arg) 1066 - { 1067 - return nvme_ioctl(bdev, mode, cmd, arg); 1068 - } 1069 - #else 1070 - #define nvme_compat_ioctl NULL 1071 - #endif 1072 - 1073 1055 static int nvme_open(struct block_device *bdev, fmode_t mode) 1074 1056 { 1075 - return nvme_get_ns_from_disk(bdev->bd_disk) ? 
0 : -ENXIO; 1057 + struct nvme_ns *ns = bdev->bd_disk->private_data; 1058 + 1059 + if (!kref_get_unless_zero(&ns->kref)) 1060 + return -ENXIO; 1061 + return 0; 1076 1062 } 1077 1063 1078 1064 static void nvme_release(struct gendisk *disk, fmode_t mode) 1079 1065 { 1080 - struct nvme_ns *ns = disk->private_data; 1081 - 1082 - module_put(ns->ctrl->ops->module); 1083 - nvme_put_ns(ns); 1066 + nvme_put_ns(disk->private_data); 1084 1067 } 1085 1068 1086 1069 static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) ··· 1379 1380 static const struct block_device_operations nvme_fops = { 1380 1381 .owner = THIS_MODULE, 1381 1382 .ioctl = nvme_ioctl, 1382 - .compat_ioctl = nvme_compat_ioctl, 1383 + .compat_ioctl = nvme_ioctl, 1383 1384 .open = nvme_open, 1384 1385 .release = nvme_release, 1385 1386 .getgeo = nvme_getgeo, ··· 1929 1930 1930 1931 static int nvme_dev_open(struct inode *inode, struct file *file) 1931 1932 { 1932 - struct nvme_ctrl *ctrl; 1933 - int instance = iminor(inode); 1934 - int ret = -ENODEV; 1933 + struct nvme_ctrl *ctrl = 1934 + container_of(inode->i_cdev, struct nvme_ctrl, cdev); 1935 1935 1936 - spin_lock(&dev_list_lock); 1937 - list_for_each_entry(ctrl, &nvme_ctrl_list, node) { 1938 - if (ctrl->instance != instance) 1939 - continue; 1940 - 1941 - if (!ctrl->admin_q) { 1942 - ret = -EWOULDBLOCK; 1943 - break; 1944 - } 1945 - if (!kref_get_unless_zero(&ctrl->kref)) 1946 - break; 1947 - file->private_data = ctrl; 1948 - ret = 0; 1949 - break; 1950 - } 1951 - spin_unlock(&dev_list_lock); 1952 - 1953 - return ret; 1954 - } 1955 - 1956 - static int nvme_dev_release(struct inode *inode, struct file *file) 1957 - { 1958 - nvme_put_ctrl(file->private_data); 1936 + if (ctrl->state != NVME_CTRL_LIVE) 1937 + return -EWOULDBLOCK; 1938 + file->private_data = ctrl; 1959 1939 return 0; 1960 1940 } 1961 1941 ··· 1998 2020 static const struct file_operations nvme_dev_fops = { 1999 2021 .owner = THIS_MODULE, 2000 2022 .open = nvme_dev_open, 2001 - .release = nvme_dev_release, 2002 2023 .unlocked_ioctl = nvme_dev_ioctl, 2003 2024 .compat_ioctl = nvme_dev_ioctl, 2004 2025 }; ··· 2163 2186 struct nvme_ctrl *ctrl = dev_get_drvdata(dev); 2164 2187 2165 2188 if (device_remove_file_self(dev, attr)) 2166 - ctrl->ops->delete_ctrl(ctrl); 2189 + nvme_delete_ctrl_sync(ctrl); 2167 2190 return count; 2168 2191 } 2169 2192 static DEVICE_ATTR(delete_controller, S_IWUSR, NULL, nvme_sysfs_delete); ··· 2275 2298 mutex_lock(&ctrl->namespaces_mutex); 2276 2299 list_for_each_entry(ns, &ctrl->namespaces, list) { 2277 2300 if (ns->ns_id == nsid) { 2278 - kref_get(&ns->kref); 2301 + if (!kref_get_unless_zero(&ns->kref)) 2302 + continue; 2279 2303 ret = ns; 2280 2304 break; 2281 2305 } ··· 2379 2401 list_add_tail(&ns->list, &ctrl->namespaces); 2380 2402 mutex_unlock(&ctrl->namespaces_mutex); 2381 2403 2382 - kref_get(&ctrl->kref); 2404 + nvme_get_ctrl(ctrl); 2383 2405 2384 2406 kfree(id); 2385 2407 ··· 2637 2659 return; 2638 2660 2639 2661 nvme_start_queues(ctrl); 2640 - /* read FW slot informationi to clear the AER*/ 2662 + /* read FW slot information to clear the AER */ 2641 2663 nvme_get_fw_slot_info(ctrl); 2642 2664 } 2643 2665 ··· 2684 2706 } 2685 2707 EXPORT_SYMBOL_GPL(nvme_queue_async_events); 2686 2708 2687 - static DEFINE_IDA(nvme_instance_ida); 2688 - 2689 - static int nvme_set_instance(struct nvme_ctrl *ctrl) 2690 - { 2691 - int instance, error; 2692 - 2693 - do { 2694 - if (!ida_pre_get(&nvme_instance_ida, GFP_KERNEL)) 2695 - return -ENODEV; 2696 - 2697 - spin_lock(&dev_list_lock); 2698 - 
error = ida_get_new(&nvme_instance_ida, &instance); 2699 - spin_unlock(&dev_list_lock); 2700 - } while (error == -EAGAIN); 2701 - 2702 - if (error) 2703 - return -ENODEV; 2704 - 2705 - ctrl->instance = instance; 2706 - return 0; 2707 - } 2708 - 2709 - static void nvme_release_instance(struct nvme_ctrl *ctrl) 2710 - { 2711 - spin_lock(&dev_list_lock); 2712 - ida_remove(&nvme_instance_ida, ctrl->instance); 2713 - spin_unlock(&dev_list_lock); 2714 - } 2715 - 2716 2709 void nvme_stop_ctrl(struct nvme_ctrl *ctrl) 2717 2710 { 2718 2711 nvme_stop_keep_alive(ctrl); ··· 2708 2759 2709 2760 void nvme_uninit_ctrl(struct nvme_ctrl *ctrl) 2710 2761 { 2711 - device_destroy(nvme_class, MKDEV(nvme_char_major, ctrl->instance)); 2712 - 2713 - spin_lock(&dev_list_lock); 2714 - list_del(&ctrl->node); 2715 - spin_unlock(&dev_list_lock); 2762 + cdev_device_del(&ctrl->cdev, ctrl->device); 2716 2763 } 2717 2764 EXPORT_SYMBOL_GPL(nvme_uninit_ctrl); 2718 2765 2719 - static void nvme_free_ctrl(struct kref *kref) 2766 + static void nvme_free_ctrl(struct device *dev) 2720 2767 { 2721 - struct nvme_ctrl *ctrl = container_of(kref, struct nvme_ctrl, kref); 2768 + struct nvme_ctrl *ctrl = 2769 + container_of(dev, struct nvme_ctrl, ctrl_device); 2722 2770 2723 - put_device(ctrl->device); 2724 - nvme_release_instance(ctrl); 2771 + ida_simple_remove(&nvme_instance_ida, ctrl->instance); 2725 2772 ida_destroy(&ctrl->ns_ida); 2726 2773 2727 2774 ctrl->ops->free_ctrl(ctrl); 2728 2775 } 2729 - 2730 - void nvme_put_ctrl(struct nvme_ctrl *ctrl) 2731 - { 2732 - kref_put(&ctrl->kref, nvme_free_ctrl); 2733 - } 2734 - EXPORT_SYMBOL_GPL(nvme_put_ctrl); 2735 2776 2736 2777 /* 2737 2778 * Initialize a NVMe controller structures. This needs to be called during ··· 2737 2798 spin_lock_init(&ctrl->lock); 2738 2799 INIT_LIST_HEAD(&ctrl->namespaces); 2739 2800 mutex_init(&ctrl->namespaces_mutex); 2740 - kref_init(&ctrl->kref); 2741 2801 ctrl->dev = dev; 2742 2802 ctrl->ops = ops; 2743 2803 ctrl->quirks = quirks; 2744 2804 INIT_WORK(&ctrl->scan_work, nvme_scan_work); 2745 2805 INIT_WORK(&ctrl->async_event_work, nvme_async_event_work); 2746 2806 INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work); 2807 + INIT_WORK(&ctrl->delete_work, nvme_delete_ctrl_work); 2747 2808 2748 - ret = nvme_set_instance(ctrl); 2749 - if (ret) 2809 + ret = ida_simple_get(&nvme_instance_ida, 0, 0, GFP_KERNEL); 2810 + if (ret < 0) 2750 2811 goto out; 2812 + ctrl->instance = ret; 2751 2813 2752 - ctrl->device = device_create_with_groups(nvme_class, ctrl->dev, 2753 - MKDEV(nvme_char_major, ctrl->instance), 2754 - ctrl, nvme_dev_attr_groups, 2755 - "nvme%d", ctrl->instance); 2756 - if (IS_ERR(ctrl->device)) { 2757 - ret = PTR_ERR(ctrl->device); 2814 + device_initialize(&ctrl->ctrl_device); 2815 + ctrl->device = &ctrl->ctrl_device; 2816 + ctrl->device->devt = MKDEV(MAJOR(nvme_chr_devt), ctrl->instance); 2817 + ctrl->device->class = nvme_class; 2818 + ctrl->device->parent = ctrl->dev; 2819 + ctrl->device->groups = nvme_dev_attr_groups; 2820 + ctrl->device->release = nvme_free_ctrl; 2821 + dev_set_drvdata(ctrl->device, ctrl); 2822 + ret = dev_set_name(ctrl->device, "nvme%d", ctrl->instance); 2823 + if (ret) 2758 2824 goto out_release_instance; 2759 - } 2760 - get_device(ctrl->device); 2761 - ida_init(&ctrl->ns_ida); 2762 2825 2763 - spin_lock(&dev_list_lock); 2764 - list_add_tail(&ctrl->node, &nvme_ctrl_list); 2765 - spin_unlock(&dev_list_lock); 2826 + cdev_init(&ctrl->cdev, &nvme_dev_fops); 2827 + ctrl->cdev.owner = ops->module; 2828 + ret = cdev_device_add(&ctrl->cdev, 
ctrl->device); 2829 + if (ret) 2830 + goto out_free_name; 2831 + 2832 + ida_init(&ctrl->ns_ida); 2766 2833 2767 2834 /* 2768 2835 * Initialize latency tolerance controls. The sysfs files won't ··· 2779 2834 min(default_ps_max_latency_us, (unsigned long)S32_MAX)); 2780 2835 2781 2836 return 0; 2837 + out_free_name: 2838 + kfree_const(dev->kobj.name); 2782 2839 out_release_instance: 2783 - nvme_release_instance(ctrl); 2840 + ida_simple_remove(&nvme_instance_ida, ctrl->instance); 2784 2841 out: 2785 2842 return ret; 2786 2843 } ··· 2891 2944 } 2892 2945 EXPORT_SYMBOL_GPL(nvme_start_queues); 2893 2946 2947 + int nvme_reinit_tagset(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set) 2948 + { 2949 + if (!ctrl->ops->reinit_request) 2950 + return 0; 2951 + 2952 + return blk_mq_tagset_iter(set, set->driver_data, 2953 + ctrl->ops->reinit_request); 2954 + } 2955 + EXPORT_SYMBOL_GPL(nvme_reinit_tagset); 2956 + 2894 2957 int __init nvme_core_init(void) 2895 2958 { 2896 2959 int result; ··· 2910 2953 if (!nvme_wq) 2911 2954 return -ENOMEM; 2912 2955 2913 - result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme", 2914 - &nvme_dev_fops); 2956 + result = alloc_chrdev_region(&nvme_chr_devt, 0, NVME_MINORS, "nvme"); 2915 2957 if (result < 0) 2916 2958 goto destroy_wq; 2917 - else if (result > 0) 2918 - nvme_char_major = result; 2919 2959 2920 2960 nvme_class = class_create(THIS_MODULE, "nvme"); 2921 2961 if (IS_ERR(nvme_class)) { ··· 2923 2969 return 0; 2924 2970 2925 2971 unregister_chrdev: 2926 - __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme"); 2972 + unregister_chrdev_region(nvme_chr_devt, NVME_MINORS); 2927 2973 destroy_wq: 2928 2974 destroy_workqueue(nvme_wq); 2929 2975 return result; ··· 2932 2978 void nvme_core_exit(void) 2933 2979 { 2934 2980 class_destroy(nvme_class); 2935 - __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme"); 2981 + unregister_chrdev_region(nvme_chr_devt, NVME_MINORS); 2936 2982 destroy_workqueue(nvme_wq); 2937 2983 } 2938 2984
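
The core.c changes above drop the global controller list and dynamic char major in favor of a struct device embedded in the controller plus a cdev, registered in one step with cdev_device_add() against a region from alloc_chrdev_region(). A condensed sketch of that registration pattern with hypothetical "example_*" names; the kernel APIs are the ones used in the hunk:

#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/fs.h>
#include <linux/kdev_t.h>
#include <linux/module.h>

struct example_ctrl {
        struct device   ctrl_device;
        struct cdev     cdev;
        int             instance;
};

/*
 * Register the controller's char device. A full driver would also set
 * .release on ctrl_device and put_device() on the error paths; that is
 * omitted here for brevity.
 */
static int example_register_ctrl(struct example_ctrl *ctrl, struct class *cls,
                                 dev_t base_devt,
                                 const struct file_operations *fops)
{
        int ret;

        device_initialize(&ctrl->ctrl_device);
        ctrl->ctrl_device.devt = MKDEV(MAJOR(base_devt), ctrl->instance);
        ctrl->ctrl_device.class = cls;
        ret = dev_set_name(&ctrl->ctrl_device, "example%d", ctrl->instance);
        if (ret)
                return ret;

        cdev_init(&ctrl->cdev, fops);
        ctrl->cdev.owner = THIS_MODULE;

        /* adds cdev and device together; undone later by cdev_device_del() */
        return cdev_device_add(&ctrl->cdev, &ctrl->ctrl_device);
}

With this scheme the controller's lifetime rides on the device refcount, which is why the series also turns nvme_get_ctrl()/nvme_put_ctrl() into get_device()/put_device() wrappers (see the nvme.h diff below).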
+10 -2
drivers/nvme/host/fabrics.c
··· 548 548 { NVMF_OPT_HOSTNQN, "hostnqn=%s" }, 549 549 { NVMF_OPT_HOST_TRADDR, "host_traddr=%s" }, 550 550 { NVMF_OPT_HOST_ID, "hostid=%s" }, 551 + { NVMF_OPT_DUP_CONNECT, "duplicate_connect" }, 551 552 { NVMF_OPT_ERR, NULL } 552 553 }; 553 554 ··· 567 566 opts->nr_io_queues = num_online_cpus(); 568 567 opts->reconnect_delay = NVMF_DEF_RECONNECT_DELAY; 569 568 opts->kato = NVME_DEFAULT_KATO; 569 + opts->duplicate_connect = false; 570 570 571 571 options = o = kstrdup(buf, GFP_KERNEL); 572 572 if (!options) ··· 744 742 goto out; 745 743 } 746 744 break; 745 + case NVMF_OPT_DUP_CONNECT: 746 + opts->duplicate_connect = true; 747 + break; 747 748 default: 748 749 pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n", 749 750 p); ··· 828 823 #define NVMF_REQUIRED_OPTS (NVMF_OPT_TRANSPORT | NVMF_OPT_NQN) 829 824 #define NVMF_ALLOWED_OPTS (NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \ 830 825 NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \ 831 - NVMF_OPT_HOST_ID) 826 + NVMF_OPT_HOST_ID | NVMF_OPT_DUP_CONNECT) 832 827 833 828 static struct nvme_ctrl * 834 829 nvmf_create_ctrl(struct device *dev, const char *buf, size_t count) ··· 845 840 ret = nvmf_parse_options(opts, buf); 846 841 if (ret) 847 842 goto out_free_opts; 843 + 844 + 845 + request_module("nvme-%s", opts->transport); 848 846 849 847 /* 850 848 * Check the generic options first as we need a valid transport for ··· 887 879 "controller returned incorrect NQN: \"%s\".\n", 888 880 ctrl->subnqn); 889 881 up_read(&nvmf_transports_rwsem); 890 - ctrl->ops->delete_ctrl(ctrl); 882 + nvme_delete_ctrl_sync(ctrl); 891 883 return ERR_PTR(-EINVAL); 892 884 } 893 885
+14
drivers/nvme/host/fabrics.h
··· 57 57 NVMF_OPT_HOST_TRADDR = 1 << 10, 58 58 NVMF_OPT_CTRL_LOSS_TMO = 1 << 11, 59 59 NVMF_OPT_HOST_ID = 1 << 12, 60 + NVMF_OPT_DUP_CONNECT = 1 << 13, 60 61 }; 61 62 62 63 /** ··· 97 96 unsigned int nr_io_queues; 98 97 unsigned int reconnect_delay; 99 98 bool discovery_nqn; 99 + bool duplicate_connect; 100 100 unsigned int kato; 101 101 struct nvmf_host *host; 102 102 int max_reconnects; ··· 132 130 struct nvme_ctrl *(*create_ctrl)(struct device *dev, 133 131 struct nvmf_ctrl_options *opts); 134 132 }; 133 + 134 + static inline bool 135 + nvmf_ctlr_matches_baseopts(struct nvme_ctrl *ctrl, 136 + struct nvmf_ctrl_options *opts) 137 + { 138 + if (strcmp(opts->subsysnqn, ctrl->opts->subsysnqn) || 139 + strcmp(opts->host->nqn, ctrl->opts->host->nqn) || 140 + memcmp(&opts->host->id, &ctrl->opts->host->id, sizeof(uuid_t))) 141 + return false; 142 + 143 + return true; 144 + } 135 145 136 146 int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val); 137 147 int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val);
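
The new nvmf_ctlr_matches_baseopts() helper compares the subsystem NQN, host NQN and host ID of an existing controller against a new connect request; together with the duplicate_connect option added in fabrics.c it lets transports refuse duplicate associations. A condensed sketch of the check a transport performs (the FC transport below does this in nvme_fc_existing_controller()); the wrapper name is hypothetical:

#include <linux/errno.h>

#include "nvme.h"
#include "fabrics.h"

/*
 * Reject a connect that duplicates an existing association unless the
 * user explicitly asked for it with duplicate_connect.
 */
static int example_check_duplicate(struct nvme_ctrl *existing,
                                   struct nvmf_ctrl_options *opts)
{
        if (!opts->duplicate_connect &&
            nvmf_ctlr_matches_baseopts(existing, opts))
                return -EALREADY;
        return 0;
}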
+514 -188
drivers/nvme/host/fc.c
··· 45 45 46 46 #define NVMEFC_QUEUE_DELAY 3 /* ms units */ 47 47 48 + #define NVME_FC_DEFAULT_DEV_LOSS_TMO 60 /* seconds */ 49 + 48 50 struct nvme_fc_queue { 49 51 struct nvme_fc_ctrl *ctrl; 50 52 struct device *dev; ··· 138 136 struct nvme_fc_lport *lport; 139 137 spinlock_t lock; 140 138 struct kref ref; 139 + unsigned long dev_loss_end; 141 140 } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ 142 141 143 142 enum nvme_fcctrl_flags { ··· 160 157 struct blk_mq_tag_set admin_tag_set; 161 158 struct blk_mq_tag_set tag_set; 162 159 163 - struct work_struct delete_work; 164 160 struct delayed_work connect_work; 165 161 166 162 struct kref ref; ··· 215 213 216 214 217 215 216 + /* 217 + * These items are short-term. They will eventually be moved into 218 + * a generic FC class. See comments in module init. 219 + */ 220 + static struct class *fc_class; 221 + static struct device *fc_udev_device; 222 + 218 223 219 224 /* *********************** FC-NVME Port Management ************************ */ 220 225 221 - static int __nvme_fc_del_ctrl(struct nvme_fc_ctrl *); 222 226 static void __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *, 223 227 struct nvme_fc_queue *, unsigned int); 224 228 ··· 460 452 } 461 453 EXPORT_SYMBOL_GPL(nvme_fc_unregister_localport); 462 454 463 - /** 464 - * nvme_fc_register_remoteport - transport entry point called by an 465 - * LLDD to register the existence of a NVME 466 - * subsystem FC port on its fabric. 467 - * @localport: pointer to the (registered) local port that the remote 468 - * subsystem port is connected to. 469 - * @pinfo: pointer to information about the port to be registered 470 - * @rport_p: pointer to a remote port pointer. Upon success, the routine 471 - * will allocate a nvme_fc_remote_port structure and place its 472 - * address in the remote port pointer. Upon failure, remote port 473 - * pointer will be set to 0. 474 - * 475 - * Returns: 476 - * a completion status. Must be 0 upon success; a negative errno 477 - * (ex: -ENXIO) upon failure. 
455 + /* 456 + * TRADDR strings, per FC-NVME are fixed format: 457 + * "nn-0x<16hexdigits>:pn-0x<16hexdigits>" - 43 characters 458 + * udev event will only differ by prefix of what field is 459 + * being specified: 460 + * "NVMEFC_HOST_TRADDR=" or "NVMEFC_TRADDR=" - 19 max characters 461 + * 19 + 43 + null_fudge = 64 characters 478 462 */ 479 - int 480 - nvme_fc_register_remoteport(struct nvme_fc_local_port *localport, 481 - struct nvme_fc_port_info *pinfo, 482 - struct nvme_fc_remote_port **portptr) 463 + #define FCNVME_TRADDR_LENGTH 64 464 + 465 + static void 466 + nvme_fc_signal_discovery_scan(struct nvme_fc_lport *lport, 467 + struct nvme_fc_rport *rport) 483 468 { 484 - struct nvme_fc_lport *lport = localport_to_lport(localport); 485 - struct nvme_fc_rport *newrec; 486 - unsigned long flags; 487 - int ret, idx; 469 + char hostaddr[FCNVME_TRADDR_LENGTH]; /* NVMEFC_HOST_TRADDR=...*/ 470 + char tgtaddr[FCNVME_TRADDR_LENGTH]; /* NVMEFC_TRADDR=...*/ 471 + char *envp[4] = { "FC_EVENT=nvmediscovery", hostaddr, tgtaddr, NULL }; 488 472 489 - newrec = kmalloc((sizeof(*newrec) + lport->ops->remote_priv_sz), 490 - GFP_KERNEL); 491 - if (!newrec) { 492 - ret = -ENOMEM; 493 - goto out_reghost_failed; 494 - } 473 + if (!(rport->remoteport.port_role & FC_PORT_ROLE_NVME_DISCOVERY)) 474 + return; 495 475 496 - if (!nvme_fc_lport_get(lport)) { 497 - ret = -ESHUTDOWN; 498 - goto out_kfree_rport; 499 - } 500 - 501 - idx = ida_simple_get(&lport->endp_cnt, 0, 0, GFP_KERNEL); 502 - if (idx < 0) { 503 - ret = -ENOSPC; 504 - goto out_lport_put; 505 - } 506 - 507 - INIT_LIST_HEAD(&newrec->endp_list); 508 - INIT_LIST_HEAD(&newrec->ctrl_list); 509 - INIT_LIST_HEAD(&newrec->ls_req_list); 510 - kref_init(&newrec->ref); 511 - spin_lock_init(&newrec->lock); 512 - newrec->remoteport.localport = &lport->localport; 513 - newrec->dev = lport->dev; 514 - newrec->lport = lport; 515 - newrec->remoteport.private = &newrec[1]; 516 - newrec->remoteport.port_role = pinfo->port_role; 517 - newrec->remoteport.node_name = pinfo->node_name; 518 - newrec->remoteport.port_name = pinfo->port_name; 519 - newrec->remoteport.port_id = pinfo->port_id; 520 - newrec->remoteport.port_state = FC_OBJSTATE_ONLINE; 521 - newrec->remoteport.port_num = idx; 522 - 523 - spin_lock_irqsave(&nvme_fc_lock, flags); 524 - list_add_tail(&newrec->endp_list, &lport->endp_list); 525 - spin_unlock_irqrestore(&nvme_fc_lock, flags); 526 - 527 - *portptr = &newrec->remoteport; 528 - return 0; 529 - 530 - out_lport_put: 531 - nvme_fc_lport_put(lport); 532 - out_kfree_rport: 533 - kfree(newrec); 534 - out_reghost_failed: 535 - *portptr = NULL; 536 - return ret; 476 + snprintf(hostaddr, sizeof(hostaddr), 477 + "NVMEFC_HOST_TRADDR=nn-0x%016llx:pn-0x%016llx", 478 + lport->localport.node_name, lport->localport.port_name); 479 + snprintf(tgtaddr, sizeof(tgtaddr), 480 + "NVMEFC_TRADDR=nn-0x%016llx:pn-0x%016llx", 481 + rport->remoteport.node_name, rport->remoteport.port_name); 482 + kobject_uevent_env(&fc_udev_device->kobj, KOBJ_CHANGE, envp); 537 483 } 538 - EXPORT_SYMBOL_GPL(nvme_fc_register_remoteport); 539 484 540 485 static void 541 486 nvme_fc_free_rport(struct kref *ref) ··· 529 568 return kref_get_unless_zero(&rport->ref); 530 569 } 531 570 571 + static void 572 + nvme_fc_resume_controller(struct nvme_fc_ctrl *ctrl) 573 + { 574 + switch (ctrl->ctrl.state) { 575 + case NVME_CTRL_NEW: 576 + case NVME_CTRL_RECONNECTING: 577 + /* 578 + * As all reconnects were suppressed, schedule a 579 + * connect. 
580 + */ 581 + dev_info(ctrl->ctrl.device, 582 + "NVME-FC{%d}: connectivity re-established. " 583 + "Attempting reconnect\n", ctrl->cnum); 584 + 585 + queue_delayed_work(nvme_wq, &ctrl->connect_work, 0); 586 + break; 587 + 588 + case NVME_CTRL_RESETTING: 589 + /* 590 + * Controller is already in the process of terminating the 591 + * association. No need to do anything further. The reconnect 592 + * step will naturally occur after the reset completes. 593 + */ 594 + break; 595 + 596 + default: 597 + /* no action to take - let it delete */ 598 + break; 599 + } 600 + } 601 + 602 + static struct nvme_fc_rport * 603 + nvme_fc_attach_to_suspended_rport(struct nvme_fc_lport *lport, 604 + struct nvme_fc_port_info *pinfo) 605 + { 606 + struct nvme_fc_rport *rport; 607 + struct nvme_fc_ctrl *ctrl; 608 + unsigned long flags; 609 + 610 + spin_lock_irqsave(&nvme_fc_lock, flags); 611 + 612 + list_for_each_entry(rport, &lport->endp_list, endp_list) { 613 + if (rport->remoteport.node_name != pinfo->node_name || 614 + rport->remoteport.port_name != pinfo->port_name) 615 + continue; 616 + 617 + if (!nvme_fc_rport_get(rport)) { 618 + rport = ERR_PTR(-ENOLCK); 619 + goto out_done; 620 + } 621 + 622 + spin_unlock_irqrestore(&nvme_fc_lock, flags); 623 + 624 + spin_lock_irqsave(&rport->lock, flags); 625 + 626 + /* has it been unregistered */ 627 + if (rport->remoteport.port_state != FC_OBJSTATE_DELETED) { 628 + /* means lldd called us twice */ 629 + spin_unlock_irqrestore(&rport->lock, flags); 630 + nvme_fc_rport_put(rport); 631 + return ERR_PTR(-ESTALE); 632 + } 633 + 634 + rport->remoteport.port_state = FC_OBJSTATE_ONLINE; 635 + rport->dev_loss_end = 0; 636 + 637 + /* 638 + * kick off a reconnect attempt on all associations to the 639 + * remote port. A successful reconnects will resume i/o. 640 + */ 641 + list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) 642 + nvme_fc_resume_controller(ctrl); 643 + 644 + spin_unlock_irqrestore(&rport->lock, flags); 645 + 646 + return rport; 647 + } 648 + 649 + rport = NULL; 650 + 651 + out_done: 652 + spin_unlock_irqrestore(&nvme_fc_lock, flags); 653 + 654 + return rport; 655 + } 656 + 657 + static inline void 658 + __nvme_fc_set_dev_loss_tmo(struct nvme_fc_rport *rport, 659 + struct nvme_fc_port_info *pinfo) 660 + { 661 + if (pinfo->dev_loss_tmo) 662 + rport->remoteport.dev_loss_tmo = pinfo->dev_loss_tmo; 663 + else 664 + rport->remoteport.dev_loss_tmo = NVME_FC_DEFAULT_DEV_LOSS_TMO; 665 + } 666 + 667 + /** 668 + * nvme_fc_register_remoteport - transport entry point called by an 669 + * LLDD to register the existence of a NVME 670 + * subsystem FC port on its fabric. 671 + * @localport: pointer to the (registered) local port that the remote 672 + * subsystem port is connected to. 673 + * @pinfo: pointer to information about the port to be registered 674 + * @rport_p: pointer to a remote port pointer. Upon success, the routine 675 + * will allocate a nvme_fc_remote_port structure and place its 676 + * address in the remote port pointer. Upon failure, remote port 677 + * pointer will be set to 0. 678 + * 679 + * Returns: 680 + * a completion status. Must be 0 upon success; a negative errno 681 + * (ex: -ENXIO) upon failure. 
682 + */ 683 + int 684 + nvme_fc_register_remoteport(struct nvme_fc_local_port *localport, 685 + struct nvme_fc_port_info *pinfo, 686 + struct nvme_fc_remote_port **portptr) 687 + { 688 + struct nvme_fc_lport *lport = localport_to_lport(localport); 689 + struct nvme_fc_rport *newrec; 690 + unsigned long flags; 691 + int ret, idx; 692 + 693 + if (!nvme_fc_lport_get(lport)) { 694 + ret = -ESHUTDOWN; 695 + goto out_reghost_failed; 696 + } 697 + 698 + /* 699 + * look to see if there is already a remoteport that is waiting 700 + * for a reconnect (within dev_loss_tmo) with the same WWN's. 701 + * If so, transition to it and reconnect. 702 + */ 703 + newrec = nvme_fc_attach_to_suspended_rport(lport, pinfo); 704 + 705 + /* found an rport, but something about its state is bad */ 706 + if (IS_ERR(newrec)) { 707 + ret = PTR_ERR(newrec); 708 + goto out_lport_put; 709 + 710 + /* found existing rport, which was resumed */ 711 + } else if (newrec) { 712 + nvme_fc_lport_put(lport); 713 + __nvme_fc_set_dev_loss_tmo(newrec, pinfo); 714 + nvme_fc_signal_discovery_scan(lport, newrec); 715 + *portptr = &newrec->remoteport; 716 + return 0; 717 + } 718 + 719 + /* nothing found - allocate a new remoteport struct */ 720 + 721 + newrec = kmalloc((sizeof(*newrec) + lport->ops->remote_priv_sz), 722 + GFP_KERNEL); 723 + if (!newrec) { 724 + ret = -ENOMEM; 725 + goto out_lport_put; 726 + } 727 + 728 + idx = ida_simple_get(&lport->endp_cnt, 0, 0, GFP_KERNEL); 729 + if (idx < 0) { 730 + ret = -ENOSPC; 731 + goto out_kfree_rport; 732 + } 733 + 734 + INIT_LIST_HEAD(&newrec->endp_list); 735 + INIT_LIST_HEAD(&newrec->ctrl_list); 736 + INIT_LIST_HEAD(&newrec->ls_req_list); 737 + kref_init(&newrec->ref); 738 + spin_lock_init(&newrec->lock); 739 + newrec->remoteport.localport = &lport->localport; 740 + newrec->dev = lport->dev; 741 + newrec->lport = lport; 742 + newrec->remoteport.private = &newrec[1]; 743 + newrec->remoteport.port_role = pinfo->port_role; 744 + newrec->remoteport.node_name = pinfo->node_name; 745 + newrec->remoteport.port_name = pinfo->port_name; 746 + newrec->remoteport.port_id = pinfo->port_id; 747 + newrec->remoteport.port_state = FC_OBJSTATE_ONLINE; 748 + newrec->remoteport.port_num = idx; 749 + __nvme_fc_set_dev_loss_tmo(newrec, pinfo); 750 + 751 + spin_lock_irqsave(&nvme_fc_lock, flags); 752 + list_add_tail(&newrec->endp_list, &lport->endp_list); 753 + spin_unlock_irqrestore(&nvme_fc_lock, flags); 754 + 755 + nvme_fc_signal_discovery_scan(lport, newrec); 756 + 757 + *portptr = &newrec->remoteport; 758 + return 0; 759 + 760 + out_kfree_rport: 761 + kfree(newrec); 762 + out_lport_put: 763 + nvme_fc_lport_put(lport); 764 + out_reghost_failed: 765 + *portptr = NULL; 766 + return ret; 767 + } 768 + EXPORT_SYMBOL_GPL(nvme_fc_register_remoteport); 769 + 532 770 static int 533 771 nvme_fc_abort_lsops(struct nvme_fc_rport *rport) 534 772 { ··· 750 590 spin_unlock_irqrestore(&rport->lock, flags); 751 591 752 592 return 0; 593 + } 594 + 595 + static void 596 + nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl) 597 + { 598 + dev_info(ctrl->ctrl.device, 599 + "NVME-FC{%d}: controller connectivity lost. Awaiting " 600 + "Reconnect", ctrl->cnum); 601 + 602 + switch (ctrl->ctrl.state) { 603 + case NVME_CTRL_NEW: 604 + case NVME_CTRL_LIVE: 605 + /* 606 + * Schedule a controller reset. The reset will terminate the 607 + * association and schedule the reconnect timer. 
Reconnects 608 + * will be attempted until either the ctlr_loss_tmo 609 + * (max_retries * connect_delay) expires or the remoteport's 610 + * dev_loss_tmo expires. 611 + */ 612 + if (nvme_reset_ctrl(&ctrl->ctrl)) { 613 + dev_warn(ctrl->ctrl.device, 614 + "NVME-FC{%d}: Couldn't schedule reset. " 615 + "Deleting controller.\n", 616 + ctrl->cnum); 617 + nvme_delete_ctrl(&ctrl->ctrl); 618 + } 619 + break; 620 + 621 + case NVME_CTRL_RECONNECTING: 622 + /* 623 + * The association has already been terminated and the 624 + * controller is attempting reconnects. No need to do anything 625 + * futher. Reconnects will be attempted until either the 626 + * ctlr_loss_tmo (max_retries * connect_delay) expires or the 627 + * remoteport's dev_loss_tmo expires. 628 + */ 629 + break; 630 + 631 + case NVME_CTRL_RESETTING: 632 + /* 633 + * Controller is already in the process of terminating the 634 + * association. No need to do anything further. The reconnect 635 + * step will kick in naturally after the association is 636 + * terminated. 637 + */ 638 + break; 639 + 640 + case NVME_CTRL_DELETING: 641 + default: 642 + /* no action to take - let it delete */ 643 + break; 644 + } 753 645 } 754 646 755 647 /** ··· 833 621 } 834 622 portptr->port_state = FC_OBJSTATE_DELETED; 835 623 836 - /* tear down all associations to the remote port */ 837 - list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) 838 - __nvme_fc_del_ctrl(ctrl); 624 + rport->dev_loss_end = jiffies + (portptr->dev_loss_tmo * HZ); 625 + 626 + list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) { 627 + /* if dev_loss_tmo==0, dev loss is immediate */ 628 + if (!portptr->dev_loss_tmo) { 629 + dev_warn(ctrl->ctrl.device, 630 + "NVME-FC{%d}: controller connectivity lost. " 631 + "Deleting controller.\n", 632 + ctrl->cnum); 633 + nvme_delete_ctrl(&ctrl->ctrl); 634 + } else 635 + nvme_fc_ctrl_connectivity_loss(ctrl); 636 + } 839 637 840 638 spin_unlock_irqrestore(&rport->lock, flags); 841 639 842 640 nvme_fc_abort_lsops(rport); 843 641 642 + /* 643 + * release the reference, which will allow, if all controllers 644 + * go away, which should only occur after dev_loss_tmo occurs, 645 + * for the rport to be torn down. 646 + */ 844 647 nvme_fc_rport_put(rport); 648 + 845 649 return 0; 846 650 } 847 651 EXPORT_SYMBOL_GPL(nvme_fc_unregister_remoteport); 652 + 653 + /** 654 + * nvme_fc_rescan_remoteport - transport entry point called by an 655 + * LLDD to request a nvme device rescan. 656 + * @remoteport: pointer to the (registered) remote port that is to be 657 + * rescanned. 
658 + * 659 + * Returns: N/A 660 + */ 661 + void 662 + nvme_fc_rescan_remoteport(struct nvme_fc_remote_port *remoteport) 663 + { 664 + struct nvme_fc_rport *rport = remoteport_to_rport(remoteport); 665 + 666 + nvme_fc_signal_discovery_scan(rport->lport, rport); 667 + } 668 + EXPORT_SYMBOL_GPL(nvme_fc_rescan_remoteport); 669 + 670 + int 671 + nvme_fc_set_remoteport_devloss(struct nvme_fc_remote_port *portptr, 672 + u32 dev_loss_tmo) 673 + { 674 + struct nvme_fc_rport *rport = remoteport_to_rport(portptr); 675 + unsigned long flags; 676 + 677 + spin_lock_irqsave(&rport->lock, flags); 678 + 679 + if (portptr->port_state != FC_OBJSTATE_ONLINE) { 680 + spin_unlock_irqrestore(&rport->lock, flags); 681 + return -EINVAL; 682 + } 683 + 684 + /* a dev_loss_tmo of 0 (immediate) is allowed to be set */ 685 + rport->remoteport.dev_loss_tmo = dev_loss_tmo; 686 + 687 + spin_unlock_irqrestore(&rport->lock, flags); 688 + 689 + return 0; 690 + } 691 + EXPORT_SYMBOL_GPL(nvme_fc_set_remoteport_devloss); 848 692 849 693 850 694 /* *********************** FC-NVME DMA Handling **************************** */ ··· 990 722 if (dev) 991 723 dma_unmap_sg(dev, sg, nents, dir); 992 724 } 993 - 994 725 995 726 /* *********************** FC-NVME LS Handling **************************** */ 996 727 ··· 1598 1331 struct nvme_command *sqe = &op->cmd_iu.sqe; 1599 1332 __le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1); 1600 1333 union nvme_result result; 1601 - bool complete_rq, terminate_assoc = true; 1334 + bool terminate_assoc = true; 1602 1335 1603 1336 /* 1604 1337 * WARNING: ··· 1640 1373 fc_dma_sync_single_for_cpu(ctrl->lport->dev, op->fcp_req.rspdma, 1641 1374 sizeof(op->rsp_iu), DMA_FROM_DEVICE); 1642 1375 1643 - if (atomic_read(&op->state) == FCPOP_STATE_ABORTED) 1644 - status = cpu_to_le16((NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1); 1376 + if (atomic_read(&op->state) == FCPOP_STATE_ABORTED || 1377 + op->flags & FCOP_FLAGS_TERMIO) 1378 + status = cpu_to_le16(NVME_SC_ABORT_REQ << 1); 1645 1379 else if (freq->status) 1646 1380 status = cpu_to_le16(NVME_SC_INTERNAL << 1); 1647 1381 ··· 1706 1438 done: 1707 1439 if (op->flags & FCOP_FLAGS_AEN) { 1708 1440 nvme_complete_async_event(&queue->ctrl->ctrl, status, &result); 1709 - complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op); 1441 + __nvme_fc_fcpop_chk_teardowns(ctrl, op); 1710 1442 atomic_set(&op->state, FCPOP_STATE_IDLE); 1711 1443 op->flags = FCOP_FLAGS_AEN; /* clear other flags */ 1712 1444 nvme_fc_ctrl_put(ctrl); 1713 1445 goto check_error; 1714 1446 } 1715 1447 1716 - complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op); 1717 - if (!complete_rq) { 1718 - if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) { 1719 - status = cpu_to_le16(NVME_SC_ABORT_REQ << 1); 1720 - if (blk_queue_dying(rq->q)) 1721 - status |= cpu_to_le16(NVME_SC_DNR << 1); 1722 - } 1723 - nvme_end_request(rq, status, result); 1724 - } else 1448 + /* 1449 + * Force failures of commands if we're killing the controller 1450 + * or have an error on a command used to create an new association 1451 + */ 1452 + if (status && 1453 + (blk_queue_dying(rq->q) || 1454 + ctrl->ctrl.state == NVME_CTRL_NEW || 1455 + ctrl->ctrl.state == NVME_CTRL_RECONNECTING)) 1456 + status |= cpu_to_le16(NVME_SC_DNR << 1); 1457 + 1458 + if (__nvme_fc_fcpop_chk_teardowns(ctrl, op)) 1725 1459 __nvme_fc_final_op_cleanup(rq); 1460 + else 1461 + nvme_end_request(rq, status, result); 1726 1462 1727 1463 check_error: 1728 1464 if (terminate_assoc) ··· 2097 1825 dev_warn(ctrl->ctrl.device, 2098 1826 "NVME-FC{%d}: resetting controller\n", 
ctrl->cnum); 2099 1827 2100 - if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) { 2101 - dev_err(ctrl->ctrl.device, 2102 - "NVME-FC{%d}: error_recovery: Couldn't change state " 2103 - "to RECONNECTING\n", ctrl->cnum); 2104 - return; 2105 - } 2106 - 2107 1828 nvme_reset_ctrl(&ctrl->ctrl); 2108 1829 } 2109 1830 ··· 2107 1842 struct nvme_fc_ctrl *ctrl = op->ctrl; 2108 1843 int ret; 2109 1844 2110 - if (reserved) 1845 + if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE || 1846 + atomic_read(&op->state) == FCPOP_STATE_ABORTED) 2111 1847 return BLK_EH_RESET_TIMER; 2112 1848 2113 1849 ret = __nvme_fc_abort_op(ctrl, op); 2114 1850 if (ret) 2115 - /* io wasn't active to abort consider it done */ 2116 - return BLK_EH_HANDLED; 1851 + /* io wasn't active to abort */ 1852 + return BLK_EH_NOT_HANDLED; 2117 1853 2118 1854 /* 2119 1855 * we can't individually ABTS an io without affecting the queue, ··· 2125 1859 */ 2126 1860 nvme_fc_error_recovery(ctrl, "io timeout error"); 2127 1861 2128 - return BLK_EH_HANDLED; 1862 + /* 1863 + * the io abort has been initiated. Have the reset timer 1864 + * restarted and the abort completion will complete the io 1865 + * shortly. Avoids a synchronous wait while the abort finishes. 1866 + */ 1867 + return BLK_EH_RESET_TIMER; 2129 1868 } 2130 1869 2131 1870 static int ··· 2608 2337 2609 2338 nvme_fc_init_io_queues(ctrl); 2610 2339 2611 - ret = blk_mq_reinit_tagset(&ctrl->tag_set, nvme_fc_reinit_request); 2340 + ret = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.tagset); 2612 2341 if (ret) 2613 2342 goto out_free_io_queues; 2614 2343 ··· 2639 2368 nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) 2640 2369 { 2641 2370 struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; 2642 - u32 segs; 2643 2371 int ret; 2644 2372 bool changed; 2645 2373 2646 2374 ++ctrl->ctrl.nr_reconnects; 2375 + 2376 + if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE) 2377 + return -ENODEV; 2647 2378 2648 2379 /* 2649 2380 * Create the admin queue ··· 2692 2419 if (ret) 2693 2420 goto out_disconnect_admin_queue; 2694 2421 2695 - segs = min_t(u32, NVME_FC_MAX_SEGMENTS, 2696 - ctrl->lport->ops->max_sgl_segments); 2697 - ctrl->ctrl.max_hw_sectors = (segs - 1) << (PAGE_SHIFT - 9); 2422 + ctrl->ctrl.max_hw_sectors = 2423 + (ctrl->lport->ops->max_sgl_segments - 1) << (PAGE_SHIFT - 9); 2698 2424 2699 2425 ret = nvme_init_identify(&ctrl->ctrl); 2700 2426 if (ret) ··· 2737 2465 } 2738 2466 2739 2467 changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); 2740 - WARN_ON_ONCE(!changed); 2741 2468 2742 2469 ctrl->ctrl.nr_reconnects = 0; 2743 2470 2744 - nvme_start_ctrl(&ctrl->ctrl); 2471 + if (changed) 2472 + nvme_start_ctrl(&ctrl->ctrl); 2745 2473 2746 2474 return 0; /* Success */ 2747 2475 ··· 2809 2537 * use blk_mq_tagset_busy_itr() and the transport routine to 2810 2538 * terminate the exchanges. 
2811 2539 */ 2812 - blk_mq_quiesce_queue(ctrl->ctrl.admin_q); 2540 + if (ctrl->ctrl.state != NVME_CTRL_NEW) 2541 + blk_mq_quiesce_queue(ctrl->ctrl.admin_q); 2813 2542 blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, 2814 2543 nvme_fc_terminate_exchange, &ctrl->ctrl); 2815 2544 ··· 2844 2571 } 2845 2572 2846 2573 static void 2847 - nvme_fc_delete_ctrl_work(struct work_struct *work) 2574 + nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl) 2848 2575 { 2849 - struct nvme_fc_ctrl *ctrl = 2850 - container_of(work, struct nvme_fc_ctrl, delete_work); 2576 + struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); 2851 2577 2852 - cancel_work_sync(&ctrl->ctrl.reset_work); 2853 2578 cancel_delayed_work_sync(&ctrl->connect_work); 2854 - nvme_stop_ctrl(&ctrl->ctrl); 2855 - nvme_remove_namespaces(&ctrl->ctrl); 2856 2579 /* 2857 2580 * kill the association on the link side. this will block 2858 2581 * waiting for io to terminate 2859 2582 */ 2860 2583 nvme_fc_delete_association(ctrl); 2861 - 2862 - /* 2863 - * tear down the controller 2864 - * After the last reference on the nvme ctrl is removed, 2865 - * the transport nvme_fc_nvme_ctrl_freed() callback will be 2866 - * invoked. From there, the transport will tear down it's 2867 - * logical queues and association. 2868 - */ 2869 - nvme_uninit_ctrl(&ctrl->ctrl); 2870 - 2871 - nvme_put_ctrl(&ctrl->ctrl); 2872 - } 2873 - 2874 - static bool 2875 - __nvme_fc_schedule_delete_work(struct nvme_fc_ctrl *ctrl) 2876 - { 2877 - if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING)) 2878 - return true; 2879 - 2880 - if (!queue_work(nvme_wq, &ctrl->delete_work)) 2881 - return true; 2882 - 2883 - return false; 2884 - } 2885 - 2886 - static int 2887 - __nvme_fc_del_ctrl(struct nvme_fc_ctrl *ctrl) 2888 - { 2889 - return __nvme_fc_schedule_delete_work(ctrl) ? 
-EBUSY : 0; 2890 - } 2891 - 2892 - /* 2893 - * Request from nvme core layer to delete the controller 2894 - */ 2895 - static int 2896 - nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl) 2897 - { 2898 - struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); 2899 - int ret; 2900 - 2901 - if (!kref_get_unless_zero(&ctrl->ctrl.kref)) 2902 - return -EBUSY; 2903 - 2904 - ret = __nvme_fc_del_ctrl(ctrl); 2905 - 2906 - if (!ret) 2907 - flush_workqueue(nvme_wq); 2908 - 2909 - nvme_put_ctrl(&ctrl->ctrl); 2910 - 2911 - return ret; 2912 2584 } 2913 2585 2914 2586 static void 2915 2587 nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) 2916 2588 { 2917 - /* If we are resetting/deleting then do nothing */ 2918 - if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING) { 2919 - WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW || 2920 - ctrl->ctrl.state == NVME_CTRL_LIVE); 2589 + struct nvme_fc_rport *rport = ctrl->rport; 2590 + struct nvme_fc_remote_port *portptr = &rport->remoteport; 2591 + unsigned long recon_delay = ctrl->ctrl.opts->reconnect_delay * HZ; 2592 + bool recon = true; 2593 + 2594 + if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING) 2921 2595 return; 2922 - } 2923 2596 2924 - dev_info(ctrl->ctrl.device, 2925 - "NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n", 2926 - ctrl->cnum, status); 2927 - 2928 - if (nvmf_should_reconnect(&ctrl->ctrl)) { 2597 + if (portptr->port_state == FC_OBJSTATE_ONLINE) 2929 2598 dev_info(ctrl->ctrl.device, 2930 - "NVME-FC{%d}: Reconnect attempt in %d seconds.\n", 2931 - ctrl->cnum, ctrl->ctrl.opts->reconnect_delay); 2932 - queue_delayed_work(nvme_wq, &ctrl->connect_work, 2933 - ctrl->ctrl.opts->reconnect_delay * HZ); 2599 + "NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n", 2600 + ctrl->cnum, status); 2601 + else if (time_after_eq(jiffies, rport->dev_loss_end)) 2602 + recon = false; 2603 + 2604 + if (recon && nvmf_should_reconnect(&ctrl->ctrl)) { 2605 + if (portptr->port_state == FC_OBJSTATE_ONLINE) 2606 + dev_info(ctrl->ctrl.device, 2607 + "NVME-FC{%d}: Reconnect attempt in %ld " 2608 + "seconds\n", 2609 + ctrl->cnum, recon_delay / HZ); 2610 + else if (time_after(jiffies + recon_delay, rport->dev_loss_end)) 2611 + recon_delay = rport->dev_loss_end - jiffies; 2612 + 2613 + queue_delayed_work(nvme_wq, &ctrl->connect_work, recon_delay); 2934 2614 } else { 2935 - dev_warn(ctrl->ctrl.device, 2615 + if (portptr->port_state == FC_OBJSTATE_ONLINE) 2616 + dev_warn(ctrl->ctrl.device, 2936 2617 "NVME-FC{%d}: Max reconnect attempts (%d) " 2937 2618 "reached. Removing controller\n", 2938 2619 ctrl->cnum, ctrl->ctrl.nr_reconnects); 2939 - WARN_ON(__nvme_fc_schedule_delete_work(ctrl)); 2620 + else 2621 + dev_warn(ctrl->ctrl.device, 2622 + "NVME-FC{%d}: dev_loss_tmo (%d) expired " 2623 + "while waiting for remoteport connectivity. 
" 2624 + "Removing controller\n", ctrl->cnum, 2625 + portptr->dev_loss_tmo); 2626 + WARN_ON(nvme_delete_ctrl(&ctrl->ctrl)); 2940 2627 } 2941 2628 } 2942 2629 ··· 2908 2675 int ret; 2909 2676 2910 2677 nvme_stop_ctrl(&ctrl->ctrl); 2678 + 2911 2679 /* will block will waiting for io to terminate */ 2912 2680 nvme_fc_delete_association(ctrl); 2913 2681 2914 - ret = nvme_fc_create_association(ctrl); 2682 + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) { 2683 + dev_err(ctrl->ctrl.device, 2684 + "NVME-FC{%d}: error_recovery: Couldn't change state " 2685 + "to RECONNECTING\n", ctrl->cnum); 2686 + return; 2687 + } 2688 + 2689 + if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE) 2690 + ret = nvme_fc_create_association(ctrl); 2691 + else 2692 + ret = -ENOTCONN; 2693 + 2915 2694 if (ret) 2916 2695 nvme_fc_reconnect_or_delete(ctrl, ret); 2917 2696 else 2918 2697 dev_info(ctrl->ctrl.device, 2919 - "NVME-FC{%d}: controller reset complete\n", ctrl->cnum); 2698 + "NVME-FC{%d}: controller reset complete\n", 2699 + ctrl->cnum); 2920 2700 } 2921 2701 2922 2702 static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { ··· 2941 2695 .reg_write32 = nvmf_reg_write32, 2942 2696 .free_ctrl = nvme_fc_nvme_ctrl_freed, 2943 2697 .submit_async_event = nvme_fc_submit_async_event, 2944 - .delete_ctrl = nvme_fc_del_nvme_ctrl, 2698 + .delete_ctrl = nvme_fc_delete_ctrl, 2945 2699 .get_address = nvmf_get_address, 2700 + .reinit_request = nvme_fc_reinit_request, 2946 2701 }; 2947 2702 2948 2703 static void ··· 2975 2728 }; 2976 2729 2977 2730 2731 + /* 2732 + * Fails a controller request if it matches an existing controller 2733 + * (association) with the same tuple: 2734 + * <Host NQN, Host ID, local FC port, remote FC port, SUBSYS NQN> 2735 + * 2736 + * The ports don't need to be compared as they are intrinsically 2737 + * already matched by the port pointers supplied. 
2738 + */ 2739 + static bool 2740 + nvme_fc_existing_controller(struct nvme_fc_rport *rport, 2741 + struct nvmf_ctrl_options *opts) 2742 + { 2743 + struct nvme_fc_ctrl *ctrl; 2744 + unsigned long flags; 2745 + bool found = false; 2746 + 2747 + spin_lock_irqsave(&rport->lock, flags); 2748 + list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) { 2749 + found = nvmf_ctlr_matches_baseopts(&ctrl->ctrl, opts); 2750 + if (found) 2751 + break; 2752 + } 2753 + spin_unlock_irqrestore(&rport->lock, flags); 2754 + 2755 + return found; 2756 + } 2757 + 2978 2758 static struct nvme_ctrl * 2979 2759 nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, 2980 2760 struct nvme_fc_lport *lport, struct nvme_fc_rport *rport) ··· 3013 2739 if (!(rport->remoteport.port_role & 3014 2740 (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) { 3015 2741 ret = -EBADR; 2742 + goto out_fail; 2743 + } 2744 + 2745 + if (!opts->duplicate_connect && 2746 + nvme_fc_existing_controller(rport, opts)) { 2747 + ret = -EALREADY; 3016 2748 goto out_fail; 3017 2749 } 3018 2750 ··· 3044 2764 get_device(ctrl->dev); 3045 2765 kref_init(&ctrl->ref); 3046 2766 3047 - INIT_WORK(&ctrl->delete_work, nvme_fc_delete_ctrl_work); 3048 2767 INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work); 3049 2768 INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work); 3050 2769 spin_lock_init(&ctrl->lock); ··· 3075 2796 ctrl->admin_tag_set.driver_data = ctrl; 3076 2797 ctrl->admin_tag_set.nr_hw_queues = 1; 3077 2798 ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT; 2799 + ctrl->admin_tag_set.flags = BLK_MQ_F_NO_SCHED; 3078 2800 3079 2801 ret = blk_mq_alloc_tag_set(&ctrl->admin_tag_set); 3080 2802 if (ret) ··· 3129 2849 return ERR_PTR(ret); 3130 2850 } 3131 2851 3132 - kref_get(&ctrl->ctrl.kref); 2852 + nvme_get_ctrl(&ctrl->ctrl); 3133 2853 3134 2854 dev_info(ctrl->ctrl.device, 3135 2855 "NVME-FC{%d}: new ctrl: NQN \"%s\"\n", ··· 3277 2997 3278 2998 static int __init nvme_fc_init_module(void) 3279 2999 { 3280 - return nvmf_register_transport(&nvme_fc_transport); 3000 + int ret; 3001 + 3002 + /* 3003 + * NOTE: 3004 + * It is expected that in the future the kernel will combine 3005 + * the FC-isms that are currently under scsi and now being 3006 + * added to by NVME into a new standalone FC class. The SCSI 3007 + * and NVME protocols and their devices would be under this 3008 + * new FC class. 3009 + * 3010 + * As we need something to post FC-specific udev events to, 3011 + * specifically for nvme probe events, start by creating the 3012 + * new device class. When the new standalone FC class is 3013 + * put in place, this code will move to a more generic 3014 + * location for the class. 
3015 + */ 3016 + fc_class = class_create(THIS_MODULE, "fc"); 3017 + if (IS_ERR(fc_class)) { 3018 + pr_err("couldn't register class fc\n"); 3019 + return PTR_ERR(fc_class); 3020 + } 3021 + 3022 + /* 3023 + * Create a device for the FC-centric udev events 3024 + */ 3025 + fc_udev_device = device_create(fc_class, NULL, MKDEV(0, 0), NULL, 3026 + "fc_udev_device"); 3027 + if (IS_ERR(fc_udev_device)) { 3028 + pr_err("couldn't create fc_udev device!\n"); 3029 + ret = PTR_ERR(fc_udev_device); 3030 + goto out_destroy_class; 3031 + } 3032 + 3033 + ret = nvmf_register_transport(&nvme_fc_transport); 3034 + if (ret) 3035 + goto out_destroy_device; 3036 + 3037 + return 0; 3038 + 3039 + out_destroy_device: 3040 + device_destroy(fc_class, MKDEV(0, 0)); 3041 + out_destroy_class: 3042 + class_destroy(fc_class); 3043 + return ret; 3281 3044 } 3282 3045 3283 3046 static void __exit nvme_fc_exit_module(void) ··· 3333 3010 3334 3011 ida_destroy(&nvme_fc_local_port_cnt); 3335 3012 ida_destroy(&nvme_fc_ctrl_cnt); 3013 + 3014 + device_destroy(fc_class, MKDEV(0, 0)); 3015 + class_destroy(fc_class); 3336 3016 } 3337 3017 3338 3018 module_init(nvme_fc_init_module);
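
Beyond the controller rework, the fc.c changes give remote ports a dev_loss_tmo window and new entry points an LLDD can drive: nvme_fc_rescan_remoteport() to trigger a udev-based rescan and nvme_fc_set_remoteport_devloss() to adjust the timeout. A sketch of the LLDD side, assuming the matching nvme_fc_port_info.dev_loss_tmo field from the nvme-fc-driver.h change in this series; the "lldd_*" helpers are hypothetical:

#include <linux/nvme-fc-driver.h>

/* Report a discovered NVMe-capable target port to the FC-NVMe transport. */
static int lldd_report_target(struct nvme_fc_local_port *lport,
                              u64 node_name, u64 port_name, u32 port_id,
                              struct nvme_fc_remote_port **rport)
{
        struct nvme_fc_port_info pinfo = {
                .node_name      = node_name,
                .port_name      = port_name,
                .port_id        = port_id,
                .port_role      = FC_PORT_ROLE_NVME_TARGET,
                .dev_loss_tmo   = 0,    /* 0: transport applies its 60s default */
        };

        return nvme_fc_register_remoteport(lport, &pinfo, rport);
}

/* Later, tighten or relax the window in which reconnects are attempted. */
static int lldd_update_devloss(struct nvme_fc_remote_port *rport, u32 tmo)
{
        return nvme_fc_set_remoteport_devloss(rport, tmo);
}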
+21 -5
drivers/nvme/host/nvme.h
··· 15 15 #define _NVME_H 16 16 17 17 #include <linux/nvme.h> 18 + #include <linux/cdev.h> 18 19 #include <linux/pci.h> 19 20 #include <linux/kref.h> 20 21 #include <linux/blk-mq.h> 21 22 #include <linux/lightnvm.h> 22 23 #include <linux/sed-opal.h> 23 24 24 - extern unsigned char nvme_io_timeout; 25 + extern unsigned int nvme_io_timeout; 25 26 #define NVME_IO_TIMEOUT (nvme_io_timeout * HZ) 26 27 27 - extern unsigned char admin_timeout; 28 + extern unsigned int admin_timeout; 28 29 #define ADMIN_TIMEOUT (admin_timeout * HZ) 29 30 30 31 #define NVME_DEFAULT_KATO 5 ··· 128 127 struct request_queue *admin_q; 129 128 struct request_queue *connect_q; 130 129 struct device *dev; 131 - struct kref kref; 132 130 int instance; 133 131 struct blk_mq_tag_set *tagset; 134 132 struct blk_mq_tag_set *admin_tagset; 135 133 struct list_head namespaces; 136 134 struct mutex namespaces_mutex; 135 + struct device ctrl_device; 137 136 struct device *device; /* char device */ 138 - struct list_head node; 137 + struct cdev cdev; 139 138 struct ida ns_ida; 140 139 struct work_struct reset_work; 140 + struct work_struct delete_work; 141 141 142 142 struct opal_dev *opal_dev; 143 143 ··· 237 235 int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val); 238 236 void (*free_ctrl)(struct nvme_ctrl *ctrl); 239 237 void (*submit_async_event)(struct nvme_ctrl *ctrl, int aer_idx); 240 - int (*delete_ctrl)(struct nvme_ctrl *ctrl); 238 + void (*delete_ctrl)(struct nvme_ctrl *ctrl); 241 239 int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size); 240 + int (*reinit_request)(void *data, struct request *rq); 242 241 }; 243 242 244 243 static inline bool nvme_ctrl_ready(struct nvme_ctrl *ctrl) ··· 281 278 blk_mq_complete_request(req); 282 279 } 283 280 281 + static inline void nvme_get_ctrl(struct nvme_ctrl *ctrl) 282 + { 283 + get_device(ctrl->device); 284 + } 285 + 286 + static inline void nvme_put_ctrl(struct nvme_ctrl *ctrl) 287 + { 288 + put_device(ctrl->device); 289 + } 290 + 284 291 void nvme_complete_rq(struct request *req); 285 292 void nvme_cancel_request(struct request *req, void *data, bool reserved); 286 293 bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, ··· 324 311 void nvme_wait_freeze(struct nvme_ctrl *ctrl); 325 312 void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout); 326 313 void nvme_start_freeze(struct nvme_ctrl *ctrl); 314 + int nvme_reinit_tagset(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set); 327 315 328 316 #define NVME_QID_ANY -1 329 317 struct request *nvme_alloc_request(struct request_queue *q, ··· 340 326 void nvme_start_keep_alive(struct nvme_ctrl *ctrl); 341 327 void nvme_stop_keep_alive(struct nvme_ctrl *ctrl); 342 328 int nvme_reset_ctrl(struct nvme_ctrl *ctrl); 329 + int nvme_delete_ctrl(struct nvme_ctrl *ctrl); 330 + int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl); 343 331 344 332 #ifdef CONFIG_NVM 345 333 int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node);
+190 -34
drivers/nvme/host/pci.c
··· 13 13 */ 14 14 15 15 #include <linux/aer.h> 16 - #include <linux/bitops.h> 17 16 #include <linux/blkdev.h> 18 17 #include <linux/blk-mq.h> 19 18 #include <linux/blk-mq-pci.h> ··· 25 26 #include <linux/mutex.h> 26 27 #include <linux/once.h> 27 28 #include <linux/pci.h> 28 - #include <linux/poison.h> 29 29 #include <linux/t10-pi.h> 30 - #include <linux/timer.h> 31 30 #include <linux/types.h> 32 31 #include <linux/io-64-nonatomic-lo-hi.h> 33 - #include <asm/unaligned.h> 34 32 #include <linux/sed-opal.h> 35 33 36 34 #include "nvme.h" ··· 41 45 */ 42 46 #define NVME_AQ_BLKMQ_DEPTH (NVME_AQ_DEPTH - NVME_NR_AERS) 43 47 48 + #define SGES_PER_PAGE (PAGE_SIZE / sizeof(struct nvme_sgl_desc)) 49 + 44 50 static int use_threaded_interrupts; 45 51 module_param(use_threaded_interrupts, int, 0); 46 52 ··· 54 56 module_param(max_host_mem_size_mb, uint, 0444); 55 57 MODULE_PARM_DESC(max_host_mem_size_mb, 56 58 "Maximum Host Memory Buffer (HMB) size per controller (in MiB)"); 59 + 60 + static unsigned int sgl_threshold = SZ_32K; 61 + module_param(sgl_threshold, uint, 0644); 62 + MODULE_PARM_DESC(sgl_threshold, 63 + "Use SGLs when average request segment size is larger or equal to " 64 + "this size. Use 0 to disable SGLs."); 57 65 58 66 static int io_queue_depth_set(const char *val, const struct kernel_param *kp); 59 67 static const struct kernel_param_ops io_queue_depth_ops = { ··· 182 178 struct nvme_iod { 183 179 struct nvme_request req; 184 180 struct nvme_queue *nvmeq; 181 + bool use_sgl; 185 182 int aborted; 186 183 int npages; /* In the PRP list. 0 means small pool in use */ 187 184 int nents; /* Used in scatterlist */ ··· 336 331 return DIV_ROUND_UP(8 * nprps, PAGE_SIZE - 8); 337 332 } 338 333 339 - static unsigned int nvme_iod_alloc_size(struct nvme_dev *dev, 340 - unsigned int size, unsigned int nseg) 334 + /* 335 + * Calculates the number of pages needed for the SGL segments. For example a 4k 336 + * page can accommodate 256 SGL descriptors. 
337 + */ 338 + static int nvme_pci_npages_sgl(unsigned int num_seg) 341 339 { 342 - return sizeof(__le64 *) * nvme_npages(size, dev) + 343 - sizeof(struct scatterlist) * nseg; 340 + return DIV_ROUND_UP(num_seg * sizeof(struct nvme_sgl_desc), PAGE_SIZE); 344 341 } 345 342 346 - static unsigned int nvme_cmd_size(struct nvme_dev *dev) 343 + static unsigned int nvme_pci_iod_alloc_size(struct nvme_dev *dev, 344 + unsigned int size, unsigned int nseg, bool use_sgl) 347 345 { 348 - return sizeof(struct nvme_iod) + 349 - nvme_iod_alloc_size(dev, NVME_INT_BYTES(dev), NVME_INT_PAGES); 346 + size_t alloc_size; 347 + 348 + if (use_sgl) 349 + alloc_size = sizeof(__le64 *) * nvme_pci_npages_sgl(nseg); 350 + else 351 + alloc_size = sizeof(__le64 *) * nvme_npages(size, dev); 352 + 353 + return alloc_size + sizeof(struct scatterlist) * nseg; 354 + } 355 + 356 + static unsigned int nvme_pci_cmd_size(struct nvme_dev *dev, bool use_sgl) 357 + { 358 + unsigned int alloc_size = nvme_pci_iod_alloc_size(dev, 359 + NVME_INT_BYTES(dev), NVME_INT_PAGES, 360 + use_sgl); 361 + 362 + return sizeof(struct nvme_iod) + alloc_size; 350 363 } 351 364 352 365 static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, ··· 448 425 nvmeq->sq_tail = tail; 449 426 } 450 427 451 - static __le64 **iod_list(struct request *req) 428 + static void **nvme_pci_iod_list(struct request *req) 452 429 { 453 430 struct nvme_iod *iod = blk_mq_rq_to_pdu(req); 454 - return (__le64 **)(iod->sg + blk_rq_nr_phys_segments(req)); 431 + return (void **)(iod->sg + blk_rq_nr_phys_segments(req)); 455 432 } 456 433 457 434 static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev) ··· 461 438 unsigned int size = blk_rq_payload_bytes(rq); 462 439 463 440 if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) { 464 - iod->sg = kmalloc(nvme_iod_alloc_size(dev, size, nseg), GFP_ATOMIC); 441 + size_t alloc_size = nvme_pci_iod_alloc_size(dev, size, nseg, 442 + iod->use_sgl); 443 + 444 + iod->sg = kmalloc(alloc_size, GFP_ATOMIC); 465 445 if (!iod->sg) 466 446 return BLK_STS_RESOURCE; 467 447 } else { ··· 482 456 static void nvme_free_iod(struct nvme_dev *dev, struct request *req) 483 457 { 484 458 struct nvme_iod *iod = blk_mq_rq_to_pdu(req); 485 - const int last_prp = dev->ctrl.page_size / 8 - 1; 459 + const int last_prp = dev->ctrl.page_size / sizeof(__le64) - 1; 460 + dma_addr_t dma_addr = iod->first_dma, next_dma_addr; 461 + 486 462 int i; 487 - __le64 **list = iod_list(req); 488 - dma_addr_t prp_dma = iod->first_dma; 489 463 490 464 if (iod->npages == 0) 491 - dma_pool_free(dev->prp_small_pool, list[0], prp_dma); 465 + dma_pool_free(dev->prp_small_pool, nvme_pci_iod_list(req)[0], 466 + dma_addr); 467 + 492 468 for (i = 0; i < iod->npages; i++) { 493 - __le64 *prp_list = list[i]; 494 - dma_addr_t next_prp_dma = le64_to_cpu(prp_list[last_prp]); 495 - dma_pool_free(dev->prp_page_pool, prp_list, prp_dma); 496 - prp_dma = next_prp_dma; 469 + void *addr = nvme_pci_iod_list(req)[i]; 470 + 471 + if (iod->use_sgl) { 472 + struct nvme_sgl_desc *sg_list = addr; 473 + 474 + next_dma_addr = 475 + le64_to_cpu((sg_list[SGES_PER_PAGE - 1]).addr); 476 + } else { 477 + __le64 *prp_list = addr; 478 + 479 + next_dma_addr = le64_to_cpu(prp_list[last_prp]); 480 + } 481 + 482 + dma_pool_free(dev->prp_page_pool, addr, dma_addr); 483 + dma_addr = next_dma_addr; 497 484 } 498 485 499 486 if (iod->sg != iod->inline_sg) ··· 594 555 } 595 556 } 596 557 597 - static blk_status_t nvme_setup_prps(struct nvme_dev *dev, struct request *req) 558 + static 
blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev, 559 + struct request *req, struct nvme_rw_command *cmnd) 598 560 { 599 561 struct nvme_iod *iod = blk_mq_rq_to_pdu(req); 600 562 struct dma_pool *pool; ··· 606 566 u32 page_size = dev->ctrl.page_size; 607 567 int offset = dma_addr & (page_size - 1); 608 568 __le64 *prp_list; 609 - __le64 **list = iod_list(req); 569 + void **list = nvme_pci_iod_list(req); 610 570 dma_addr_t prp_dma; 611 571 int nprps, i; 572 + 573 + iod->use_sgl = false; 612 574 613 575 length -= (page_size - offset); 614 576 if (length <= 0) { 615 577 iod->first_dma = 0; 616 - return BLK_STS_OK; 578 + goto done; 617 579 } 618 580 619 581 dma_len -= (page_size - offset); ··· 629 587 630 588 if (length <= page_size) { 631 589 iod->first_dma = dma_addr; 632 - return BLK_STS_OK; 590 + goto done; 633 591 } 634 592 635 593 nprps = DIV_ROUND_UP(length, page_size); ··· 676 634 dma_len = sg_dma_len(sg); 677 635 } 678 636 637 + done: 638 + cmnd->dptr.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); 639 + cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma); 640 + 679 641 return BLK_STS_OK; 680 642 681 643 bad_sgl: ··· 687 641 "Invalid SGL for payload:%d nents:%d\n", 688 642 blk_rq_payload_bytes(req), iod->nents); 689 643 return BLK_STS_IOERR; 644 + } 645 + 646 + static void nvme_pci_sgl_set_data(struct nvme_sgl_desc *sge, 647 + struct scatterlist *sg) 648 + { 649 + sge->addr = cpu_to_le64(sg_dma_address(sg)); 650 + sge->length = cpu_to_le32(sg_dma_len(sg)); 651 + sge->type = NVME_SGL_FMT_DATA_DESC << 4; 652 + } 653 + 654 + static void nvme_pci_sgl_set_seg(struct nvme_sgl_desc *sge, 655 + dma_addr_t dma_addr, int entries) 656 + { 657 + sge->addr = cpu_to_le64(dma_addr); 658 + if (entries < SGES_PER_PAGE) { 659 + sge->length = cpu_to_le32(entries * sizeof(*sge)); 660 + sge->type = NVME_SGL_FMT_LAST_SEG_DESC << 4; 661 + } else { 662 + sge->length = cpu_to_le32(PAGE_SIZE); 663 + sge->type = NVME_SGL_FMT_SEG_DESC << 4; 664 + } 665 + } 666 + 667 + static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev, 668 + struct request *req, struct nvme_rw_command *cmd) 669 + { 670 + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); 671 + int length = blk_rq_payload_bytes(req); 672 + struct dma_pool *pool; 673 + struct nvme_sgl_desc *sg_list; 674 + struct scatterlist *sg = iod->sg; 675 + int entries = iod->nents, i = 0; 676 + dma_addr_t sgl_dma; 677 + 678 + iod->use_sgl = true; 679 + 680 + /* setting the transfer type as SGL */ 681 + cmd->flags = NVME_CMD_SGL_METABUF; 682 + 683 + if (length == sg_dma_len(sg)) { 684 + nvme_pci_sgl_set_data(&cmd->dptr.sgl, sg); 685 + return BLK_STS_OK; 686 + } 687 + 688 + if (entries <= (256 / sizeof(struct nvme_sgl_desc))) { 689 + pool = dev->prp_small_pool; 690 + iod->npages = 0; 691 + } else { 692 + pool = dev->prp_page_pool; 693 + iod->npages = 1; 694 + } 695 + 696 + sg_list = dma_pool_alloc(pool, GFP_ATOMIC, &sgl_dma); 697 + if (!sg_list) { 698 + iod->npages = -1; 699 + return BLK_STS_RESOURCE; 700 + } 701 + 702 + nvme_pci_iod_list(req)[0] = sg_list; 703 + iod->first_dma = sgl_dma; 704 + 705 + nvme_pci_sgl_set_seg(&cmd->dptr.sgl, sgl_dma, entries); 706 + 707 + do { 708 + if (i == SGES_PER_PAGE) { 709 + struct nvme_sgl_desc *old_sg_desc = sg_list; 710 + struct nvme_sgl_desc *link = &old_sg_desc[i - 1]; 711 + 712 + sg_list = dma_pool_alloc(pool, GFP_ATOMIC, &sgl_dma); 713 + if (!sg_list) 714 + return BLK_STS_RESOURCE; 715 + 716 + i = 0; 717 + nvme_pci_iod_list(req)[iod->npages++] = sg_list; 718 + sg_list[i++] = *link; 719 + nvme_pci_sgl_set_seg(link, sgl_dma, entries); 
720 + } 721 + 722 + nvme_pci_sgl_set_data(&sg_list[i++], sg); 723 + 724 + length -= sg_dma_len(sg); 725 + sg = sg_next(sg); 726 + entries--; 727 + } while (length > 0); 728 + 729 + WARN_ON(entries > 0); 730 + return BLK_STS_OK; 731 + } 732 + 733 + static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req) 734 + { 735 + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); 736 + unsigned int avg_seg_size; 737 + 738 + avg_seg_size = DIV_ROUND_UP(blk_rq_payload_bytes(req), 739 + blk_rq_nr_phys_segments(req)); 740 + 741 + if (!(dev->ctrl.sgls & ((1 << 0) | (1 << 1)))) 742 + return false; 743 + if (!iod->nvmeq->qid) 744 + return false; 745 + if (!sgl_threshold || avg_seg_size < sgl_threshold) 746 + return false; 747 + return true; 690 748 } 691 749 692 750 static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, ··· 812 662 DMA_ATTR_NO_WARN)) 813 663 goto out; 814 664 815 - ret = nvme_setup_prps(dev, req); 665 + if (nvme_pci_use_sgls(dev, req)) 666 + ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw); 667 + else 668 + ret = nvme_pci_setup_prps(dev, req, &cmnd->rw); 669 + 816 670 if (ret != BLK_STS_OK) 817 671 goto out_unmap; 818 672 ··· 836 682 goto out_unmap; 837 683 } 838 684 839 - cmnd->rw.dptr.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); 840 - cmnd->rw.dptr.prp2 = cpu_to_le64(iod->first_dma); 841 685 if (blk_integrity_rq(req)) 842 686 cmnd->rw.metadata = cpu_to_le64(sg_dma_address(&iod->meta_sg)); 843 687 return BLK_STS_OK; ··· 1082 930 int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED; 1083 931 1084 932 /* 1085 - * Note: we (ab)use the fact the the prp fields survive if no data 933 + * Note: we (ab)use the fact that the prp fields survive if no data 1086 934 * is attached to the request. 1087 935 */ 1088 936 memset(&c, 0, sizeof(c)); ··· 1103 951 int flags = NVME_QUEUE_PHYS_CONTIG; 1104 952 1105 953 /* 1106 - * Note: we (ab)use the fact the the prp fields survive if no data 954 + * Note: we (ab)use the fact that the prp fields survive if no data 1107 955 * is attached to the request. 1108 956 */ 1109 957 memset(&c, 0, sizeof(c)); ··· 1531 1379 dev->admin_tagset.queue_depth = NVME_AQ_BLKMQ_DEPTH - 1; 1532 1380 dev->admin_tagset.timeout = ADMIN_TIMEOUT; 1533 1381 dev->admin_tagset.numa_node = dev_to_node(dev->dev); 1534 - dev->admin_tagset.cmd_size = nvme_cmd_size(dev); 1382 + dev->admin_tagset.cmd_size = nvme_pci_cmd_size(dev, false); 1535 1383 dev->admin_tagset.flags = BLK_MQ_F_NO_SCHED; 1536 1384 dev->admin_tagset.driver_data = dev; 1537 1385 ··· 2058 1906 dev->tagset.numa_node = dev_to_node(dev->dev); 2059 1907 dev->tagset.queue_depth = 2060 1908 min_t(int, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1; 2061 - dev->tagset.cmd_size = nvme_cmd_size(dev); 1909 + dev->tagset.cmd_size = nvme_pci_cmd_size(dev, false); 1910 + if ((dev->ctrl.sgls & ((1 << 0) | (1 << 1))) && sgl_threshold) { 1911 + dev->tagset.cmd_size = max(dev->tagset.cmd_size, 1912 + nvme_pci_cmd_size(dev, true)); 1913 + } 2062 1914 dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE; 2063 1915 dev->tagset.driver_data = dev; 2064 1916 ··· 2288 2132 { 2289 2133 dev_warn(dev->ctrl.device, "Removing after probe failure status: %d\n", status); 2290 2134 2291 - kref_get(&dev->ctrl.kref); 2135 + nvme_get_ctrl(&dev->ctrl); 2292 2136 nvme_dev_disable(dev, false); 2293 2137 if (!schedule_work(&dev->remove_work)) 2294 2138 nvme_put_ctrl(&dev->ctrl);
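The pci.c hunks above gate SGL use on the controller's SGL support bits, on the queue type, and on the average segment size, and they size the per-request allocation from SGES_PER_PAGE. A minimal userspace sketch of that selection and sizing math follows; the constants mirror the hunk (16-byte SGL descriptors, 4 KiB pages, 32 KiB default threshold), but the program is only an illustration, not the driver code itself.

/* Standalone model of the PRP-vs-SGL decision added to the PCIe driver.
 * Constants mirror the patch; the real driver works on struct request. */
#include <stdbool.h>
#include <stdio.h>

#define PAGE_SIZE          4096u
#define SGL_DESC_SIZE      16u                  /* sizeof(struct nvme_sgl_desc) */
#define SGES_PER_PAGE      (PAGE_SIZE / SGL_DESC_SIZE)
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

static unsigned int sgl_threshold = 32 * 1024;  /* module parameter default */

/* Mirrors nvme_pci_use_sgls(): SGLs only for I/O queues, only when the
 * controller advertises SGL support, and only when the average segment
 * is at least sgl_threshold bytes. */
static bool use_sgls(unsigned int ctrl_sgls, int qid,
		     unsigned int payload_bytes, unsigned int nr_segs)
{
	unsigned int avg_seg_size = DIV_ROUND_UP(payload_bytes, nr_segs);

	if (!(ctrl_sgls & ((1 << 0) | (1 << 1))))
		return false;			/* no SGL support advertised */
	if (!qid)
		return false;			/* admin queue stays on PRPs */
	if (!sgl_threshold || avg_seg_size < sgl_threshold)
		return false;
	return true;
}

/* Mirrors nvme_pci_npages_sgl(): pages needed to hold the descriptor list. */
static unsigned int npages_sgl(unsigned int nr_segs)
{
	return DIV_ROUND_UP(nr_segs * SGL_DESC_SIZE, PAGE_SIZE);
}

int main(void)
{
	/* 1 MiB in 8 segments: 128 KiB average, well above the threshold */
	printf("use SGL: %d\n", use_sgls(0x1, 1, 1024 * 1024, 8));
	/* same payload in 512 segments: 2 KiB average, stays on PRPs */
	printf("use SGL: %d\n", use_sgls(0x1, 1, 1024 * 1024, 512));
	/* 300 descriptors need two pages, since only 256 fit in one */
	printf("pages for 300 SGEs: %u\n", npages_sgl(300));
	return 0;
}

The second case shows why workloads with many small segments keep using PRPs even when the controller supports SGLs.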
+136 -89
drivers/nvme/host/rdma.c
··· 50 50 (NVME_AQ_DEPTH - NVME_RDMA_NR_AEN_COMMANDS) 51 51 52 52 struct nvme_rdma_device { 53 - struct ib_device *dev; 54 - struct ib_pd *pd; 53 + struct ib_device *dev; 54 + struct ib_pd *pd; 55 55 struct kref ref; 56 56 struct list_head entry; 57 57 }; ··· 79 79 }; 80 80 81 81 enum nvme_rdma_queue_flags { 82 - NVME_RDMA_Q_LIVE = 0, 83 - NVME_RDMA_Q_DELETING = 1, 82 + NVME_RDMA_Q_ALLOCATED = 0, 83 + NVME_RDMA_Q_LIVE = 1, 84 84 }; 85 85 86 86 struct nvme_rdma_queue { ··· 105 105 106 106 /* other member variables */ 107 107 struct blk_mq_tag_set tag_set; 108 - struct work_struct delete_work; 109 108 struct work_struct err_work; 110 109 111 110 struct nvme_rdma_qe async_event_sqe; ··· 273 274 struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); 274 275 int ret = 0; 275 276 277 + if (WARN_ON_ONCE(!req->mr)) 278 + return 0; 279 + 276 280 ib_dereg_mr(req->mr); 277 281 278 282 req->mr = ib_alloc_mr(dev->pd, IB_MR_TYPE_MEM_REG, ··· 436 434 437 435 static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue) 438 436 { 439 - struct nvme_rdma_device *dev; 440 - struct ib_device *ibdev; 437 + struct nvme_rdma_device *dev = queue->device; 438 + struct ib_device *ibdev = dev->dev; 441 439 442 - dev = queue->device; 443 - ibdev = dev->dev; 444 440 rdma_destroy_qp(queue->cm_id); 445 441 ib_free_cq(queue->ib_cq); 446 442 ··· 544 544 ret = nvme_rdma_wait_for_cm(queue); 545 545 if (ret) { 546 546 dev_info(ctrl->ctrl.device, 547 - "rdma_resolve_addr wait failed (%d).\n", ret); 547 + "rdma connection establishment failed (%d)\n", ret); 548 548 goto out_destroy_cm_id; 549 549 } 550 550 551 - clear_bit(NVME_RDMA_Q_DELETING, &queue->flags); 551 + set_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags); 552 552 553 553 return 0; 554 554 ··· 568 568 569 569 static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue) 570 570 { 571 - if (test_and_set_bit(NVME_RDMA_Q_DELETING, &queue->flags)) 571 + if (!test_and_clear_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags)) 572 572 return; 573 573 574 574 nvme_rdma_destroy_queue_ib(queue); ··· 670 670 return ret; 671 671 } 672 672 673 - static void nvme_rdma_free_tagset(struct nvme_ctrl *nctrl, bool admin) 673 + static void nvme_rdma_free_tagset(struct nvme_ctrl *nctrl, 674 + struct blk_mq_tag_set *set) 674 675 { 675 676 struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); 676 - struct blk_mq_tag_set *set = admin ? 
677 - &ctrl->admin_tag_set : &ctrl->tag_set; 678 677 679 678 blk_mq_free_tag_set(set); 680 679 nvme_rdma_dev_put(ctrl->device); ··· 698 699 set->driver_data = ctrl; 699 700 set->nr_hw_queues = 1; 700 701 set->timeout = ADMIN_TIMEOUT; 702 + set->flags = BLK_MQ_F_NO_SCHED; 701 703 } else { 702 704 set = &ctrl->tag_set; 703 705 memset(set, 0, sizeof(*set)); ··· 744 744 nvme_rdma_stop_queue(&ctrl->queues[0]); 745 745 if (remove) { 746 746 blk_cleanup_queue(ctrl->ctrl.admin_q); 747 - nvme_rdma_free_tagset(&ctrl->ctrl, true); 747 + nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset); 748 748 } 749 749 nvme_rdma_free_queue(&ctrl->queues[0]); 750 750 } ··· 774 774 goto out_free_tagset; 775 775 } 776 776 } else { 777 - error = blk_mq_reinit_tagset(&ctrl->admin_tag_set, 778 - nvme_rdma_reinit_request); 777 + error = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset); 779 778 if (error) 780 779 goto out_free_queue; 781 780 } ··· 818 819 blk_cleanup_queue(ctrl->ctrl.admin_q); 819 820 out_free_tagset: 820 821 if (new) 821 - nvme_rdma_free_tagset(&ctrl->ctrl, true); 822 + nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset); 822 823 out_free_queue: 823 824 nvme_rdma_free_queue(&ctrl->queues[0]); 824 825 return error; ··· 830 831 nvme_rdma_stop_io_queues(ctrl); 831 832 if (remove) { 832 833 blk_cleanup_queue(ctrl->ctrl.connect_q); 833 - nvme_rdma_free_tagset(&ctrl->ctrl, false); 834 + nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.tagset); 834 835 } 835 836 nvme_rdma_free_io_queues(ctrl); 836 837 } ··· 854 855 goto out_free_tag_set; 855 856 } 856 857 } else { 857 - ret = blk_mq_reinit_tagset(&ctrl->tag_set, 858 - nvme_rdma_reinit_request); 858 + ret = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.tagset); 859 859 if (ret) 860 860 goto out_free_io_queues; 861 861 ··· 873 875 blk_cleanup_queue(ctrl->ctrl.connect_q); 874 876 out_free_tag_set: 875 877 if (new) 876 - nvme_rdma_free_tagset(&ctrl->ctrl, false); 878 + nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.tagset); 877 879 out_free_io_queues: 878 880 nvme_rdma_free_io_queues(ctrl); 879 881 return ret; ··· 912 914 ctrl->ctrl.opts->reconnect_delay * HZ); 913 915 } else { 914 916 dev_info(ctrl->ctrl.device, "Removing controller...\n"); 915 - queue_work(nvme_wq, &ctrl->delete_work); 917 + nvme_delete_ctrl(&ctrl->ctrl); 916 918 } 917 919 } 918 920 ··· 925 927 926 928 ++ctrl->ctrl.nr_reconnects; 927 929 928 - if (ctrl->ctrl.queue_count > 1) 929 - nvme_rdma_destroy_io_queues(ctrl, false); 930 - 931 - nvme_rdma_destroy_admin_queue(ctrl, false); 932 930 ret = nvme_rdma_configure_admin_queue(ctrl, false); 933 931 if (ret) 934 932 goto requeue; ··· 932 938 if (ctrl->ctrl.queue_count > 1) { 933 939 ret = nvme_rdma_configure_io_queues(ctrl, false); 934 940 if (ret) 935 - goto requeue; 941 + goto destroy_admin; 936 942 } 937 943 938 944 changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); ··· 942 948 return; 943 949 } 944 950 945 - ctrl->ctrl.nr_reconnects = 0; 946 - 947 951 nvme_start_ctrl(&ctrl->ctrl); 948 952 949 - dev_info(ctrl->ctrl.device, "Successfully reconnected\n"); 953 + dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n", 954 + ctrl->ctrl.nr_reconnects); 955 + 956 + ctrl->ctrl.nr_reconnects = 0; 950 957 951 958 return; 952 959 960 + destroy_admin: 961 + nvme_rdma_destroy_admin_queue(ctrl, false); 953 962 requeue: 954 963 dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n", 955 964 ctrl->ctrl.nr_reconnects); ··· 968 971 969 972 if (ctrl->ctrl.queue_count > 1) { 970 973 nvme_stop_queues(&ctrl->ctrl); 971 - 
nvme_rdma_stop_io_queues(ctrl); 972 - } 973 - blk_mq_quiesce_queue(ctrl->ctrl.admin_q); 974 - nvme_rdma_stop_queue(&ctrl->queues[0]); 975 - 976 - /* We must take care of fastfail/requeue all our inflight requests */ 977 - if (ctrl->ctrl.queue_count > 1) 978 974 blk_mq_tagset_busy_iter(&ctrl->tag_set, 979 975 nvme_cancel_request, &ctrl->ctrl); 976 + nvme_rdma_destroy_io_queues(ctrl, false); 977 + } 978 + 979 + blk_mq_quiesce_queue(ctrl->ctrl.admin_q); 980 980 blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, 981 981 nvme_cancel_request, &ctrl->ctrl); 982 + nvme_rdma_destroy_admin_queue(ctrl, false); 982 983 983 984 /* 984 985 * queues are not a live anymore, so restart the queues to fail fast ··· 1052 1057 if (!blk_rq_bytes(rq)) 1053 1058 return; 1054 1059 1055 - if (req->mr->need_inval) { 1060 + if (req->mr->need_inval && test_bit(NVME_RDMA_Q_LIVE, &req->queue->flags)) { 1056 1061 res = nvme_rdma_inv_rkey(queue, req); 1057 1062 if (unlikely(res < 0)) { 1058 1063 dev_err(ctrl->ctrl.device, ··· 1577 1582 { 1578 1583 struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); 1579 1584 1585 + dev_warn(req->queue->ctrl->ctrl.device, 1586 + "I/O %d QID %d timeout, reset controller\n", 1587 + rq->tag, nvme_rdma_queue_idx(req->queue)); 1588 + 1580 1589 /* queue error recovery */ 1581 1590 nvme_rdma_error_recovery(req->queue->ctrl); 1582 1591 ··· 1755 1756 nvme_rdma_destroy_admin_queue(ctrl, shutdown); 1756 1757 } 1757 1758 1758 - static void nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl) 1759 + static void nvme_rdma_delete_ctrl(struct nvme_ctrl *ctrl) 1759 1760 { 1760 - nvme_remove_namespaces(&ctrl->ctrl); 1761 - nvme_rdma_shutdown_ctrl(ctrl, true); 1762 - nvme_uninit_ctrl(&ctrl->ctrl); 1763 - nvme_put_ctrl(&ctrl->ctrl); 1764 - } 1765 - 1766 - static void nvme_rdma_del_ctrl_work(struct work_struct *work) 1767 - { 1768 - struct nvme_rdma_ctrl *ctrl = container_of(work, 1769 - struct nvme_rdma_ctrl, delete_work); 1770 - 1771 - nvme_stop_ctrl(&ctrl->ctrl); 1772 - nvme_rdma_remove_ctrl(ctrl); 1773 - } 1774 - 1775 - static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl) 1776 - { 1777 - if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING)) 1778 - return -EBUSY; 1779 - 1780 - if (!queue_work(nvme_wq, &ctrl->delete_work)) 1781 - return -EBUSY; 1782 - 1783 - return 0; 1784 - } 1785 - 1786 - static int nvme_rdma_del_ctrl(struct nvme_ctrl *nctrl) 1787 - { 1788 - struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); 1789 - int ret = 0; 1790 - 1791 - /* 1792 - * Keep a reference until all work is flushed since 1793 - * __nvme_rdma_del_ctrl can free the ctrl mem 1794 - */ 1795 - if (!kref_get_unless_zero(&ctrl->ctrl.kref)) 1796 - return -EBUSY; 1797 - ret = __nvme_rdma_del_ctrl(ctrl); 1798 - if (!ret) 1799 - flush_work(&ctrl->delete_work); 1800 - nvme_put_ctrl(&ctrl->ctrl); 1801 - return ret; 1761 + nvme_rdma_shutdown_ctrl(to_rdma_ctrl(ctrl), true); 1802 1762 } 1803 1763 1804 1764 static void nvme_rdma_reset_ctrl_work(struct work_struct *work) ··· 1781 1823 } 1782 1824 1783 1825 changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); 1784 - WARN_ON_ONCE(!changed); 1826 + if (!changed) { 1827 + /* state change failure is ok if we're in DELETING state */ 1828 + WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING); 1829 + return; 1830 + } 1785 1831 1786 1832 nvme_start_ctrl(&ctrl->ctrl); 1787 1833 ··· 1793 1831 1794 1832 out_fail: 1795 1833 dev_warn(ctrl->ctrl.device, "Removing after reset failure\n"); 1796 - nvme_rdma_remove_ctrl(ctrl); 1834 + nvme_remove_namespaces(&ctrl->ctrl); 1835 + 
nvme_rdma_shutdown_ctrl(ctrl, true); 1836 + nvme_uninit_ctrl(&ctrl->ctrl); 1837 + nvme_put_ctrl(&ctrl->ctrl); 1797 1838 } 1798 1839 1799 1840 static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { ··· 1808 1843 .reg_write32 = nvmf_reg_write32, 1809 1844 .free_ctrl = nvme_rdma_free_ctrl, 1810 1845 .submit_async_event = nvme_rdma_submit_async_event, 1811 - .delete_ctrl = nvme_rdma_del_ctrl, 1846 + .delete_ctrl = nvme_rdma_delete_ctrl, 1812 1847 .get_address = nvmf_get_address, 1848 + .reinit_request = nvme_rdma_reinit_request, 1813 1849 }; 1850 + 1851 + static inline bool 1852 + __nvme_rdma_options_match(struct nvme_rdma_ctrl *ctrl, 1853 + struct nvmf_ctrl_options *opts) 1854 + { 1855 + char *stdport = __stringify(NVME_RDMA_IP_PORT); 1856 + 1857 + 1858 + if (!nvmf_ctlr_matches_baseopts(&ctrl->ctrl, opts) || 1859 + strcmp(opts->traddr, ctrl->ctrl.opts->traddr)) 1860 + return false; 1861 + 1862 + if (opts->mask & NVMF_OPT_TRSVCID && 1863 + ctrl->ctrl.opts->mask & NVMF_OPT_TRSVCID) { 1864 + if (strcmp(opts->trsvcid, ctrl->ctrl.opts->trsvcid)) 1865 + return false; 1866 + } else if (opts->mask & NVMF_OPT_TRSVCID) { 1867 + if (strcmp(opts->trsvcid, stdport)) 1868 + return false; 1869 + } else if (ctrl->ctrl.opts->mask & NVMF_OPT_TRSVCID) { 1870 + if (strcmp(stdport, ctrl->ctrl.opts->trsvcid)) 1871 + return false; 1872 + } 1873 + /* else, it's a match as both have stdport. Fall to next checks */ 1874 + 1875 + /* 1876 + * checking the local address is rough. In most cases, one 1877 + * is not specified and the host port is selected by the stack. 1878 + * 1879 + * Assume no match if: 1880 + * local address is specified and address is not the same 1881 + * local address is not specified but remote is, or vice versa 1882 + * (admin using specific host_traddr when it matters). 1883 + */ 1884 + if (opts->mask & NVMF_OPT_HOST_TRADDR && 1885 + ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR) { 1886 + if (strcmp(opts->host_traddr, ctrl->ctrl.opts->host_traddr)) 1887 + return false; 1888 + } else if (opts->mask & NVMF_OPT_HOST_TRADDR || 1889 + ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR) 1890 + return false; 1891 + /* 1892 + * if neither controller had an host port specified, assume it's 1893 + * a match as everything else matched. 1894 + */ 1895 + 1896 + return true; 1897 + } 1898 + 1899 + /* 1900 + * Fails a connection request if it matches an existing controller 1901 + * (association) with the same tuple: 1902 + * <Host NQN, Host ID, local address, remote address, remote port, SUBSYS NQN> 1903 + * 1904 + * if local address is not specified in the request, it will match an 1905 + * existing controller with all the other parameters the same and no 1906 + * local port address specified as well. 1907 + * 1908 + * The ports don't need to be compared as they are intrinsically 1909 + * already matched by the port pointers supplied. 
1910 + */ 1911 + static bool 1912 + nvme_rdma_existing_controller(struct nvmf_ctrl_options *opts) 1913 + { 1914 + struct nvme_rdma_ctrl *ctrl; 1915 + bool found = false; 1916 + 1917 + mutex_lock(&nvme_rdma_ctrl_mutex); 1918 + list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) { 1919 + found = __nvme_rdma_options_match(ctrl, opts); 1920 + if (found) 1921 + break; 1922 + } 1923 + mutex_unlock(&nvme_rdma_ctrl_mutex); 1924 + 1925 + return found; 1926 + } 1814 1927 1815 1928 static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, 1816 1929 struct nvmf_ctrl_options *opts) ··· 1926 1883 } 1927 1884 } 1928 1885 1886 + if (!opts->duplicate_connect && nvme_rdma_existing_controller(opts)) { 1887 + ret = -EALREADY; 1888 + goto out_free_ctrl; 1889 + } 1890 + 1929 1891 ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_rdma_ctrl_ops, 1930 1892 0 /* no quirks, we're perfect! */); 1931 1893 if (ret) ··· 1939 1891 INIT_DELAYED_WORK(&ctrl->reconnect_work, 1940 1892 nvme_rdma_reconnect_ctrl_work); 1941 1893 INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work); 1942 - INIT_WORK(&ctrl->delete_work, nvme_rdma_del_ctrl_work); 1943 1894 INIT_WORK(&ctrl->ctrl.reset_work, nvme_rdma_reset_ctrl_work); 1944 1895 1945 1896 ctrl->ctrl.queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */ ··· 1997 1950 dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs\n", 1998 1951 ctrl->ctrl.opts->subsysnqn, &ctrl->addr); 1999 1952 2000 - kref_get(&ctrl->ctrl.kref); 1953 + nvme_get_ctrl(&ctrl->ctrl); 2001 1954 2002 1955 mutex_lock(&nvme_rdma_ctrl_mutex); 2003 1956 list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list); ··· 2042 1995 dev_info(ctrl->ctrl.device, 2043 1996 "Removing ctrl: NQN \"%s\", addr %pISp\n", 2044 1997 ctrl->ctrl.opts->subsysnqn, &ctrl->addr); 2045 - __nvme_rdma_del_ctrl(ctrl); 1998 + nvme_delete_ctrl(&ctrl->ctrl); 2046 1999 } 2047 2000 mutex_unlock(&nvme_rdma_ctrl_mutex); 2048 2001
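The new __nvme_rdma_options_match()/nvme_rdma_existing_controller() pair rejects a connect request that targets an already-connected association unless the duplicate_connect option is set. Below is a simplified userspace sketch of the address-matching rules: it models only the traddr/trsvcid/host_traddr comparison (an unspecified trsvcid falls back to the standard service ID, 4420 here standing in for __stringify(NVME_RDMA_IP_PORT)) and omits the nvmf_ctlr_matches_baseopts() checks on host NQN and host ID that the driver also performs.

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define NVME_RDMA_STDPORT "4420"	/* stand-in for the standard port string */

struct opts {
	const char *traddr;
	const char *trsvcid;		/* NULL == not specified */
	const char *host_traddr;	/* NULL == not specified */
};

static bool options_match(const struct opts *a, const struct opts *b)
{
	const char *pa = a->trsvcid ? a->trsvcid : NVME_RDMA_STDPORT;
	const char *pb = b->trsvcid ? b->trsvcid : NVME_RDMA_STDPORT;

	if (strcmp(a->traddr, b->traddr))
		return false;
	if (strcmp(pa, pb))
		return false;
	/* host address must match exactly, or be unset on both sides */
	if (a->host_traddr && b->host_traddr)
		return !strcmp(a->host_traddr, b->host_traddr);
	return !a->host_traddr && !b->host_traddr;
}

int main(void)
{
	struct opts existing = { "192.168.1.10", NULL,   NULL };
	struct opts same     = { "192.168.1.10", "4420", NULL };
	struct opts other    = { "192.168.1.10", "4421", NULL };

	printf("%d %d\n", options_match(&existing, &same),	/* 1: duplicate */
			  options_match(&existing, &other));	/* 0: different port */
	return 0;
}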
+13
drivers/nvme/target/core.c
··· 57 57 return 0; 58 58 } 59 59 60 + static unsigned int nvmet_max_nsid(struct nvmet_subsys *subsys) 61 + { 62 + struct nvmet_ns *ns; 63 + 64 + if (list_empty(&subsys->namespaces)) 65 + return 0; 66 + 67 + ns = list_last_entry(&subsys->namespaces, struct nvmet_ns, dev_link); 68 + return ns->nsid; 69 + } 70 + 60 71 static u32 nvmet_async_event_result(struct nvmet_async_event *aen) 61 72 { 62 73 return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16); ··· 345 334 346 335 ns->enabled = false; 347 336 list_del_rcu(&ns->dev_link); 337 + if (ns->nsid == subsys->max_nsid) 338 + subsys->max_nsid = nvmet_max_nsid(subsys); 348 339 mutex_unlock(&subsys->lock); 349 340 350 341 /*
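The nvmet_max_nsid() helper keeps subsys->max_nsid accurate when the highest-numbered namespace is disabled. A tiny sketch of the idea follows, using a plain sorted array in place of the kernel's RCU list and list_last_entry(); it assumes, as the helper does, that the namespace list is kept sorted by nsid so the last entry is the maximum.

#include <stdio.h>

static unsigned int max_nsid(const unsigned int *nsids, int count)
{
	return count ? nsids[count - 1] : 0;	/* list kept sorted by nsid */
}

int main(void)
{
	unsigned int nsids[] = { 1, 2, 5 };

	printf("%u\n", max_nsid(nsids, 3));	/* 5 */
	/* disabling nsid 5 (the current max) shrinks the list ... */
	printf("%u\n", max_nsid(nsids, 2));	/* ... and max_nsid drops to 2 */
	return 0;
}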
+14 -2
drivers/nvme/target/fc.c
··· 150 150 struct list_head a_list; 151 151 struct nvmet_fc_tgt_queue *queues[NVMET_NR_QUEUES + 1]; 152 152 struct kref ref; 153 + struct work_struct del_work; 153 154 }; 154 155 155 156 ··· 233 232 static int nvmet_fc_tgtport_get(struct nvmet_fc_tgtport *tgtport); 234 233 static void nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, 235 234 struct nvmet_fc_fcp_iod *fod); 235 + static void nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc); 236 236 237 237 238 238 /* *********************** FC-NVME DMA Handling **************************** */ ··· 804 802 return NULL; 805 803 } 806 804 805 + static void 806 + nvmet_fc_delete_assoc(struct work_struct *work) 807 + { 808 + struct nvmet_fc_tgt_assoc *assoc = 809 + container_of(work, struct nvmet_fc_tgt_assoc, del_work); 810 + 811 + nvmet_fc_delete_target_assoc(assoc); 812 + nvmet_fc_tgt_a_put(assoc); 813 + } 814 + 807 815 static struct nvmet_fc_tgt_assoc * 808 816 nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport) 809 817 { ··· 838 826 assoc->a_id = idx; 839 827 INIT_LIST_HEAD(&assoc->a_list); 840 828 kref_init(&assoc->ref); 829 + INIT_WORK(&assoc->del_work, nvmet_fc_delete_assoc); 841 830 842 831 while (needrandom) { 843 832 get_random_bytes(&ran, sizeof(ran) - BYTES_FOR_QID); ··· 1131 1118 nvmet_fc_tgtport_put(tgtport); 1132 1119 1133 1120 if (found_ctrl) { 1134 - nvmet_fc_delete_target_assoc(assoc); 1135 - nvmet_fc_tgt_a_put(assoc); 1121 + schedule_work(&assoc->del_work); 1136 1122 return; 1137 1123 } 1138 1124
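In fc.c the association teardown is no longer run inline from the lookup path; it is packaged as the new del_work and handed to schedule_work(), so the heavy teardown runs from workqueue context. The sketch below models only that shape, with a detached pthread standing in for the kernel workqueue; the names and the sleep-based demo are illustrative, not the transport code.

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

struct assoc {
	int id;
};

static void *delete_assoc_work(void *arg)
{
	struct assoc *assoc = arg;

	/* potentially slow teardown runs outside the caller's context */
	printf("tearing down association %d\n", assoc->id);
	return NULL;
}

/* stands in for schedule_work(&assoc->del_work) */
static void schedule_delete(struct assoc *assoc)
{
	pthread_t t;

	pthread_create(&t, NULL, delete_assoc_work, assoc);
	pthread_detach(t);
}

int main(void)
{
	struct assoc a = { .id = 1 };

	schedule_delete(&a);	/* returns immediately */
	printf("caller done\n");
	sleep(1);		/* give the worker time to run in this demo */
	return 0;
}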
+7 -40
drivers/nvme/target/loop.c
··· 53 53 struct nvme_ctrl ctrl; 54 54 55 55 struct nvmet_ctrl *target_ctrl; 56 - struct work_struct delete_work; 57 56 }; 58 57 59 58 static inline struct nvme_loop_ctrl *to_loop_ctrl(struct nvme_ctrl *ctrl) ··· 364 365 ctrl->admin_tag_set.driver_data = ctrl; 365 366 ctrl->admin_tag_set.nr_hw_queues = 1; 366 367 ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT; 368 + ctrl->admin_tag_set.flags = BLK_MQ_F_NO_SCHED; 367 369 368 370 ctrl->queues[0].ctrl = ctrl; 369 371 error = nvmet_sq_init(&ctrl->queues[0].nvme_sq); ··· 438 438 nvme_loop_destroy_admin_queue(ctrl); 439 439 } 440 440 441 - static void nvme_loop_del_ctrl_work(struct work_struct *work) 441 + static void nvme_loop_delete_ctrl_host(struct nvme_ctrl *ctrl) 442 442 { 443 - struct nvme_loop_ctrl *ctrl = container_of(work, 444 - struct nvme_loop_ctrl, delete_work); 445 - 446 - nvme_stop_ctrl(&ctrl->ctrl); 447 - nvme_remove_namespaces(&ctrl->ctrl); 448 - nvme_loop_shutdown_ctrl(ctrl); 449 - nvme_uninit_ctrl(&ctrl->ctrl); 450 - nvme_put_ctrl(&ctrl->ctrl); 451 - } 452 - 453 - static int __nvme_loop_del_ctrl(struct nvme_loop_ctrl *ctrl) 454 - { 455 - if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING)) 456 - return -EBUSY; 457 - 458 - if (!queue_work(nvme_wq, &ctrl->delete_work)) 459 - return -EBUSY; 460 - 461 - return 0; 462 - } 463 - 464 - static int nvme_loop_del_ctrl(struct nvme_ctrl *nctrl) 465 - { 466 - struct nvme_loop_ctrl *ctrl = to_loop_ctrl(nctrl); 467 - int ret; 468 - 469 - ret = __nvme_loop_del_ctrl(ctrl); 470 - if (ret) 471 - return ret; 472 - 473 - flush_work(&ctrl->delete_work); 474 - 475 - return 0; 443 + nvme_loop_shutdown_ctrl(to_loop_ctrl(ctrl)); 476 444 } 477 445 478 446 static void nvme_loop_delete_ctrl(struct nvmet_ctrl *nctrl) ··· 450 482 mutex_lock(&nvme_loop_ctrl_mutex); 451 483 list_for_each_entry(ctrl, &nvme_loop_ctrl_list, list) { 452 484 if (ctrl->ctrl.cntlid == nctrl->cntlid) 453 - __nvme_loop_del_ctrl(ctrl); 485 + nvme_delete_ctrl(&ctrl->ctrl); 454 486 } 455 487 mutex_unlock(&nvme_loop_ctrl_mutex); 456 488 } ··· 506 538 .reg_write32 = nvmf_reg_write32, 507 539 .free_ctrl = nvme_loop_free_ctrl, 508 540 .submit_async_event = nvme_loop_submit_async_event, 509 - .delete_ctrl = nvme_loop_del_ctrl, 541 + .delete_ctrl = nvme_loop_delete_ctrl_host, 510 542 }; 511 543 512 544 static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) ··· 568 600 ctrl->ctrl.opts = opts; 569 601 INIT_LIST_HEAD(&ctrl->list); 570 602 571 - INIT_WORK(&ctrl->delete_work, nvme_loop_del_ctrl_work); 572 603 INIT_WORK(&ctrl->ctrl.reset_work, nvme_loop_reset_ctrl_work); 573 604 574 605 ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_loop_ctrl_ops, ··· 608 641 dev_info(ctrl->ctrl.device, 609 642 "new ctrl: \"%s\"\n", ctrl->ctrl.opts->subsysnqn); 610 643 611 - kref_get(&ctrl->ctrl.kref); 644 + nvme_get_ctrl(&ctrl->ctrl); 612 645 613 646 changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); 614 647 WARN_ON_ONCE(!changed); ··· 697 730 698 731 mutex_lock(&nvme_loop_ctrl_mutex); 699 732 list_for_each_entry_safe(ctrl, next, &nvme_loop_ctrl_list, list) 700 - __nvme_loop_del_ctrl(ctrl); 733 + nvme_delete_ctrl(&ctrl->ctrl); 701 734 mutex_unlock(&nvme_loop_ctrl_mutex); 702 735 703 736 flush_workqueue(nvme_wq);
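loop.c now drops its private delete_work and funnels deletion through the generic nvme_delete_ctrl() path, supplying only a transport hook via .delete_ctrl. A small stand-alone sketch of that ops-table split follows; the struct and function names are invented for the example, and only the shape (a generic entry point calling a per-transport hook) corresponds to the patch.

#include <stdio.h>

struct ctrl;

/* each transport fills in only its transport-specific hook */
struct ctrl_ops {
	const char *name;
	void (*delete_ctrl)(struct ctrl *ctrl);
};

struct ctrl {
	const struct ctrl_ops *ops;
};

/* generic entry point: common policy lives here, the transport hook
 * only tears down its own resources */
static void delete_ctrl(struct ctrl *ctrl)
{
	printf("core: generic delete for %s controller\n", ctrl->ops->name);
	ctrl->ops->delete_ctrl(ctrl);
}

static void loop_delete_ctrl(struct ctrl *ctrl)
{
	(void)ctrl;
	printf("loop: transport-specific shutdown only\n");
}

static const struct ctrl_ops loop_ops = {
	.name = "loop",
	.delete_ctrl = loop_delete_ctrl,
};

int main(void)
{
	struct ctrl c = { .ops = &loop_ops };

	delete_ctrl(&c);
	return 0;
}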
+1 -1
drivers/nvme/target/nvmet.h
··· 314 314 u32 nvmet_get_log_page_len(struct nvme_command *cmd); 315 315 316 316 #define NVMET_QUEUE_SIZE 1024 317 - #define NVMET_NR_QUEUES 64 317 + #define NVMET_NR_QUEUES 128 318 318 #define NVMET_MAX_CMD NVMET_QUEUE_SIZE 319 319 #define NVMET_KAS 10 320 320 #define NVMET_DISC_KATO 120
+2 -2
include/linux/blk-mq.h
··· 272 272 void blk_mq_freeze_queue_wait(struct request_queue *q); 273 273 int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, 274 274 unsigned long timeout); 275 - int blk_mq_reinit_tagset(struct blk_mq_tag_set *set, 276 - int (reinit_request)(void *, struct request *)); 275 + int blk_mq_tagset_iter(struct blk_mq_tag_set *set, void *data, 276 + int (reinit_request)(void *, struct request *)); 277 277 278 278 int blk_mq_map_queues(struct blk_mq_tag_set *set); 279 279 void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
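This header change is the visible side of turning blk_mq_reinit_tagset() into the generic blk_mq_tagset_iter(): the callback now receives a caller-chosen data pointer instead of always getting the tag set's driver_data, which is what allows the NVMe core to build nvme_reinit_tagset() on top of it (as used by rdma.c above). The model below uses stand-in types and a flat request array, so it only illustrates the API shape, not the block layer's per-hardware-queue iteration.

#include <stdio.h>

struct request { int tag; };

struct tag_set {
	struct request *rqs;
	int nr_rqs;
	void *driver_data;	/* no longer implicitly passed to the callback */
};

/* new-style iterator: the caller decides what 'data' means */
static int tagset_iter(struct tag_set *set, void *data,
		       int (*fn)(void *data, struct request *rq))
{
	int i, ret = 0;

	for (i = 0; i < set->nr_rqs && !ret; i++)
		ret = fn(data, &set->rqs[i]);
	return ret;
}

static int reinit_request(void *data, struct request *rq)
{
	printf("reinit tag %d for %s\n", rq->tag, (const char *)data);
	return 0;
}

int main(void)
{
	struct request rqs[] = { { 0 }, { 1 } };
	struct tag_set set = { .rqs = rqs, .nr_rqs = 2 };

	return tagset_iter(&set, "ctrl0", reinit_request);
}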
+11 -4
include/linux/nvme-fc-driver.h
··· 40 40 * @node_name: FC WWNN for the port 41 41 * @port_name: FC WWPN for the port 42 42 * @port_role: What NVME roles are supported (see FC_PORT_ROLE_xxx) 43 + * @dev_loss_tmo: maximum delay for reconnects to an association on 44 + * this device. Used only on a remoteport. 43 45 * 44 46 * Initialization values for dynamic port fields: 45 47 * @port_id: FC N_Port_ID currently assigned the port. Upper 8 bits must ··· 52 50 u64 port_name; 53 51 u32 port_role; 54 52 u32 port_id; 53 + u32 dev_loss_tmo; 55 54 }; 56 55 57 56 ··· 104 101 NVMEFC_FCP_READ, 105 102 }; 106 103 107 - 108 - #define NVME_FC_MAX_SEGMENTS 256 109 104 110 105 /** 111 106 * struct nvmefc_fcp_req - Request structure passed from NVME-FC transport ··· 203 202 * The length of the buffer corresponds to the local_priv_sz 204 203 * value specified in the nvme_fc_port_template supplied by 205 204 * the LLDD. 205 + * @dev_loss_tmo: maximum delay for reconnects to an association on 206 + * this device. To modify, lldd must call 207 + * nvme_fc_set_remoteport_devloss(). 206 208 * 207 209 * Fields with dynamic values. Values may change base on link state. LLDD 208 210 * may reference fields directly to change them. Initialized by the ··· 263 259 u32 port_role; 264 260 u64 node_name; 265 261 u64 port_name; 266 - 267 262 struct nvme_fc_local_port *localport; 268 - 269 263 void *private; 264 + u32 dev_loss_tmo; 270 265 271 266 /* dynamic fields */ 272 267 u32 port_id; ··· 449 446 450 447 int nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *remoteport); 451 448 449 + void nvme_fc_rescan_remoteport(struct nvme_fc_remote_port *remoteport); 450 + 451 + int nvme_fc_set_remoteport_devloss(struct nvme_fc_remote_port *remoteport, 452 + u32 dev_loss_tmo); 452 453 453 454 454 455 /*
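The new dev_loss_tmo field and nvme_fc_set_remoteport_devloss() let an LLDD bound how long reconnects to a lost remote port keep being attempted. The sketch below is a plain C model of that policy (retry until the port has been gone longer than dev_loss_tmo seconds, then give up); the real decision is made by the FC transport, and the struct here is a stand-in, not struct nvme_fc_remote_port.

#include <stdbool.h>
#include <stdio.h>

struct remoteport {
	unsigned int dev_loss_tmo;	/* seconds, settable by the LLDD */
};

/* would correspond to nvme_fc_set_remoteport_devloss() from the hunk */
static void set_devloss(struct remoteport *rport, unsigned int tmo)
{
	rport->dev_loss_tmo = tmo;
}

static bool keep_reconnecting(const struct remoteport *rport,
			      unsigned int seconds_gone)
{
	return seconds_gone < rport->dev_loss_tmo;
}

int main(void)
{
	struct remoteport rport;

	set_devloss(&rport, 60);
	printf("%d %d\n", keep_reconnecting(&rport, 30),	/* 1: keep trying */
			  keep_reconnecting(&rport, 90));	/* 0: give up */
	return 0;
}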