Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

NVMe: Fix hot cpu notification deadlock

There is a potential deadlock if a CPU event occurs during nvme probe,
since each device registers for hot CPU notification from within probe.
This fixes the race by having the module register for notification
outside of probe rather than having each device register.

The actual work is done in a scheduled work item instead of in the
notifier, since assigning IO queues has the potential to block if the
driver creates additional queues.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>

authored by

Keith Busch and committed by
Matthew Wilcox
f3db22fe bd67608a

+26 -11
+25 -10
drivers/block/nvme-core.c
··· 73 73 static struct task_struct *nvme_thread; 74 74 static struct workqueue_struct *nvme_workq; 75 75 static wait_queue_head_t nvme_kthread_wait; 76 + static struct notifier_block nvme_nb; 76 77 77 78 static void nvme_reset_failed_dev(struct work_struct *ws); 78 79 ··· 2116 2115 return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride); 2117 2116 } 2118 2117 2118 + static void nvme_cpu_workfn(struct work_struct *work) 2119 + { 2120 + struct nvme_dev *dev = container_of(work, struct nvme_dev, cpu_work); 2121 + if (dev->initialized) 2122 + nvme_assign_io_queues(dev); 2123 + } 2124 + 2119 2125 static int nvme_cpu_notify(struct notifier_block *self, 2120 2126 unsigned long action, void *hcpu) 2121 2127 { 2122 - struct nvme_dev *dev = container_of(self, struct nvme_dev, nb); 2128 + struct nvme_dev *dev; 2129 + 2123 2130 switch (action) { 2124 2131 case CPU_ONLINE: 2125 2132 case CPU_DEAD: 2126 - nvme_assign_io_queues(dev); 2133 + spin_lock(&dev_list_lock); 2134 + list_for_each_entry(dev, &dev_list, node) 2135 + schedule_work(&dev->cpu_work); 2136 + spin_unlock(&dev_list_lock); 2127 2137 break; 2128 2138 } 2129 2139 return NOTIFY_OK; ··· 2202 2190 /* Free previously allocated queues that are no longer usable */ 2203 2191 nvme_free_queues(dev, nr_io_queues + 1); 2204 2192 nvme_assign_io_queues(dev); 2205 - 2206 - dev->nb.notifier_call = &nvme_cpu_notify; 2207 - result = register_hotcpu_notifier(&dev->nb); 2208 - if (result) 2209 - goto free_queues; 2210 2193 2211 2194 return 0; 2212 2195 ··· 2502 2495 int i; 2503 2496 2504 2497 dev->initialized = 0; 2505 - unregister_hotcpu_notifier(&dev->nb); 2506 - 2507 2498 nvme_dev_list_remove(dev); 2508 2499 2509 2500 if (!dev->bar || (dev->bar && readl(&dev->bar->csts) == -1)) { ··· 2772 2767 INIT_LIST_HEAD(&dev->namespaces); 2773 2768 dev->reset_workfn = nvme_reset_failed_dev; 2774 2769 INIT_WORK(&dev->reset_work, nvme_reset_workfn); 2770 + INIT_WORK(&dev->cpu_work, nvme_cpu_workfn); 2775 2771 dev->pci_dev = pdev; 2776 2772 
pci_set_drvdata(pdev, dev); 2777 2773 result = nvme_set_instance(dev); ··· 2842 2836 2843 2837 pci_set_drvdata(pdev, NULL); 2844 2838 flush_work(&dev->reset_work); 2839 + flush_work(&dev->cpu_work); 2845 2840 misc_deregister(&dev->miscdev); 2846 2841 nvme_dev_remove(dev); 2847 2842 nvme_dev_shutdown(dev); ··· 2930 2923 else if (result > 0) 2931 2924 nvme_major = result; 2932 2925 2933 - result = pci_register_driver(&nvme_driver); 2926 + nvme_nb.notifier_call = &nvme_cpu_notify; 2927 + result = register_hotcpu_notifier(&nvme_nb); 2934 2928 if (result) 2935 2929 goto unregister_blkdev; 2930 + 2931 + result = pci_register_driver(&nvme_driver); 2932 + if (result) 2933 + goto unregister_hotcpu; 2936 2934 return 0; 2937 2935 2936 + unregister_hotcpu: 2937 + unregister_hotcpu_notifier(&nvme_nb); 2938 2938 unregister_blkdev: 2939 2939 unregister_blkdev(nvme_major, "nvme"); 2940 2940 kill_workq: ··· 2952 2938 static void __exit nvme_exit(void) 2953 2939 { 2954 2940 pci_unregister_driver(&nvme_driver); 2941 + unregister_hotcpu_notifier(&nvme_nb); 2955 2942 unregister_blkdev(nvme_major, "nvme"); 2956 2943 destroy_workqueue(nvme_workq); 2957 2944 BUG_ON(nvme_thread && !IS_ERR(nvme_thread));
+1 -1
include/linux/nvme.h
··· 90 90 struct miscdevice miscdev; 91 91 work_func_t reset_workfn; 92 92 struct work_struct reset_work; 93 - struct notifier_block nb; 93 + struct work_struct cpu_work; 94 94 char name[12]; 95 95 char serial[20]; 96 96 char model[40];