Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

acpi, nfit: fix module unload vs workqueue shutdown race

The workqueue may still be running when the devres callbacks start
firing to deallocate an acpi_nfit_desc instance. Stop and flush the
workqueue before letting any other devres deallocations proceed.

Reported-by: Linda Knippers <linda.knippers@hpe.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>

+52 -31
+47 -31
drivers/acpi/nfit/core.c
··· 2604 2604 return rc; 2605 2605 } 2606 2606 2607 - queue_work(nfit_wq, &acpi_desc->work); 2607 + if (!acpi_desc->cancel) 2608 + queue_work(nfit_wq, &acpi_desc->work); 2608 2609 return 0; 2609 2610 } 2610 2611 ··· 2651 2650 return 0; 2652 2651 } 2653 2652 2654 - static void acpi_nfit_destruct(void *data) 2653 + static void acpi_nfit_unregister(void *data) 2655 2654 { 2656 2655 struct acpi_nfit_desc *acpi_desc = data; 2657 - struct device *bus_dev = to_nvdimm_bus_dev(acpi_desc->nvdimm_bus); 2658 2656 2659 - /* 2660 - * Destruct under acpi_desc_lock so that nfit_handle_mce does not 2661 - * race teardown 2662 - */ 2663 - mutex_lock(&acpi_desc_lock); 2664 - acpi_desc->cancel = 1; 2665 - /* 2666 - * Bounce the nvdimm bus lock to make sure any in-flight 2667 - * acpi_nfit_ars_rescan() submissions have had a chance to 2668 - * either submit or see ->cancel set. 2669 - */ 2670 - device_lock(bus_dev); 2671 - device_unlock(bus_dev); 2672 - 2673 - flush_workqueue(nfit_wq); 2674 - if (acpi_desc->scrub_count_state) 2675 - sysfs_put(acpi_desc->scrub_count_state); 2676 2657 nvdimm_bus_unregister(acpi_desc->nvdimm_bus); 2677 - acpi_desc->nvdimm_bus = NULL; 2678 - list_del(&acpi_desc->list); 2679 - mutex_unlock(&acpi_desc_lock); 2680 2658 } 2681 2659 2682 2660 int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *data, acpi_size sz) ··· 2673 2693 if (!acpi_desc->nvdimm_bus) 2674 2694 return -ENOMEM; 2675 2695 2676 - rc = devm_add_action_or_reset(dev, acpi_nfit_destruct, 2696 + rc = devm_add_action_or_reset(dev, acpi_nfit_unregister, 2677 2697 acpi_desc); 2678 2698 if (rc) 2679 2699 return rc; ··· 2767 2787 2768 2788 /* bounce the init_mutex to make init_complete valid */ 2769 2789 mutex_lock(&acpi_desc->init_mutex); 2770 - mutex_unlock(&acpi_desc->init_mutex); 2771 - if (acpi_desc->init_complete) 2790 + if (acpi_desc->cancel || acpi_desc->init_complete) { 2791 + mutex_unlock(&acpi_desc->init_mutex); 2772 2792 return 0; 2793 + } 2773 2794 2774 2795 /* 2775 2796 * Scrub work 
could take 10s of seconds, userspace may give up so we ··· 2779 2798 INIT_WORK_ONSTACK(&flush.work, flush_probe); 2780 2799 COMPLETION_INITIALIZER_ONSTACK(flush.cmp); 2781 2800 queue_work(nfit_wq, &flush.work); 2801 + mutex_unlock(&acpi_desc->init_mutex); 2782 2802 2783 2803 rc = wait_for_completion_interruptible(&flush.cmp); 2784 2804 cancel_work_sync(&flush.work); ··· 2816 2834 if (work_busy(&acpi_desc->work)) 2817 2835 return -EBUSY; 2818 2836 2819 - if (acpi_desc->cancel) 2820 - return 0; 2821 - 2822 2837 mutex_lock(&acpi_desc->init_mutex); 2838 + if (acpi_desc->cancel) { 2839 + mutex_unlock(&acpi_desc->init_mutex); 2840 + return 0; 2841 + } 2842 + 2823 2843 list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { 2824 2844 struct acpi_nfit_system_address *spa = nfit_spa->spa; 2825 2845 ··· 2870 2886 acpi_put_table(table); 2871 2887 } 2872 2888 2889 + void acpi_nfit_shutdown(void *data) 2890 + { 2891 + struct acpi_nfit_desc *acpi_desc = data; 2892 + struct device *bus_dev = to_nvdimm_bus_dev(acpi_desc->nvdimm_bus); 2893 + 2894 + /* 2895 + * Destruct under acpi_desc_lock so that nfit_handle_mce does not 2896 + * race teardown 2897 + */ 2898 + mutex_lock(&acpi_desc_lock); 2899 + list_del(&acpi_desc->list); 2900 + mutex_unlock(&acpi_desc_lock); 2901 + 2902 + mutex_lock(&acpi_desc->init_mutex); 2903 + acpi_desc->cancel = 1; 2904 + mutex_unlock(&acpi_desc->init_mutex); 2905 + 2906 + /* 2907 + * Bounce the nvdimm bus lock to make sure any in-flight 2908 + * acpi_nfit_ars_rescan() submissions have had a chance to 2909 + * either submit or see ->cancel set. 
2910 + */ 2911 + device_lock(bus_dev); 2912 + device_unlock(bus_dev); 2913 + 2914 + flush_workqueue(nfit_wq); 2915 + } 2916 + EXPORT_SYMBOL_GPL(acpi_nfit_shutdown); 2917 + 2873 2918 static int acpi_nfit_add(struct acpi_device *adev) 2874 2919 { 2875 2920 struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL }; ··· 2946 2933 rc = acpi_nfit_init(acpi_desc, (void *) tbl 2947 2934 + sizeof(struct acpi_table_nfit), 2948 2935 sz - sizeof(struct acpi_table_nfit)); 2949 - return rc; 2936 + 2937 + if (rc) 2938 + return rc; 2939 + return devm_add_action_or_reset(dev, acpi_nfit_shutdown, acpi_desc); 2950 2940 } 2951 2941 2952 2942 static int acpi_nfit_remove(struct acpi_device *adev) 2953 2943 { 2954 - /* see acpi_nfit_destruct */ 2944 + /* see acpi_nfit_unregister */ 2955 2945 return 0; 2956 2946 } 2957 2947
+1
drivers/acpi/nfit/nfit.h
··· 239 239 240 240 const u8 *to_nfit_uuid(enum nfit_uuids id); 241 241 int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *nfit, acpi_size sz); 242 + void acpi_nfit_shutdown(void *data); 242 243 void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event); 243 244 void __acpi_nvdimm_notify(struct device *dev, u32 event); 244 245 int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
+4
tools/testing/nvdimm/test/nfit.c
··· 1851 1851 if (rc) 1852 1852 return rc; 1853 1853 1854 + rc = devm_add_action_or_reset(&pdev->dev, acpi_nfit_shutdown, acpi_desc); 1855 + if (rc) 1856 + return rc; 1857 + 1854 1858 if (nfit_test->setup != nfit_test0_setup) 1855 1859 return 0; 1856 1860