Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

cxl/pmem: Fix nvdimm registration races

A loop of the form:

while true; do modprobe cxl_pci; modprobe -r cxl_pci; done

...fails with the following crash signature:

BUG: kernel NULL pointer dereference, address: 0000000000000040
[..]
RIP: 0010:cxl_internal_send_cmd+0x5/0xb0 [cxl_core]
[..]
Call Trace:
<TASK>
cxl_pmem_ctl+0x121/0x240 [cxl_pmem]
nvdimm_get_config_data+0xd6/0x1a0 [libnvdimm]
nd_label_data_init+0x135/0x7e0 [libnvdimm]
nvdimm_probe+0xd6/0x1c0 [libnvdimm]
nvdimm_bus_probe+0x7a/0x1e0 [libnvdimm]
really_probe+0xde/0x380
__driver_probe_device+0x78/0x170
driver_probe_device+0x1f/0x90
__device_attach_driver+0x85/0x110
bus_for_each_drv+0x7d/0xc0
__device_attach+0xb4/0x1e0
bus_probe_device+0x9f/0xc0
device_add+0x445/0x9c0
nd_async_device_register+0xe/0x40 [libnvdimm]
async_run_entry_fn+0x30/0x130

...namely that the bottom half of async nvdimm device registration runs
after CXL has already torn down the context that cxl_pmem_ctl()
needs. Unlike the ACPI NFIT case, which benefits from launching multiple
nvdimm device registrations in parallel for the devices listed in the
table, CXL is already marked PROBE_PREFER_ASYNCHRONOUS. So provide for a
synchronous registration path to preclude this scenario.

Fixes: 21083f51521f ("cxl/pmem: Register 'pmem' / cxl_nvdimm devices")
Cc: <stable@vger.kernel.org>
Reported-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>

+25 -4
+1
drivers/cxl/pmem.c
··· 76 76 return rc; 77 77 78 78 set_bit(NDD_LABELING, &flags); 79 + set_bit(NDD_REGISTER_SYNC, &flags); 79 80 set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask); 80 81 set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask); 81 82 set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask);
+16 -3
drivers/nvdimm/bus.c
··· 508 508 put_device(dev); 509 509 } 510 510 511 - void nd_device_register(struct device *dev) 511 + static void __nd_device_register(struct device *dev, bool sync) 512 512 { 513 513 if (!dev) 514 514 return; ··· 531 531 } 532 532 get_device(dev); 533 533 534 - async_schedule_dev_domain(nd_async_device_register, dev, 535 - &nd_async_domain); 534 + if (sync) 535 + nd_async_device_register(dev, 0); 536 + else 537 + async_schedule_dev_domain(nd_async_device_register, dev, 538 + &nd_async_domain); 539 + } 540 + 541 + void nd_device_register(struct device *dev) 542 + { 543 + __nd_device_register(dev, false); 536 544 } 537 545 EXPORT_SYMBOL(nd_device_register); 546 + 547 + void nd_device_register_sync(struct device *dev) 548 + { 549 + __nd_device_register(dev, true); 550 + } 538 551 539 552 void nd_device_unregister(struct device *dev, enum nd_async_mode mode) 540 553 {
+4 -1
drivers/nvdimm/dimm_devs.c
··· 624 624 nvdimm->sec.ext_flags = nvdimm_security_flags(nvdimm, NVDIMM_MASTER); 625 625 device_initialize(dev); 626 626 lockdep_set_class(&dev->mutex, &nvdimm_key); 627 - nd_device_register(dev); 627 + if (test_bit(NDD_REGISTER_SYNC, &flags)) 628 + nd_device_register_sync(dev); 629 + else 630 + nd_device_register(dev); 628 631 629 632 return nvdimm; 630 633 }
+1
drivers/nvdimm/nd-core.h
··· 107 107 void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus); 108 108 void nd_synchronize(void); 109 109 void nd_device_register(struct device *dev); 110 + void nd_device_register_sync(struct device *dev); 110 111 struct nd_label_id; 111 112 char *nd_label_gen_id(struct nd_label_id *label_id, const uuid_t *uuid, 112 113 u32 flags);
+3
include/linux/libnvdimm.h
··· 41 41 */ 42 42 NDD_INCOHERENT = 7, 43 43 44 + /* dimm provider wants synchronous registration by __nvdimm_create() */ 45 + NDD_REGISTER_SYNC = 8, 46 + 44 47 /* need to set a limit somewhere, but yes, this is likely overkill */ 45 48 ND_IOCTL_MAX_BUFLEN = SZ_4M, 46 49 ND_CMD_MAX_ELEM = 5,