Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

cxl/dax: Create dax devices for CXL RAM regions

While platform firmware takes some responsibility for mapping the RAM
capacity of CXL devices present at boot, the OS is responsible for
mapping the remainder and hot-added devices. Platform firmware is also
responsible for identifying the platform general purpose memory pool,
typically DDR attached DRAM, and arranging for the remainder to be 'Soft
Reserved'. That reservation allows the CXL subsystem to route the memory
to core-mm via memory-hotplug (dax_kmem), or leave it for dedicated
access (device-dax).

The new 'struct cxl_dax_region' object allows for a CXL memory resource
(region) to be published, but also allows for udev and module policy to
act on that event. It also prevents cxl_core.ko from having a module
loading dependency on any drivers/dax/ modules.

Tested-by: Fan Ni <fan.ni@samsung.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Link: https://lore.kernel.org/r/167602003896.1924368.10335442077318970468.stgit@dwillia2-xfh.jf.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>

+209 -4
+1
MAINTAINERS
··· 6035 6035 M: Vishal Verma <vishal.l.verma@intel.com> 6036 6036 M: Dave Jiang <dave.jiang@intel.com> 6037 6037 L: nvdimm@lists.linux.dev 6038 + L: linux-cxl@vger.kernel.org 6038 6039 S: Supported 6039 6040 F: drivers/dax/ 6040 6041
+2 -1
drivers/cxl/acpi.c
··· 731 731 cxl_bus_drain(); 732 732 } 733 733 734 - module_init(cxl_acpi_init); 734 + /* load before dax_hmem sees 'Soft Reserved' CXL ranges */ 735 + subsys_initcall(cxl_acpi_init); 735 736 module_exit(cxl_acpi_exit); 736 737 MODULE_LICENSE("GPL v2"); 737 738 MODULE_IMPORT_NS(CXL);
+3
drivers/cxl/core/core.h
··· 15 15 extern struct device_attribute dev_attr_delete_region; 16 16 extern struct device_attribute dev_attr_region; 17 17 extern const struct device_type cxl_pmem_region_type; 18 + extern const struct device_type cxl_dax_region_type; 18 19 extern const struct device_type cxl_region_type; 19 20 void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled); 20 21 #define CXL_REGION_ATTR(x) (&dev_attr_##x.attr) 21 22 #define CXL_REGION_TYPE(x) (&cxl_region_type) 22 23 #define SET_CXL_REGION_ATTR(x) (&dev_attr_##x.attr), 23 24 #define CXL_PMEM_REGION_TYPE(x) (&cxl_pmem_region_type) 25 + #define CXL_DAX_REGION_TYPE(x) (&cxl_dax_region_type) 24 26 int cxl_region_init(void); 25 27 void cxl_region_exit(void); 26 28 #else ··· 40 38 #define CXL_REGION_TYPE(x) NULL 41 39 #define SET_CXL_REGION_ATTR(x) 42 40 #define CXL_PMEM_REGION_TYPE(x) NULL 41 + #define CXL_DAX_REGION_TYPE(x) NULL 43 42 #endif 44 43 45 44 struct cxl_send_command;
+3 -1
drivers/cxl/core/port.c
··· 46 46 return CXL_DEVICE_NVDIMM; 47 47 if (dev->type == CXL_PMEM_REGION_TYPE()) 48 48 return CXL_DEVICE_PMEM_REGION; 49 + if (dev->type == CXL_DAX_REGION_TYPE()) 50 + return CXL_DEVICE_DAX_REGION; 49 51 if (is_cxl_port(dev)) { 50 52 if (is_cxl_root(to_cxl_port(dev))) 51 53 return CXL_DEVICE_ROOT; ··· 2017 2015 debugfs_remove_recursive(cxl_debugfs); 2018 2016 } 2019 2017 2020 - module_init(cxl_core_init); 2018 + subsys_initcall(cxl_core_init); 2021 2019 module_exit(cxl_core_exit); 2022 2020 MODULE_LICENSE("GPL v2");
+106 -2
drivers/cxl/core/region.c
··· 2278 2278 return cxlr_pmem; 2279 2279 } 2280 2280 2281 + static void cxl_dax_region_release(struct device *dev) 2282 + { 2283 + struct cxl_dax_region *cxlr_dax = to_cxl_dax_region(dev); 2284 + 2285 + kfree(cxlr_dax); 2286 + } 2287 + 2288 + static const struct attribute_group *cxl_dax_region_attribute_groups[] = { 2289 + &cxl_base_attribute_group, 2290 + NULL, 2291 + }; 2292 + 2293 + const struct device_type cxl_dax_region_type = { 2294 + .name = "cxl_dax_region", 2295 + .release = cxl_dax_region_release, 2296 + .groups = cxl_dax_region_attribute_groups, 2297 + }; 2298 + 2299 + static bool is_cxl_dax_region(struct device *dev) 2300 + { 2301 + return dev->type == &cxl_dax_region_type; 2302 + } 2303 + 2304 + struct cxl_dax_region *to_cxl_dax_region(struct device *dev) 2305 + { 2306 + if (dev_WARN_ONCE(dev, !is_cxl_dax_region(dev), 2307 + "not a cxl_dax_region device\n")) 2308 + return NULL; 2309 + return container_of(dev, struct cxl_dax_region, dev); 2310 + } 2311 + EXPORT_SYMBOL_NS_GPL(to_cxl_dax_region, CXL); 2312 + 2313 + static struct lock_class_key cxl_dax_region_key; 2314 + 2315 + static struct cxl_dax_region *cxl_dax_region_alloc(struct cxl_region *cxlr) 2316 + { 2317 + struct cxl_region_params *p = &cxlr->params; 2318 + struct cxl_dax_region *cxlr_dax; 2319 + struct device *dev; 2320 + 2321 + down_read(&cxl_region_rwsem); 2322 + if (p->state != CXL_CONFIG_COMMIT) { 2323 + cxlr_dax = ERR_PTR(-ENXIO); 2324 + goto out; 2325 + } 2326 + 2327 + cxlr_dax = kzalloc(sizeof(*cxlr_dax), GFP_KERNEL); 2328 + if (!cxlr_dax) { 2329 + cxlr_dax = ERR_PTR(-ENOMEM); 2330 + goto out; 2331 + } 2332 + 2333 + cxlr_dax->hpa_range.start = p->res->start; 2334 + cxlr_dax->hpa_range.end = p->res->end; 2335 + 2336 + dev = &cxlr_dax->dev; 2337 + cxlr_dax->cxlr = cxlr; 2338 + device_initialize(dev); 2339 + lockdep_set_class(&dev->mutex, &cxl_dax_region_key); 2340 + device_set_pm_not_required(dev); 2341 + dev->parent = &cxlr->dev; 2342 + dev->bus = &cxl_bus_type; 2343 + dev->type = 
&cxl_dax_region_type; 2344 + out: 2345 + up_read(&cxl_region_rwsem); 2346 + 2347 + return cxlr_dax; 2348 + } 2349 + 2281 2350 static void cxlr_pmem_unregister(void *_cxlr_pmem) 2282 2351 { 2283 2352 struct cxl_pmem_region *cxlr_pmem = _cxlr_pmem; ··· 2428 2359 err_bridge: 2429 2360 put_device(&cxl_nvb->dev); 2430 2361 cxlr->cxl_nvb = NULL; 2362 + return rc; 2363 + } 2364 + 2365 + static void cxlr_dax_unregister(void *_cxlr_dax) 2366 + { 2367 + struct cxl_dax_region *cxlr_dax = _cxlr_dax; 2368 + 2369 + device_unregister(&cxlr_dax->dev); 2370 + } 2371 + 2372 + static int devm_cxl_add_dax_region(struct cxl_region *cxlr) 2373 + { 2374 + struct cxl_dax_region *cxlr_dax; 2375 + struct device *dev; 2376 + int rc; 2377 + 2378 + cxlr_dax = cxl_dax_region_alloc(cxlr); 2379 + if (IS_ERR(cxlr_dax)) 2380 + return PTR_ERR(cxlr_dax); 2381 + 2382 + dev = &cxlr_dax->dev; 2383 + rc = dev_set_name(dev, "dax_region%d", cxlr->id); 2384 + if (rc) 2385 + goto err; 2386 + 2387 + rc = device_add(dev); 2388 + if (rc) 2389 + goto err; 2390 + 2391 + dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent), 2392 + dev_name(dev)); 2393 + 2394 + return devm_add_action_or_reset(&cxlr->dev, cxlr_dax_unregister, 2395 + cxlr_dax); 2396 + err: 2397 + put_device(dev); 2431 2398 return rc; 2432 2399 } 2433 2400 ··· 2729 2624 p->res->start, p->res->end, cxlr, 2730 2625 is_system_ram) > 0) 2731 2626 return 0; 2732 - dev_dbg(dev, "TODO: hookup devdax\n"); 2733 - return 0; 2627 + return devm_cxl_add_dax_region(cxlr); 2734 2628 default: 2735 2629 dev_dbg(&cxlr->dev, "unsupported region mode: %d\n", 2736 2630 cxlr->mode);
+12
drivers/cxl/cxl.h
··· 513 513 struct cxl_pmem_region_mapping mapping[]; 514 514 }; 515 515 516 + struct cxl_dax_region { 517 + struct device dev; 518 + struct cxl_region *cxlr; 519 + struct range hpa_range; 520 + }; 521 + 516 522 /** 517 523 * struct cxl_port - logical collection of upstream port devices and 518 524 * downstream port devices to construct a CXL memory ··· 713 707 #define CXL_DEVICE_MEMORY_EXPANDER 5 714 708 #define CXL_DEVICE_REGION 6 715 709 #define CXL_DEVICE_PMEM_REGION 7 710 + #define CXL_DEVICE_DAX_REGION 8 716 711 717 712 #define MODULE_ALIAS_CXL(type) MODULE_ALIAS("cxl:t" __stringify(type) "*") 718 713 #define CXL_MODALIAS_FMT "cxl:t%d" ··· 732 725 struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev); 733 726 int cxl_add_to_region(struct cxl_port *root, 734 727 struct cxl_endpoint_decoder *cxled); 728 + struct cxl_dax_region *to_cxl_dax_region(struct device *dev); 735 729 #else 736 730 static inline bool is_cxl_pmem_region(struct device *dev) 737 731 { ··· 746 738 struct cxl_endpoint_decoder *cxled) 747 739 { 748 740 return 0; 741 + } 742 + static inline struct cxl_dax_region *to_cxl_dax_region(struct device *dev) 743 + { 744 + return NULL; 749 745 } 750 746 #endif 751 747
+13
drivers/dax/Kconfig
··· 45 45 46 46 Say M if unsure. 47 47 48 + config DEV_DAX_CXL 49 + tristate "CXL DAX: direct access to CXL RAM regions" 50 + depends on CXL_REGION && DEV_DAX 51 + default CXL_REGION && DEV_DAX 52 + help 53 + CXL RAM regions are either mapped by platform-firmware 54 + and published in the initial system-memory map as "System RAM", mapped 55 + by platform-firmware as "Soft Reserved", or dynamically provisioned 56 + after boot by the CXL driver. In the latter two cases a device-dax 57 + instance is created to access that unmapped-by-default address range. 58 + Per usual it can remain as dedicated access via a device interface, or 59 + converted to "System RAM" via the dax_kmem facility. 60 + 48 61 config DEV_DAX_HMEM_DEVICES 49 62 depends on DEV_DAX_HMEM && DAX 50 63 def_bool y
+2
drivers/dax/Makefile
··· 3 3 obj-$(CONFIG_DEV_DAX) += device_dax.o 4 4 obj-$(CONFIG_DEV_DAX_KMEM) += kmem.o 5 5 obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o 6 + obj-$(CONFIG_DEV_DAX_CXL) += dax_cxl.o 6 7 7 8 dax-y := super.o 8 9 dax-y += bus.o 9 10 device_dax-y := device.o 10 11 dax_pmem-y := pmem.o 12 + dax_cxl-y := cxl.o 11 13 12 14 obj-y += hmem/
+53
drivers/dax/cxl.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* Copyright(c) 2023 Intel Corporation. All rights reserved. */ 3 + #include <linux/module.h> 4 + #include <linux/dax.h> 5 + 6 + #include "../cxl/cxl.h" 7 + #include "bus.h" 8 + 9 + static int cxl_dax_region_probe(struct device *dev) 10 + { 11 + struct cxl_dax_region *cxlr_dax = to_cxl_dax_region(dev); 12 + int nid = phys_to_target_node(cxlr_dax->hpa_range.start); 13 + struct cxl_region *cxlr = cxlr_dax->cxlr; 14 + struct dax_region *dax_region; 15 + struct dev_dax_data data; 16 + struct dev_dax *dev_dax; 17 + 18 + if (nid == NUMA_NO_NODE) 19 + nid = memory_add_physaddr_to_nid(cxlr_dax->hpa_range.start); 20 + 21 + dax_region = alloc_dax_region(dev, cxlr->id, &cxlr_dax->hpa_range, nid, 22 + PMD_SIZE, IORESOURCE_DAX_KMEM); 23 + if (!dax_region) 24 + return -ENOMEM; 25 + 26 + data = (struct dev_dax_data) { 27 + .dax_region = dax_region, 28 + .id = -1, 29 + .size = range_len(&cxlr_dax->hpa_range), 30 + }; 31 + dev_dax = devm_create_dev_dax(&data); 32 + if (IS_ERR(dev_dax)) 33 + return PTR_ERR(dev_dax); 34 + 35 + /* child dev_dax instances now own the lifetime of the dax_region */ 36 + dax_region_put(dax_region); 37 + return 0; 38 + } 39 + 40 + static struct cxl_driver cxl_dax_region_driver = { 41 + .name = "cxl_dax_region", 42 + .probe = cxl_dax_region_probe, 43 + .id = CXL_DEVICE_DAX_REGION, 44 + .drv = { 45 + .suppress_bind_attrs = true, 46 + }, 47 + }; 48 + 49 + module_cxl_driver(cxl_dax_region_driver); 50 + MODULE_ALIAS_CXL(CXL_DEVICE_DAX_REGION); 51 + MODULE_LICENSE("GPL"); 52 + MODULE_AUTHOR("Intel Corporation"); 53 + MODULE_IMPORT_NS(CXL);
+14
drivers/dax/hmem/hmem.c
··· 72 72 long id; 73 73 int rc; 74 74 75 + if (IS_ENABLED(CONFIG_CXL_REGION) && 76 + region_intersects(res->start, resource_size(res), IORESOURCE_MEM, 77 + IORES_DESC_CXL) != REGION_DISJOINT) { 78 + dev_dbg(host, "deferring range to CXL: %pr\n", res); 79 + return 0; 80 + } 81 + 75 82 rc = region_intersects(res->start, resource_size(res), IORESOURCE_MEM, 76 83 IORES_DESC_SOFT_RESERVED); 77 84 if (rc != REGION_INTERSECTS) ··· 163 156 164 157 module_init(dax_hmem_init); 165 158 module_exit(dax_hmem_exit); 159 + 160 + /* Allow for CXL to define its own dax regions */ 161 + #if IS_ENABLED(CONFIG_CXL_REGION) 162 + #if IS_MODULE(CONFIG_CXL_ACPI) 163 + MODULE_SOFTDEP("pre: cxl_acpi"); 164 + #endif 165 + #endif 166 166 167 167 MODULE_ALIAS("platform:hmem*"); 168 168 MODULE_ALIAS("platform:hmem_platform*");