Merge tag 'cxl-for-6.15' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl

Pull Compute Express Link (CXL) updates from Dave Jiang:

- Add support for Global Persistent Flush (GPF)

- Cleanup of DPA partition metadata handling:
   - Remove the CXL_DECODER_MIXED enum that's not needed anymore
   - Introduce helpers to access resource and perf metadata
   - Introduce 'struct cxl_dpa_partition' and 'struct cxl_range_info'
   - Make cxl_dpa_alloc() DPA partition number agnostic
   - Remove cxl_decoder_mode
   - Clean up partition size and perf helpers

- Remove unused CXL partition values

- Add logging support for CXL CPER endpoint and port protocol errors:
   - Prefix protocol error struct and function names with cxl_
   - Move protocol error definitions and structures to a common location
   - Remove drivers/firmware/efi/cper_cxl.h in favor of include/linux/cper.h
   - Add support in GHES to process CXL CPER protocol errors
   - Process CXL CPER protocol errors in the CXL core
   - Add trace logging for CXL PCIe port RAS errors

- Remove redundant gp_port init

- Add validation of cxl device serial number

- CXL ABI documentation updates/fixups

- A series that uses guard() to clean up open-coded mutex locking and
  remove gotos from error handling.
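
  For reference, a minimal sketch of the conversion pattern; the device
  structure and lock below are hypothetical, but guard(mutex) comes from
  include/linux/cleanup.h and drops the lock on every return path, which
  is what lets the goto/unlock pairs go away:

  #include <linux/cleanup.h>
  #include <linux/mutex.h>

  struct foo_dev {                        /* hypothetical example device */
          struct mutex lock;
          bool busy;
          int state;
  };

  static int foo_set_state(struct foo_dev *foo, int state)
  {
          guard(mutex)(&foo->lock);       /* unlocked at every return */

          if (foo->busy)
                  return -EBUSY;          /* no goto/unlock needed */

          foo->state = state;
          return 0;
  }

  The series applies the same pattern with guard(rwsem_read) and
  guard(rwsem_write) for cxl_dpa_rwsem, cxl_region_rwsem and
  cxl_memdev_rwsem, as visible in the hdm.c, mbox.c, memdev.c and port.c
  hunks below.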

- Some follow-up patches to support dirty shutdown accounting:
   - Add helper to retrieve DVSEC offset for dirty shutdown registers
   - Rename cxl_get_dirty_shutdown() to cxl_arm_dirty_shutdown()
   - Add support for dirty shutdown count via sysfs (sketched below)
   - cxl_test support for dirty shutdown
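
  A minimal sketch of surfacing the count; the attribute wiring here is
  illustrative only (the real file lands at .../nmemY/cxl/dirty_shutdown,
  see the ABI hunk below), while cxl_get_dirty_count() is the helper
  added in the mbox.c hunk:

  static ssize_t dirty_shutdown_show(struct device *dev,
                                     struct device_attribute *attr,
                                     char *buf)
  {
          struct cxl_memdev *cxlmd = to_cxl_memdev(dev);  /* assumed parent */
          struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
          u32 count;
          int rc;

          /* GET_HEALTH_INFO round trip, see cxl_get_dirty_count() below */
          rc = cxl_get_dirty_count(mds, &count);
          if (rc)
                  return rc;

          return sysfs_emit(buf, "%u\n", count);
  }
  static DEVICE_ATTR_RO(dirty_shutdown);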

- A series to support CXL mailbox Features commands.

  Mostly in preparation for the CXL EDAC code, and also for CXL fwctl
  support, to utilize the CXL Features. The commands include "Get
  Supported Features", "Get Feature", and "Set Feature".

- A series to support the extended linear cache described by the ACPI
  HMAT table.

  The addition helps enumerate the cache and also provides additional
  RAS reporting support for configurations with an extended linear
  cache (plus related fixes for the series).
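
  The RAS side of this boils down to reporting, alongside the HPA, the
  aliased address that lands in the cache portion of the interleave, as
  in the mbox.c hunk below; a sketch with a hypothetical helper name:

  static u64 example_hpa_alias(u64 hpa, u64 cache_size)
  {
          /* no extended linear cache configured for this region */
          if (!cache_size)
                  return ULLONG_MAX;

          /* the alias sits cache_size lower, in the cache (near) part */
          return hpa - cache_size;
  }

  The MCE notifier added in mce.c goes the other way, resolving the
  alias of a poisoned SPA via cxl_port_get_spa_cache_alias() so the
  aliased page can be offlined as well.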

- An update to cxl_test to support a 3-way capable CFMWS

- A documentation fix to remove unused "mixed mode"

* tag 'cxl-for-6.15' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl: (39 commits)
cxl/region: Fix the first aliased address miscalculation
cxl/region: Quiet some dev_warn()s in extended linear cache setup
cxl/Documentation: Remove 'mixed' from sysfs mode doc
cxl: Fix warning from emitting resource_size_t as long long int on 32bit systems
cxl/test: Define a CFMWS capable of a 3 way HB interleave
cxl/mem: Do not return error if CONFIG_CXL_MCE unset
tools/testing/cxl: Set Shutdown State support
cxl/pmem: Export dirty shutdown count via sysfs
cxl/pmem: Rename cxl_dirty_shutdown_state()
cxl/pci: Introduce cxl_gpf_get_dvsec()
cxl/pci: Support Global Persistent Flush (GPF)
cxl: Document missing sysfs files
cxl: Plug typos in ABI doc
cxl/pmem: debug invalid serial number data
cxl/cdat: Remove redundant gp_port initialization
cxl/memdev: Remove unused partition values
cxl/region: Drop goto pattern of construct_region()
cxl/region: Drop goto pattern in cxl_dax_region_alloc()
cxl/core: Use guard() to drop goto pattern of cxl_dpa_alloc()
cxl/core: Use guard() to drop the goto pattern of cxl_dpa_free()
...

+1647 -590
+6
Documentation/ABI/stable/sysfs-devices-node
··· 177 177 The cache write policy: 0 for write-back, 1 for write-through, 178 178 other or unknown. 179 179 180 + What: /sys/devices/system/node/nodeX/memory_side_cache/indexY/address_mode 181 + Date: March 2025 182 + Contact: Dave Jiang <dave.jiang@intel.com> 183 + Description: 184 + The address mode: 0 for reserved, 1 for extended-linear. 185 + 180 186 What: /sys/devices/system/node/nodeX/x86/sgx_total_bytes 181 187 Date: November 2021 182 188 Contact: Jarkko Sakkinen <jarkko@kernel.org>
+41 -12
Documentation/ABI/testing/sysfs-bus-cxl
··· 1 1 What: /sys/bus/cxl/flush 2 - Date: Januarry, 2022 2 + Date: January, 2022 3 3 KernelVersion: v5.18 4 4 Contact: linux-cxl@vger.kernel.org 5 5 Description: ··· 18 18 specification. 19 19 20 20 21 + What: /sys/bus/cxl/devices/memX/payload_max 22 + Date: December, 2020 23 + KernelVersion: v5.12 24 + Contact: linux-cxl@vger.kernel.org 25 + Description: 26 + (RO) Maximum size (in bytes) of the mailbox command payload 27 + registers. Linux caps this at 1MB if the device reports a 28 + larger size. 29 + 30 + 31 + What: /sys/bus/cxl/devices/memX/label_storage_size 32 + Date: May, 2021 33 + KernelVersion: v5.13 34 + Contact: linux-cxl@vger.kernel.org 35 + Description: 36 + (RO) Size (in bytes) of the Label Storage Area (LSA). 37 + 38 + 21 39 What: /sys/bus/cxl/devices/memX/ram/size 22 40 Date: December, 2020 23 41 KernelVersion: v5.12 ··· 51 33 KernelVersion: v6.8 52 34 Contact: linux-cxl@vger.kernel.org 53 35 Description: 54 - (RO) For CXL host platforms that support "QoS Telemmetry" 36 + (RO) For CXL host platforms that support "QoS Telemetry" 55 37 this attribute conveys a comma delimited list of platform 56 38 specific cookies that identifies a QoS performance class 57 39 for the volatile partition of the CXL mem device. These ··· 78 60 KernelVersion: v6.8 79 61 Contact: linux-cxl@vger.kernel.org 80 62 Description: 81 - (RO) For CXL host platforms that support "QoS Telemmetry" 63 + (RO) For CXL host platforms that support "QoS Telemetry" 82 64 this attribute conveys a comma delimited list of platform 83 65 specific cookies that identifies a QoS performance class 84 66 for the persistent partition of the CXL mem device. These ··· 339 321 Contact: linux-cxl@vger.kernel.org 340 322 Description: 341 323 (RW) When a CXL decoder is of devtype "cxl_decoder_endpoint" it 342 - translates from a host physical address range, to a device local 343 - address range. Device-local address ranges are further split 344 - into a 'ram' (volatile memory) range and 'pmem' (persistent 345 - memory) range. The 'mode' attribute emits one of 'ram', 'pmem', 346 - 'mixed', or 'none'. The 'mixed' indication is for error cases 347 - when a decoder straddles the volatile/persistent partition 348 - boundary, and 'none' indicates the decoder is not actively 349 - decoding, or no DPA allocation policy has been set. 324 + translates from a host physical address range, to a device 325 + local address range. Device-local address ranges are further 326 + split into a 'ram' (volatile memory) range and 'pmem' 327 + (persistent memory) range. The 'mode' attribute emits one of 328 + 'ram', 'pmem', or 'none'. The 'none' indicates the decoder is 329 + not actively decoding, or no DPA allocation policy has been 330 + set. 350 331 351 332 'mode' can be written, when the decoder is in the 'disabled' 352 333 state, with either 'ram' or 'pmem' to set the boundaries for the ··· 440 423 KernelVersion: v6.5 441 424 Contact: linux-cxl@vger.kernel.org 442 425 Description: 443 - (RO) For CXL host platforms that support "QoS Telemmetry" this 426 + (RO) For CXL host platforms that support "QoS Telemetry" this 444 427 root-decoder-only attribute conveys a platform specific cookie 445 428 that identifies a QoS performance class for the CXL Window. 446 429 This class-id can be compared against a similar "qos_class" ··· 603 586 See Documentation/ABI/stable/sysfs-devices-node. access0 provides 604 587 the number to the closest initiator and access1 provides the 605 588 number to the closest CPU. 
589 + 590 + 591 + What: /sys/bus/cxl/devices/nvdimm-bridge0/ndbusX/nmemY/cxl/dirty_shutdown 592 + Date: Feb, 2025 593 + KernelVersion: v6.15 594 + Contact: linux-cxl@vger.kernel.org 595 + Description: 596 + (RO) The device dirty shutdown count value, which is the number 597 + of times the device could have incurred in potential data loss. 598 + The count is persistent across power loss and wraps back to 0 599 + upon overflow. If this file is not present, the device does not 600 + have the necessary support for dirty tracking.
+1 -1
Documentation/driver-api/cxl/maturity-map.rst
··· 130 130 * [0] Switch CCI 131 131 * [3] Timestamp 132 132 * [1] PMEM labels 133 - * [0] PMEM GPF / Dirty Shutdown 133 + * [3] PMEM GPF / Dirty Shutdown 134 134 * [0] Scan Media 135 135 136 136 PMU
+1
arch/x86/mm/pat/set_memory.c
··· 2274 2274 pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn); 2275 2275 return rc; 2276 2276 } 2277 + EXPORT_SYMBOL_GPL(set_mce_nospec); 2277 2278 2278 2279 /* Restore full speculative operation to the pfn. */ 2279 2280 int clear_mce_nospec(unsigned long pfn)
+103
drivers/acpi/apei/ghes.c
··· 674 674 schedule_work(&entry->work); 675 675 } 676 676 677 + /* Room for 8 entries */ 678 + #define CXL_CPER_PROT_ERR_FIFO_DEPTH 8 679 + static DEFINE_KFIFO(cxl_cper_prot_err_fifo, struct cxl_cper_prot_err_work_data, 680 + CXL_CPER_PROT_ERR_FIFO_DEPTH); 681 + 682 + /* Synchronize schedule_work() with cxl_cper_prot_err_work changes */ 683 + static DEFINE_SPINLOCK(cxl_cper_prot_err_work_lock); 684 + struct work_struct *cxl_cper_prot_err_work; 685 + 686 + static void cxl_cper_post_prot_err(struct cxl_cper_sec_prot_err *prot_err, 687 + int severity) 688 + { 689 + #ifdef CONFIG_ACPI_APEI_PCIEAER 690 + struct cxl_cper_prot_err_work_data wd; 691 + u8 *dvsec_start, *cap_start; 692 + 693 + if (!(prot_err->valid_bits & PROT_ERR_VALID_AGENT_ADDRESS)) { 694 + pr_err_ratelimited("CXL CPER invalid agent type\n"); 695 + return; 696 + } 697 + 698 + if (!(prot_err->valid_bits & PROT_ERR_VALID_ERROR_LOG)) { 699 + pr_err_ratelimited("CXL CPER invalid protocol error log\n"); 700 + return; 701 + } 702 + 703 + if (prot_err->err_len != sizeof(struct cxl_ras_capability_regs)) { 704 + pr_err_ratelimited("CXL CPER invalid RAS Cap size (%u)\n", 705 + prot_err->err_len); 706 + return; 707 + } 708 + 709 + if (!(prot_err->valid_bits & PROT_ERR_VALID_SERIAL_NUMBER)) 710 + pr_warn(FW_WARN "CXL CPER no device serial number\n"); 711 + 712 + guard(spinlock_irqsave)(&cxl_cper_prot_err_work_lock); 713 + 714 + if (!cxl_cper_prot_err_work) 715 + return; 716 + 717 + switch (prot_err->agent_type) { 718 + case RCD: 719 + case DEVICE: 720 + case LD: 721 + case FMLD: 722 + case RP: 723 + case DSP: 724 + case USP: 725 + memcpy(&wd.prot_err, prot_err, sizeof(wd.prot_err)); 726 + 727 + dvsec_start = (u8 *)(prot_err + 1); 728 + cap_start = dvsec_start + prot_err->dvsec_len; 729 + 730 + memcpy(&wd.ras_cap, cap_start, sizeof(wd.ras_cap)); 731 + wd.severity = cper_severity_to_aer(severity); 732 + break; 733 + default: 734 + pr_err_ratelimited("CXL CPER invalid agent type: %d\n", 735 + prot_err->agent_type); 736 + return; 737 + } 738 + 739 + if (!kfifo_put(&cxl_cper_prot_err_fifo, wd)) { 740 + pr_err_ratelimited("CXL CPER kfifo overflow\n"); 741 + return; 742 + } 743 + 744 + schedule_work(cxl_cper_prot_err_work); 745 + #endif 746 + } 747 + 748 + int cxl_cper_register_prot_err_work(struct work_struct *work) 749 + { 750 + if (cxl_cper_prot_err_work) 751 + return -EINVAL; 752 + 753 + guard(spinlock)(&cxl_cper_prot_err_work_lock); 754 + cxl_cper_prot_err_work = work; 755 + return 0; 756 + } 757 + EXPORT_SYMBOL_NS_GPL(cxl_cper_register_prot_err_work, "CXL"); 758 + 759 + int cxl_cper_unregister_prot_err_work(struct work_struct *work) 760 + { 761 + if (cxl_cper_prot_err_work != work) 762 + return -EINVAL; 763 + 764 + guard(spinlock)(&cxl_cper_prot_err_work_lock); 765 + cxl_cper_prot_err_work = NULL; 766 + return 0; 767 + } 768 + EXPORT_SYMBOL_NS_GPL(cxl_cper_unregister_prot_err_work, "CXL"); 769 + 770 + int cxl_cper_prot_err_kfifo_get(struct cxl_cper_prot_err_work_data *wd) 771 + { 772 + return kfifo_get(&cxl_cper_prot_err_fifo, wd); 773 + } 774 + EXPORT_SYMBOL_NS_GPL(cxl_cper_prot_err_kfifo_get, "CXL"); 775 + 677 776 /* Room for 8 entries for each of the 4 event log queues */ 678 777 #define CXL_CPER_FIFO_DEPTH 32 679 778 DEFINE_KFIFO(cxl_cper_fifo, struct cxl_cper_work_data, CXL_CPER_FIFO_DEPTH); ··· 876 777 } 877 778 else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) { 878 779 queued = ghes_handle_arm_hw_error(gdata, sev, sync); 780 + } else if (guid_equal(sec_type, &CPER_SEC_CXL_PROT_ERR)) { 781 + struct cxl_cper_sec_prot_err *prot_err = 
acpi_hest_get_payload(gdata); 782 + 783 + cxl_cper_post_prot_err(prot_err, gdata->error_severity); 879 784 } else if (guid_equal(sec_type, &CPER_SEC_CXL_GEN_MEDIA_GUID)) { 880 785 struct cxl_cper_event_rec *rec = acpi_hest_get_payload(gdata); 881 786
+44
drivers/acpi/numa/hmat.c
··· 108 108 return NULL; 109 109 } 110 110 111 + /** 112 + * hmat_get_extended_linear_cache_size - Retrieve the extended linear cache size 113 + * @backing_res: resource from the backing media 114 + * @nid: node id for the memory region 115 + * @cache_size: (Output) size of extended linear cache. 116 + * 117 + * Return: 0 on success. Errno on failure. 118 + * 119 + */ 120 + int hmat_get_extended_linear_cache_size(struct resource *backing_res, int nid, 121 + resource_size_t *cache_size) 122 + { 123 + unsigned int pxm = node_to_pxm(nid); 124 + struct memory_target *target; 125 + struct target_cache *tcache; 126 + struct resource *res; 127 + 128 + target = find_mem_target(pxm); 129 + if (!target) 130 + return -ENOENT; 131 + 132 + list_for_each_entry(tcache, &target->caches, node) { 133 + if (tcache->cache_attrs.address_mode != 134 + NODE_CACHE_ADDR_MODE_EXTENDED_LINEAR) 135 + continue; 136 + 137 + res = &target->memregions; 138 + if (!resource_contains(res, backing_res)) 139 + continue; 140 + 141 + *cache_size = tcache->cache_attrs.size; 142 + return 0; 143 + } 144 + 145 + *cache_size = 0; 146 + return 0; 147 + } 148 + EXPORT_SYMBOL_NS_GPL(hmat_get_extended_linear_cache_size, "CXL"); 149 + 111 150 static struct memory_target *acpi_find_genport_target(u32 uid) 112 151 { 113 152 struct memory_target *target; ··· 545 506 switch ((attrs & ACPI_HMAT_CACHE_ASSOCIATIVITY) >> 8) { 546 507 case ACPI_HMAT_CA_DIRECT_MAPPED: 547 508 tcache->cache_attrs.indexing = NODE_CACHE_DIRECT_MAP; 509 + /* Extended Linear mode is only valid if cache is direct mapped */ 510 + if (cache->address_mode == ACPI_HMAT_CACHE_MODE_EXTENDED_LINEAR) { 511 + tcache->cache_attrs.address_mode = 512 + NODE_CACHE_ADDR_MODE_EXTENDED_LINEAR; 513 + } 548 514 break; 549 515 case ACPI_HMAT_CA_COMPLEX_CACHE_INDEXING: 550 516 tcache->cache_attrs.indexing = NODE_CACHE_INDEXED;
+2
drivers/base/node.c
··· 244 244 CACHE_ATTR(line_size, "%u") 245 245 CACHE_ATTR(indexing, "%u") 246 246 CACHE_ATTR(write_policy, "%u") 247 + CACHE_ATTR(address_mode, "%#x") 247 248 248 249 static struct attribute *cache_attrs[] = { 249 250 &dev_attr_indexing.attr, 250 251 &dev_attr_size.attr, 251 252 &dev_attr_line_size.attr, 252 253 &dev_attr_write_policy.attr, 254 + &dev_attr_address_mode.attr, 253 255 NULL, 254 256 }; 255 257 ATTRIBUTE_GROUPS(cache);
+4
drivers/cxl/Kconfig
··· 158 158 If unsure, or if this kernel is meant for production environments, 159 159 say N. 160 160 161 + config CXL_MCE 162 + def_bool y 163 + depends on X86_MCE && MEMORY_FAILURE 164 + 161 165 endif
+3
drivers/cxl/core/Makefile
··· 14 14 cxl_core-y += hdm.o 15 15 cxl_core-y += pmu.o 16 16 cxl_core-y += cdat.o 17 + cxl_core-y += ras.o 18 + cxl_core-y += acpi.o 17 19 cxl_core-$(CONFIG_TRACING) += trace.o 18 20 cxl_core-$(CONFIG_CXL_REGION) += region.o 21 + cxl_core-$(CONFIG_CXL_MCE) += mce.o 19 22 cxl_core-$(CONFIG_CXL_FEATURES) += features.o
+11
drivers/cxl/core/acpi.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* Copyright(c) 2024 Intel Corporation. All rights reserved. */ 3 + #include <linux/acpi.h> 4 + #include "cxl.h" 5 + #include "core.h" 6 + 7 + int cxl_acpi_get_extended_linear_cache_size(struct resource *backing_res, 8 + int nid, resource_size_t *size) 9 + { 10 + return hmat_get_extended_linear_cache_size(backing_res, nid, size); 11 + }
+53 -47
drivers/cxl/core/cdat.c
··· 258 258 static void cxl_memdev_set_qos_class(struct cxl_dev_state *cxlds, 259 259 struct xarray *dsmas_xa) 260 260 { 261 - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); 262 261 struct device *dev = cxlds->dev; 263 - struct range pmem_range = { 264 - .start = cxlds->pmem_res.start, 265 - .end = cxlds->pmem_res.end, 266 - }; 267 - struct range ram_range = { 268 - .start = cxlds->ram_res.start, 269 - .end = cxlds->ram_res.end, 270 - }; 271 262 struct dsmas_entry *dent; 272 263 unsigned long index; 273 264 274 265 xa_for_each(dsmas_xa, index, dent) { 275 - if (resource_size(&cxlds->ram_res) && 276 - range_contains(&ram_range, &dent->dpa_range)) 277 - update_perf_entry(dev, dent, &mds->ram_perf); 278 - else if (resource_size(&cxlds->pmem_res) && 279 - range_contains(&pmem_range, &dent->dpa_range)) 280 - update_perf_entry(dev, dent, &mds->pmem_perf); 281 - else 266 + bool found = false; 267 + 268 + for (int i = 0; i < cxlds->nr_partitions; i++) { 269 + struct resource *res = &cxlds->part[i].res; 270 + struct range range = { 271 + .start = res->start, 272 + .end = res->end, 273 + }; 274 + 275 + if (range_contains(&range, &dent->dpa_range)) { 276 + update_perf_entry(dev, dent, 277 + &cxlds->part[i].perf); 278 + found = true; 279 + break; 280 + } 281 + } 282 + 283 + if (!found) 282 284 dev_dbg(dev, "no partition for dsmas dpa: %pra\n", 283 285 &dent->dpa_range); 284 286 } ··· 345 343 return 0; 346 344 } 347 345 348 - static int cxl_qos_class_verify(struct cxl_memdev *cxlmd) 346 + static void cxl_qos_class_verify(struct cxl_memdev *cxlmd) 349 347 { 350 348 struct cxl_dev_state *cxlds = cxlmd->cxlds; 351 - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); 352 349 struct cxl_port *root_port; 353 - int rc; 354 350 355 351 struct cxl_root *cxl_root __free(put_cxl_root) = 356 352 find_cxl_root(cxlmd->endpoint); 357 353 354 + /* 355 + * No need to reset_dpa_perf() here as find_cxl_root() is guaranteed to 356 + * succeed when called in the cxl_endpoint_port_probe() path. 357 + */ 358 358 if (!cxl_root) 359 - return -ENODEV; 359 + return; 360 360 361 361 root_port = &cxl_root->port; 362 362 363 - /* Check that the QTG IDs are all sane between end device and root decoders */ 364 - if (!cxl_qos_match(root_port, &mds->ram_perf)) 365 - reset_dpa_perf(&mds->ram_perf); 366 - if (!cxl_qos_match(root_port, &mds->pmem_perf)) 367 - reset_dpa_perf(&mds->pmem_perf); 363 + /* 364 + * Save userspace from needing to check if a qos class has any matches 365 + * by hiding qos class info if the memdev is not mapped by a root 366 + * decoder, or the partition class does not match any root decoder 367 + * class. 
368 + */ 369 + if (!device_for_each_child(&root_port->dev, 370 + cxlmd->endpoint->host_bridge, 371 + match_cxlrd_hb)) { 372 + for (int i = 0; i < cxlds->nr_partitions; i++) { 373 + struct cxl_dpa_perf *perf = &cxlds->part[i].perf; 368 374 369 - /* Check to make sure that the device's host bridge is under a root decoder */ 370 - rc = device_for_each_child(&root_port->dev, 371 - cxlmd->endpoint->host_bridge, match_cxlrd_hb); 372 - if (!rc) { 373 - reset_dpa_perf(&mds->ram_perf); 374 - reset_dpa_perf(&mds->pmem_perf); 375 + reset_dpa_perf(perf); 376 + } 377 + return; 375 378 } 376 379 377 - return rc; 380 + for (int i = 0; i < cxlds->nr_partitions; i++) { 381 + struct cxl_dpa_perf *perf = &cxlds->part[i].perf; 382 + 383 + if (!cxl_qos_match(root_port, perf)) 384 + reset_dpa_perf(perf); 385 + } 378 386 } 379 387 380 388 static void discard_dsmas(struct xarray *xa) ··· 582 570 return range_contains(&perf->dpa_range, &dpa); 583 571 } 584 572 585 - static struct cxl_dpa_perf *cxled_get_dpa_perf(struct cxl_endpoint_decoder *cxled, 586 - enum cxl_decoder_mode mode) 573 + static struct cxl_dpa_perf *cxled_get_dpa_perf(struct cxl_endpoint_decoder *cxled) 587 574 { 588 575 struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); 589 - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); 576 + struct cxl_dev_state *cxlds = cxlmd->cxlds; 590 577 struct cxl_dpa_perf *perf; 591 578 592 - switch (mode) { 593 - case CXL_DECODER_RAM: 594 - perf = &mds->ram_perf; 595 - break; 596 - case CXL_DECODER_PMEM: 597 - perf = &mds->pmem_perf; 598 - break; 599 - default: 579 + if (cxled->part < 0) 600 580 return ERR_PTR(-EINVAL); 601 - } 581 + perf = &cxlds->part[cxled->part].perf; 582 + 583 + if (!perf) 584 + return ERR_PTR(-EINVAL); 602 585 603 586 if (!dpa_perf_contains(perf, cxled->dpa_res)) 604 587 return ERR_PTR(-EINVAL); ··· 654 647 if (cxlds->rcd) 655 648 return -ENODEV; 656 649 657 - perf = cxled_get_dpa_perf(cxled, cxlr->mode); 650 + perf = cxled_get_dpa_perf(cxled); 658 651 if (IS_ERR(perf)) 659 652 return PTR_ERR(perf); 660 653 661 - gp_port = to_cxl_port(parent_port->dev.parent); 662 654 *gp_is_root = is_cxl_root(gp_port); 663 655 664 656 /* ··· 1059 1053 1060 1054 lockdep_assert_held(&cxl_dpa_rwsem); 1061 1055 1062 - perf = cxled_get_dpa_perf(cxled, cxlr->mode); 1056 + perf = cxled_get_dpa_perf(cxled); 1063 1057 if (IS_ERR(perf)) 1064 1058 return; 1065 1059
+8 -2
drivers/cxl/core/core.h
··· 74 74 resource_size_t length); 75 75 76 76 struct dentry *cxl_debugfs_create_dir(const char *dir); 77 - int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled, 78 - enum cxl_decoder_mode mode); 77 + int cxl_dpa_set_part(struct cxl_endpoint_decoder *cxled, 78 + enum cxl_partition_mode mode); 79 79 int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size); 80 80 int cxl_dpa_free(struct cxl_endpoint_decoder *cxled); 81 81 resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled); ··· 116 116 bool cxl_need_node_perf_attrs_update(int nid); 117 117 int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port, 118 118 struct access_coordinate *c); 119 + 120 + int cxl_ras_init(void); 121 + void cxl_ras_exit(void); 122 + int cxl_gpf_port_setup(struct device *dport_dev, struct cxl_port *port); 123 + int cxl_acpi_get_extended_linear_cache_size(struct resource *backing_res, 124 + int nid, resource_size_t *size); 119 125 120 126 #ifdef CONFIG_CXL_FEATURES 121 127 size_t cxl_get_feature(struct cxl_mailbox *cxl_mbox, const uuid_t *feat_uuid,
+265 -117
drivers/cxl/core/hdm.c
··· 213 213 { 214 214 struct resource *p1, *p2; 215 215 216 - down_read(&cxl_dpa_rwsem); 216 + guard(rwsem_read)(&cxl_dpa_rwsem); 217 217 for (p1 = cxlds->dpa_res.child; p1; p1 = p1->sibling) { 218 218 __cxl_dpa_debug(file, p1, 0); 219 219 for (p2 = p1->child; p2; p2 = p2->sibling) 220 220 __cxl_dpa_debug(file, p2, 1); 221 221 } 222 - up_read(&cxl_dpa_rwsem); 223 222 } 224 223 EXPORT_SYMBOL_NS_GPL(cxl_dpa_debug, "CXL"); 224 + 225 + /* See request_skip() kernel-doc */ 226 + static resource_size_t __adjust_skip(struct cxl_dev_state *cxlds, 227 + const resource_size_t skip_base, 228 + const resource_size_t skip_len, 229 + const char *requester) 230 + { 231 + const resource_size_t skip_end = skip_base + skip_len - 1; 232 + 233 + for (int i = 0; i < cxlds->nr_partitions; i++) { 234 + const struct resource *part_res = &cxlds->part[i].res; 235 + resource_size_t adjust_start, adjust_end, size; 236 + 237 + adjust_start = max(skip_base, part_res->start); 238 + adjust_end = min(skip_end, part_res->end); 239 + 240 + if (adjust_end < adjust_start) 241 + continue; 242 + 243 + size = adjust_end - adjust_start + 1; 244 + 245 + if (!requester) 246 + __release_region(&cxlds->dpa_res, adjust_start, size); 247 + else if (!__request_region(&cxlds->dpa_res, adjust_start, size, 248 + requester, 0)) 249 + return adjust_start - skip_base; 250 + } 251 + 252 + return skip_len; 253 + } 254 + #define release_skip(c, b, l) __adjust_skip((c), (b), (l), NULL) 225 255 226 256 /* 227 257 * Must be called in a context that synchronizes against this decoder's ··· 271 241 skip_start = res->start - cxled->skip; 272 242 __release_region(&cxlds->dpa_res, res->start, resource_size(res)); 273 243 if (cxled->skip) 274 - __release_region(&cxlds->dpa_res, skip_start, cxled->skip); 244 + release_skip(cxlds, skip_start, cxled->skip); 275 245 cxled->skip = 0; 276 246 cxled->dpa_res = NULL; 277 247 put_device(&cxled->cxld.dev); ··· 280 250 281 251 static void cxl_dpa_release(void *cxled) 282 252 { 283 - down_write(&cxl_dpa_rwsem); 253 + guard(rwsem_write)(&cxl_dpa_rwsem); 284 254 __cxl_dpa_release(cxled); 285 - up_write(&cxl_dpa_rwsem); 286 255 } 287 256 288 257 /* ··· 297 268 __cxl_dpa_release(cxled); 298 269 } 299 270 271 + /** 272 + * request_skip() - Track DPA 'skip' in @cxlds->dpa_res resource tree 273 + * @cxlds: CXL.mem device context that parents @cxled 274 + * @cxled: Endpoint decoder establishing new allocation that skips lower DPA 275 + * @skip_base: DPA < start of new DPA allocation (DPAnew) 276 + * @skip_len: @skip_base + @skip_len == DPAnew 277 + * 278 + * DPA 'skip' arises from out-of-sequence DPA allocation events relative 279 + * to free capacity across multiple partitions. It is a wasteful event 280 + * as usable DPA gets thrown away, but if a deployment has, for example, 281 + * a dual RAM+PMEM device, wants to use PMEM, and has unallocated RAM 282 + * DPA, the free RAM DPA must be sacrificed to start allocating PMEM. 283 + * See third "Implementation Note" in CXL 3.1 8.2.4.19.13 "Decoder 284 + * Protection" for more details. 285 + * 286 + * A 'skip' always covers the last allocated DPA in a previous partition 287 + * to the start of the current partition to allocate. Allocations never 288 + * start in the middle of a partition, and allocations are always 289 + * de-allocated in reverse order (see cxl_dpa_free(), or natural devm 290 + * unwind order from forced in-order allocation). 
291 + * 292 + * If @cxlds->nr_partitions was guaranteed to be <= 2 then the 'skip' 293 + * would always be contained to a single partition. Given 294 + * @cxlds->nr_partitions may be > 2 it results in cases where the 'skip' 295 + * might span "tail capacity of partition[0], all of partition[1], ..., 296 + * all of partition[N-1]" to support allocating from partition[N]. That 297 + * in turn interacts with the partition 'struct resource' boundaries 298 + * within @cxlds->dpa_res whereby 'skip' requests need to be divided by 299 + * partition. I.e. this is a quirk of using a 'struct resource' tree to 300 + * detect range conflicts while also tracking partition boundaries in 301 + * @cxlds->dpa_res. 302 + */ 303 + static int request_skip(struct cxl_dev_state *cxlds, 304 + struct cxl_endpoint_decoder *cxled, 305 + const resource_size_t skip_base, 306 + const resource_size_t skip_len) 307 + { 308 + resource_size_t skipped = __adjust_skip(cxlds, skip_base, skip_len, 309 + dev_name(&cxled->cxld.dev)); 310 + 311 + if (skipped == skip_len) 312 + return 0; 313 + 314 + dev_dbg(cxlds->dev, 315 + "%s: failed to reserve skipped space (%pa %pa %pa)\n", 316 + dev_name(&cxled->cxld.dev), &skip_base, &skip_len, &skipped); 317 + 318 + release_skip(cxlds, skip_base, skipped); 319 + 320 + return -EBUSY; 321 + } 322 + 300 323 static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, 301 324 resource_size_t base, resource_size_t len, 302 325 resource_size_t skipped) ··· 358 277 struct cxl_dev_state *cxlds = cxlmd->cxlds; 359 278 struct device *dev = &port->dev; 360 279 struct resource *res; 280 + int rc; 361 281 362 282 lockdep_assert_held_write(&cxl_dpa_rwsem); 363 283 ··· 387 305 } 388 306 389 307 if (skipped) { 390 - res = __request_region(&cxlds->dpa_res, base - skipped, skipped, 391 - dev_name(&cxled->cxld.dev), 0); 392 - if (!res) { 393 - dev_dbg(dev, 394 - "decoder%d.%d: failed to reserve skipped space\n", 395 - port->id, cxled->cxld.id); 396 - return -EBUSY; 397 - } 308 + rc = request_skip(cxlds, cxled, base - skipped, skipped); 309 + if (rc) 310 + return rc; 398 311 } 399 312 res = __request_region(&cxlds->dpa_res, base, len, 400 313 dev_name(&cxled->cxld.dev), 0); ··· 397 320 dev_dbg(dev, "decoder%d.%d: failed to reserve allocation\n", 398 321 port->id, cxled->cxld.id); 399 322 if (skipped) 400 - __release_region(&cxlds->dpa_res, base - skipped, 401 - skipped); 323 + release_skip(cxlds, base - skipped, skipped); 402 324 return -EBUSY; 403 325 } 404 326 cxled->dpa_res = res; 405 327 cxled->skip = skipped; 406 328 407 - if (resource_contains(&cxlds->pmem_res, res)) 408 - cxled->mode = CXL_DECODER_PMEM; 409 - else if (resource_contains(&cxlds->ram_res, res)) 410 - cxled->mode = CXL_DECODER_RAM; 411 - else { 412 - dev_warn(dev, "decoder%d.%d: %pr mixed mode not supported\n", 413 - port->id, cxled->cxld.id, cxled->dpa_res); 414 - cxled->mode = CXL_DECODER_MIXED; 415 - } 329 + /* 330 + * When allocating new capacity, ->part is already set, when 331 + * discovering decoder settings at initial enumeration, ->part 332 + * is not set. 
333 + */ 334 + if (cxled->part < 0) 335 + for (int i = 0; cxlds->nr_partitions; i++) 336 + if (resource_contains(&cxlds->part[i].res, res)) { 337 + cxled->part = i; 338 + break; 339 + } 340 + 341 + if (cxled->part < 0) 342 + dev_warn(dev, "decoder%d.%d: %pr does not map any partition\n", 343 + port->id, cxled->cxld.id, res); 416 344 417 345 port->hdm_end++; 418 346 get_device(&cxled->cxld.dev); 419 347 return 0; 420 348 } 349 + 350 + static int add_dpa_res(struct device *dev, struct resource *parent, 351 + struct resource *res, resource_size_t start, 352 + resource_size_t size, const char *type) 353 + { 354 + int rc; 355 + 356 + *res = (struct resource) { 357 + .name = type, 358 + .start = start, 359 + .end = start + size - 1, 360 + .flags = IORESOURCE_MEM, 361 + }; 362 + if (resource_size(res) == 0) { 363 + dev_dbg(dev, "DPA(%s): no capacity\n", res->name); 364 + return 0; 365 + } 366 + rc = request_resource(parent, res); 367 + if (rc) { 368 + dev_err(dev, "DPA(%s): failed to track %pr (%d)\n", res->name, 369 + res, rc); 370 + return rc; 371 + } 372 + 373 + dev_dbg(dev, "DPA(%s): %pr\n", res->name, res); 374 + 375 + return 0; 376 + } 377 + 378 + static const char *cxl_mode_name(enum cxl_partition_mode mode) 379 + { 380 + switch (mode) { 381 + case CXL_PARTMODE_RAM: 382 + return "ram"; 383 + case CXL_PARTMODE_PMEM: 384 + return "pmem"; 385 + default: 386 + return ""; 387 + }; 388 + } 389 + 390 + /* if this fails the caller must destroy @cxlds, there is no recovery */ 391 + int cxl_dpa_setup(struct cxl_dev_state *cxlds, const struct cxl_dpa_info *info) 392 + { 393 + struct device *dev = cxlds->dev; 394 + 395 + guard(rwsem_write)(&cxl_dpa_rwsem); 396 + 397 + if (cxlds->nr_partitions) 398 + return -EBUSY; 399 + 400 + if (!info->size || !info->nr_partitions) { 401 + cxlds->dpa_res = DEFINE_RES_MEM(0, 0); 402 + cxlds->nr_partitions = 0; 403 + return 0; 404 + } 405 + 406 + cxlds->dpa_res = DEFINE_RES_MEM(0, info->size); 407 + 408 + for (int i = 0; i < info->nr_partitions; i++) { 409 + const struct cxl_dpa_part_info *part = &info->part[i]; 410 + int rc; 411 + 412 + cxlds->part[i].perf.qos_class = CXL_QOS_CLASS_INVALID; 413 + cxlds->part[i].mode = part->mode; 414 + 415 + /* Require ordered + contiguous partitions */ 416 + if (i) { 417 + const struct cxl_dpa_part_info *prev = &info->part[i - 1]; 418 + 419 + if (prev->range.end + 1 != part->range.start) 420 + return -EINVAL; 421 + } 422 + rc = add_dpa_res(dev, &cxlds->dpa_res, &cxlds->part[i].res, 423 + part->range.start, range_len(&part->range), 424 + cxl_mode_name(part->mode)); 425 + if (rc) 426 + return rc; 427 + cxlds->nr_partitions++; 428 + } 429 + 430 + return 0; 431 + } 432 + EXPORT_SYMBOL_GPL(cxl_dpa_setup); 421 433 422 434 int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, 423 435 resource_size_t base, resource_size_t len, ··· 528 362 529 363 resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled) 530 364 { 531 - resource_size_t size = 0; 532 - 533 - down_read(&cxl_dpa_rwsem); 365 + guard(rwsem_read)(&cxl_dpa_rwsem); 534 366 if (cxled->dpa_res) 535 - size = resource_size(cxled->dpa_res); 536 - up_read(&cxl_dpa_rwsem); 367 + return resource_size(cxled->dpa_res); 537 368 538 - return size; 369 + return 0; 539 370 } 540 371 541 372 resource_size_t cxl_dpa_resource_start(struct cxl_endpoint_decoder *cxled) ··· 550 387 { 551 388 struct cxl_port *port = cxled_to_port(cxled); 552 389 struct device *dev = &cxled->cxld.dev; 553 - int rc; 554 390 555 - down_write(&cxl_dpa_rwsem); 556 - if (!cxled->dpa_res) { 557 - rc = 0; 558 - 
goto out; 559 - } 391 + guard(rwsem_write)(&cxl_dpa_rwsem); 392 + if (!cxled->dpa_res) 393 + return 0; 560 394 if (cxled->cxld.region) { 561 395 dev_dbg(dev, "decoder assigned to: %s\n", 562 396 dev_name(&cxled->cxld.region->dev)); 563 - rc = -EBUSY; 564 - goto out; 397 + return -EBUSY; 565 398 } 566 399 if (cxled->cxld.flags & CXL_DECODER_F_ENABLE) { 567 400 dev_dbg(dev, "decoder enabled\n"); 568 - rc = -EBUSY; 569 - goto out; 401 + return -EBUSY; 570 402 } 571 403 if (cxled->cxld.id != port->hdm_end) { 572 404 dev_dbg(dev, "expected decoder%d.%d\n", port->id, 573 405 port->hdm_end); 574 - rc = -EBUSY; 575 - goto out; 406 + return -EBUSY; 576 407 } 408 + 577 409 devm_cxl_dpa_release(cxled); 578 - rc = 0; 579 - out: 580 - up_write(&cxl_dpa_rwsem); 581 - return rc; 410 + return 0; 582 411 } 583 412 584 - int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled, 585 - enum cxl_decoder_mode mode) 413 + int cxl_dpa_set_part(struct cxl_endpoint_decoder *cxled, 414 + enum cxl_partition_mode mode) 586 415 { 587 416 struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); 588 417 struct cxl_dev_state *cxlds = cxlmd->cxlds; 589 418 struct device *dev = &cxled->cxld.dev; 590 - 591 - switch (mode) { 592 - case CXL_DECODER_RAM: 593 - case CXL_DECODER_PMEM: 594 - break; 595 - default: 596 - dev_dbg(dev, "unsupported mode: %d\n", mode); 597 - return -EINVAL; 598 - } 419 + int part; 599 420 600 421 guard(rwsem_write)(&cxl_dpa_rwsem); 601 422 if (cxled->cxld.flags & CXL_DECODER_F_ENABLE) 602 423 return -EBUSY; 603 424 604 - /* 605 - * Only allow modes that are supported by the current partition 606 - * configuration 607 - */ 608 - if (mode == CXL_DECODER_PMEM && !resource_size(&cxlds->pmem_res)) { 609 - dev_dbg(dev, "no available pmem capacity\n"); 610 - return -ENXIO; 425 + for (part = 0; part < cxlds->nr_partitions; part++) 426 + if (cxlds->part[part].mode == mode) 427 + break; 428 + 429 + if (part >= cxlds->nr_partitions) { 430 + dev_dbg(dev, "unsupported mode: %d\n", mode); 431 + return -EINVAL; 611 432 } 612 - if (mode == CXL_DECODER_RAM && !resource_size(&cxlds->ram_res)) { 613 - dev_dbg(dev, "no available ram capacity\n"); 433 + 434 + if (!resource_size(&cxlds->part[part].res)) { 435 + dev_dbg(dev, "no available capacity for mode: %d\n", mode); 614 436 return -ENXIO; 615 437 } 616 438 617 - cxled->mode = mode; 439 + cxled->part = part; 618 440 return 0; 619 441 } 620 442 621 - int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size) 443 + static int __cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size) 622 444 { 623 445 struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); 624 - resource_size_t free_ram_start, free_pmem_start; 625 - struct cxl_port *port = cxled_to_port(cxled); 626 446 struct cxl_dev_state *cxlds = cxlmd->cxlds; 627 447 struct device *dev = &cxled->cxld.dev; 628 - resource_size_t start, avail, skip; 448 + struct resource *res, *prev = NULL; 449 + resource_size_t start, avail, skip, skip_start; 629 450 struct resource *p, *last; 630 - int rc; 451 + int part; 631 452 632 - down_write(&cxl_dpa_rwsem); 453 + guard(rwsem_write)(&cxl_dpa_rwsem); 633 454 if (cxled->cxld.region) { 634 455 dev_dbg(dev, "decoder attached to %s\n", 635 456 dev_name(&cxled->cxld.region->dev)); 636 - rc = -EBUSY; 637 - goto out; 457 + return -EBUSY; 638 458 } 639 459 640 460 if (cxled->cxld.flags & CXL_DECODER_F_ENABLE) { 641 461 dev_dbg(dev, "decoder enabled\n"); 642 - rc = -EBUSY; 643 - goto out; 462 + return -EBUSY; 644 463 } 645 464 646 - for (p = cxlds->ram_res.child, 
last = NULL; p; p = p->sibling) 465 + part = cxled->part; 466 + if (part < 0) { 467 + dev_dbg(dev, "partition not set\n"); 468 + return -EBUSY; 469 + } 470 + 471 + res = &cxlds->part[part].res; 472 + for (p = res->child, last = NULL; p; p = p->sibling) 647 473 last = p; 648 474 if (last) 649 - free_ram_start = last->end + 1; 475 + start = last->end + 1; 650 476 else 651 - free_ram_start = cxlds->ram_res.start; 477 + start = res->start; 652 478 653 - for (p = cxlds->pmem_res.child, last = NULL; p; p = p->sibling) 654 - last = p; 655 - if (last) 656 - free_pmem_start = last->end + 1; 657 - else 658 - free_pmem_start = cxlds->pmem_res.start; 479 + /* 480 + * To allocate at partition N, a skip needs to be calculated for all 481 + * unallocated space at lower partitions indices. 482 + * 483 + * If a partition has any allocations, the search can end because a 484 + * previous cxl_dpa_alloc() invocation is assumed to have accounted for 485 + * all previous partitions. 486 + */ 487 + skip_start = CXL_RESOURCE_NONE; 488 + for (int i = part; i; i--) { 489 + prev = &cxlds->part[i - 1].res; 490 + for (p = prev->child, last = NULL; p; p = p->sibling) 491 + last = p; 492 + if (last) { 493 + skip_start = last->end + 1; 494 + break; 495 + } 496 + skip_start = prev->start; 497 + } 659 498 660 - if (cxled->mode == CXL_DECODER_RAM) { 661 - start = free_ram_start; 662 - avail = cxlds->ram_res.end - start + 1; 499 + avail = res->end - start + 1; 500 + if (skip_start == CXL_RESOURCE_NONE) 663 501 skip = 0; 664 - } else if (cxled->mode == CXL_DECODER_PMEM) { 665 - resource_size_t skip_start, skip_end; 666 - 667 - start = free_pmem_start; 668 - avail = cxlds->pmem_res.end - start + 1; 669 - skip_start = free_ram_start; 670 - 671 - /* 672 - * If some pmem is already allocated, then that allocation 673 - * already handled the skip. 674 - */ 675 - if (cxlds->pmem_res.child && 676 - skip_start == cxlds->pmem_res.child->start) 677 - skip_end = skip_start - 1; 678 - else 679 - skip_end = start - 1; 680 - skip = skip_end - skip_start + 1; 681 - } else { 682 - dev_dbg(dev, "mode not set\n"); 683 - rc = -EINVAL; 684 - goto out; 685 - } 502 + else 503 + skip = res->start - skip_start; 686 504 687 505 if (size > avail) { 688 506 dev_dbg(dev, "%pa exceeds available %s capacity: %pa\n", &size, 689 - cxl_decoder_mode_name(cxled->mode), &avail); 690 - rc = -ENOSPC; 691 - goto out; 507 + res->name, &avail); 508 + return -ENOSPC; 692 509 } 693 510 694 - rc = __cxl_dpa_reserve(cxled, start, size, skip); 695 - out: 696 - up_write(&cxl_dpa_rwsem); 511 + return __cxl_dpa_reserve(cxled, start, size, skip); 512 + } 697 513 514 + int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size) 515 + { 516 + struct cxl_port *port = cxled_to_port(cxled); 517 + int rc; 518 + 519 + rc = __cxl_dpa_alloc(cxled, size); 698 520 if (rc) 699 521 return rc; 700 522
+83 -58
drivers/cxl/core/mbox.c
··· 11 11 12 12 #include "core.h" 13 13 #include "trace.h" 14 + #include "mce.h" 14 15 15 16 static bool cxl_raw_allow_all; 16 17 ··· 901 900 } 902 901 903 902 if (trace_cxl_general_media_enabled() || trace_cxl_dram_enabled()) { 904 - u64 dpa, hpa = ULLONG_MAX; 903 + u64 dpa, hpa = ULLONG_MAX, hpa_alias = ULLONG_MAX; 905 904 struct cxl_region *cxlr; 906 905 907 906 /* ··· 914 913 915 914 dpa = le64_to_cpu(evt->media_hdr.phys_addr) & CXL_DPA_MASK; 916 915 cxlr = cxl_dpa_to_region(cxlmd, dpa); 917 - if (cxlr) 916 + if (cxlr) { 917 + u64 cache_size = cxlr->params.cache_size; 918 + 918 919 hpa = cxl_dpa_to_hpa(cxlr, cxlmd, dpa); 920 + if (cache_size) 921 + hpa_alias = hpa - cache_size; 922 + } 919 923 920 924 if (event_type == CXL_CPER_EVENT_GEN_MEDIA) 921 925 trace_cxl_general_media(cxlmd, type, cxlr, hpa, 922 - &evt->gen_media); 926 + hpa_alias, &evt->gen_media); 923 927 else if (event_type == CXL_CPER_EVENT_DRAM) 924 - trace_cxl_dram(cxlmd, type, cxlr, hpa, &evt->dram); 928 + trace_cxl_dram(cxlmd, type, cxlr, hpa, hpa_alias, 929 + &evt->dram); 925 930 } 926 931 } 927 932 EXPORT_SYMBOL_NS_GPL(cxl_event_trace_record, "CXL"); ··· 1133 1126 le64_to_cpu(pi.active_volatile_cap) * CXL_CAPACITY_MULTIPLIER; 1134 1127 mds->active_persistent_bytes = 1135 1128 le64_to_cpu(pi.active_persistent_cap) * CXL_CAPACITY_MULTIPLIER; 1136 - mds->next_volatile_bytes = 1137 - le64_to_cpu(pi.next_volatile_cap) * CXL_CAPACITY_MULTIPLIER; 1138 - mds->next_persistent_bytes = 1139 - le64_to_cpu(pi.next_volatile_cap) * CXL_CAPACITY_MULTIPLIER; 1140 1129 1141 1130 return 0; 1142 1131 } ··· 1254 1251 { 1255 1252 struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); 1256 1253 struct cxl_port *endpoint; 1257 - int rc; 1258 1254 1259 1255 /* synchronize with cxl_mem_probe() and decoder write operations */ 1260 1256 guard(device)(&cxlmd->dev); 1261 1257 endpoint = cxlmd->endpoint; 1262 - down_read(&cxl_region_rwsem); 1258 + guard(rwsem_read)(&cxl_region_rwsem); 1263 1259 /* 1264 1260 * Require an endpoint to be safe otherwise the driver can not 1265 1261 * be sure that the device is unmapped. 
1266 1262 */ 1267 1263 if (endpoint && cxl_num_decoders_committed(endpoint) == 0) 1268 - rc = __cxl_mem_sanitize(mds, cmd); 1269 - else 1270 - rc = -EBUSY; 1271 - up_read(&cxl_region_rwsem); 1264 + return __cxl_mem_sanitize(mds, cmd); 1272 1265 1273 - return rc; 1266 + return -EBUSY; 1274 1267 } 1275 1268 1276 - static int add_dpa_res(struct device *dev, struct resource *parent, 1277 - struct resource *res, resource_size_t start, 1278 - resource_size_t size, const char *type) 1269 + static void add_part(struct cxl_dpa_info *info, u64 start, u64 size, enum cxl_partition_mode mode) 1279 1270 { 1280 - int rc; 1271 + int i = info->nr_partitions; 1281 1272 1282 - res->name = type; 1283 - res->start = start; 1284 - res->end = start + size - 1; 1285 - res->flags = IORESOURCE_MEM; 1286 - if (resource_size(res) == 0) { 1287 - dev_dbg(dev, "DPA(%s): no capacity\n", res->name); 1288 - return 0; 1289 - } 1290 - rc = request_resource(parent, res); 1291 - if (rc) { 1292 - dev_err(dev, "DPA(%s): failed to track %pr (%d)\n", res->name, 1293 - res, rc); 1294 - return rc; 1295 - } 1273 + if (size == 0) 1274 + return; 1296 1275 1297 - dev_dbg(dev, "DPA(%s): %pr\n", res->name, res); 1298 - 1299 - return 0; 1276 + info->part[i].range = (struct range) { 1277 + .start = start, 1278 + .end = start + size - 1, 1279 + }; 1280 + info->part[i].mode = mode; 1281 + info->nr_partitions++; 1300 1282 } 1301 1283 1302 - int cxl_mem_create_range_info(struct cxl_memdev_state *mds) 1284 + int cxl_mem_dpa_fetch(struct cxl_memdev_state *mds, struct cxl_dpa_info *info) 1303 1285 { 1304 1286 struct cxl_dev_state *cxlds = &mds->cxlds; 1305 1287 struct device *dev = cxlds->dev; 1306 1288 int rc; 1307 1289 1308 1290 if (!cxlds->media_ready) { 1309 - cxlds->dpa_res = DEFINE_RES_MEM(0, 0); 1310 - cxlds->ram_res = DEFINE_RES_MEM(0, 0); 1311 - cxlds->pmem_res = DEFINE_RES_MEM(0, 0); 1291 + info->size = 0; 1312 1292 return 0; 1313 1293 } 1314 1294 1315 - cxlds->dpa_res = DEFINE_RES_MEM(0, mds->total_bytes); 1295 + info->size = mds->total_bytes; 1316 1296 1317 1297 if (mds->partition_align_bytes == 0) { 1318 - rc = add_dpa_res(dev, &cxlds->dpa_res, &cxlds->ram_res, 0, 1319 - mds->volatile_only_bytes, "ram"); 1320 - if (rc) 1321 - return rc; 1322 - return add_dpa_res(dev, &cxlds->dpa_res, &cxlds->pmem_res, 1323 - mds->volatile_only_bytes, 1324 - mds->persistent_only_bytes, "pmem"); 1298 + add_part(info, 0, mds->volatile_only_bytes, CXL_PARTMODE_RAM); 1299 + add_part(info, mds->volatile_only_bytes, 1300 + mds->persistent_only_bytes, CXL_PARTMODE_PMEM); 1301 + return 0; 1325 1302 } 1326 1303 1327 1304 rc = cxl_mem_get_partition_info(mds); ··· 1310 1327 return rc; 1311 1328 } 1312 1329 1313 - rc = add_dpa_res(dev, &cxlds->dpa_res, &cxlds->ram_res, 0, 1314 - mds->active_volatile_bytes, "ram"); 1315 - if (rc) 1316 - return rc; 1317 - return add_dpa_res(dev, &cxlds->dpa_res, &cxlds->pmem_res, 1318 - mds->active_volatile_bytes, 1319 - mds->active_persistent_bytes, "pmem"); 1330 + add_part(info, 0, mds->active_volatile_bytes, CXL_PARTMODE_RAM); 1331 + add_part(info, mds->active_volatile_bytes, mds->active_persistent_bytes, 1332 + CXL_PARTMODE_PMEM); 1333 + 1334 + return 0; 1320 1335 } 1321 - EXPORT_SYMBOL_NS_GPL(cxl_mem_create_range_info, "CXL"); 1336 + EXPORT_SYMBOL_NS_GPL(cxl_mem_dpa_fetch, "CXL"); 1337 + 1338 + int cxl_get_dirty_count(struct cxl_memdev_state *mds, u32 *count) 1339 + { 1340 + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; 1341 + struct cxl_mbox_get_health_info_out hi; 1342 + struct cxl_mbox_cmd mbox_cmd; 1343 + int rc; 
1344 + 1345 + mbox_cmd = (struct cxl_mbox_cmd) { 1346 + .opcode = CXL_MBOX_OP_GET_HEALTH_INFO, 1347 + .size_out = sizeof(hi), 1348 + .payload_out = &hi, 1349 + }; 1350 + 1351 + rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); 1352 + if (!rc) 1353 + *count = le32_to_cpu(hi.dirty_shutdown_cnt); 1354 + 1355 + return rc; 1356 + } 1357 + EXPORT_SYMBOL_NS_GPL(cxl_get_dirty_count, "CXL"); 1358 + 1359 + int cxl_arm_dirty_shutdown(struct cxl_memdev_state *mds) 1360 + { 1361 + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; 1362 + struct cxl_mbox_cmd mbox_cmd; 1363 + struct cxl_mbox_set_shutdown_state_in in = { 1364 + .state = 1 1365 + }; 1366 + 1367 + mbox_cmd = (struct cxl_mbox_cmd) { 1368 + .opcode = CXL_MBOX_OP_SET_SHUTDOWN_STATE, 1369 + .size_in = sizeof(in), 1370 + .payload_in = &in, 1371 + }; 1372 + 1373 + return cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); 1374 + } 1375 + EXPORT_SYMBOL_NS_GPL(cxl_arm_dirty_shutdown, "CXL"); 1322 1376 1323 1377 int cxl_set_timestamp(struct cxl_memdev_state *mds) 1324 1378 { ··· 1487 1467 struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev) 1488 1468 { 1489 1469 struct cxl_memdev_state *mds; 1470 + int rc; 1490 1471 1491 1472 mds = devm_kzalloc(dev, sizeof(*mds), GFP_KERNEL); 1492 1473 if (!mds) { ··· 1501 1480 mds->cxlds.cxl_mbox.host = dev; 1502 1481 mds->cxlds.reg_map.resource = CXL_RESOURCE_NONE; 1503 1482 mds->cxlds.type = CXL_DEVTYPE_CLASSMEM; 1504 - mds->ram_perf.qos_class = CXL_QOS_CLASS_INVALID; 1505 - mds->pmem_perf.qos_class = CXL_QOS_CLASS_INVALID; 1483 + 1484 + rc = devm_cxl_register_mce_notifier(dev, &mds->mce_notifier); 1485 + if (rc == -EOPNOTSUPP) 1486 + dev_warn(dev, "CXL MCE unsupported\n"); 1487 + else if (rc) 1488 + return ERR_PTR(rc); 1506 1489 1507 1490 return mds; 1508 1491 }
+65
drivers/cxl/core/mce.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* Copyright(c) 2024 Intel Corporation. All rights reserved. */ 3 + #include <linux/mm.h> 4 + #include <linux/notifier.h> 5 + #include <linux/set_memory.h> 6 + #include <asm/mce.h> 7 + #include <cxlmem.h> 8 + #include "mce.h" 9 + 10 + static int cxl_handle_mce(struct notifier_block *nb, unsigned long val, 11 + void *data) 12 + { 13 + struct cxl_memdev_state *mds = container_of(nb, struct cxl_memdev_state, 14 + mce_notifier); 15 + struct cxl_memdev *cxlmd = mds->cxlds.cxlmd; 16 + struct cxl_port *endpoint = cxlmd->endpoint; 17 + struct mce *mce = data; 18 + u64 spa, spa_alias; 19 + unsigned long pfn; 20 + 21 + if (!mce || !mce_usable_address(mce)) 22 + return NOTIFY_DONE; 23 + 24 + if (!endpoint) 25 + return NOTIFY_DONE; 26 + 27 + spa = mce->addr & MCI_ADDR_PHYSADDR; 28 + 29 + pfn = spa >> PAGE_SHIFT; 30 + if (!pfn_valid(pfn)) 31 + return NOTIFY_DONE; 32 + 33 + spa_alias = cxl_port_get_spa_cache_alias(endpoint, spa); 34 + if (spa_alias == ~0ULL) 35 + return NOTIFY_DONE; 36 + 37 + pfn = spa_alias >> PAGE_SHIFT; 38 + 39 + /* 40 + * Take down the aliased memory page. The original memory page flagged 41 + * by the MCE will be taken cared of by the standard MCE handler. 42 + */ 43 + dev_emerg(mds->cxlds.dev, "Offlining aliased SPA address0: %#llx\n", 44 + spa_alias); 45 + if (!memory_failure(pfn, 0)) 46 + set_mce_nospec(pfn); 47 + 48 + return NOTIFY_OK; 49 + } 50 + 51 + static void cxl_unregister_mce_notifier(void *mce_notifier) 52 + { 53 + mce_unregister_decode_chain(mce_notifier); 54 + } 55 + 56 + int devm_cxl_register_mce_notifier(struct device *dev, 57 + struct notifier_block *mce_notifier) 58 + { 59 + mce_notifier->notifier_call = cxl_handle_mce; 60 + mce_notifier->priority = MCE_PRIO_UC; 61 + mce_register_decode_chain(mce_notifier); 62 + 63 + return devm_add_action_or_reset(dev, cxl_unregister_mce_notifier, 64 + mce_notifier); 65 + }
+20
drivers/cxl/core/mce.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + /* Copyright(c) 2024 Intel Corporation. All rights reserved. */ 3 + #ifndef _CXL_CORE_MCE_H_ 4 + #define _CXL_CORE_MCE_H_ 5 + 6 + #include <linux/notifier.h> 7 + 8 + #ifdef CONFIG_CXL_MCE 9 + int devm_cxl_register_mce_notifier(struct device *dev, 10 + struct notifier_block *mce_notifer); 11 + #else 12 + static inline int 13 + devm_cxl_register_mce_notifier(struct device *dev, 14 + struct notifier_block *mce_notifier) 15 + { 16 + return -EOPNOTSUPP; 17 + } 18 + #endif 19 + 20 + #endif
+47 -36
drivers/cxl/core/memdev.c
··· 75 75 } 76 76 static DEVICE_ATTR_RO(label_storage_size); 77 77 78 + static resource_size_t cxl_ram_size(struct cxl_dev_state *cxlds) 79 + { 80 + /* Static RAM is only expected at partition 0. */ 81 + if (cxlds->part[0].mode != CXL_PARTMODE_RAM) 82 + return 0; 83 + return resource_size(&cxlds->part[0].res); 84 + } 85 + 78 86 static ssize_t ram_size_show(struct device *dev, struct device_attribute *attr, 79 87 char *buf) 80 88 { 81 89 struct cxl_memdev *cxlmd = to_cxl_memdev(dev); 82 90 struct cxl_dev_state *cxlds = cxlmd->cxlds; 83 - unsigned long long len = resource_size(&cxlds->ram_res); 91 + unsigned long long len = cxl_ram_size(cxlds); 84 92 85 93 return sysfs_emit(buf, "%#llx\n", len); 86 94 } ··· 101 93 { 102 94 struct cxl_memdev *cxlmd = to_cxl_memdev(dev); 103 95 struct cxl_dev_state *cxlds = cxlmd->cxlds; 104 - unsigned long long len = resource_size(&cxlds->pmem_res); 96 + unsigned long long len = cxl_pmem_size(cxlds); 105 97 106 98 return sysfs_emit(buf, "%#llx\n", len); 107 99 } ··· 206 198 int rc = 0; 207 199 208 200 /* CXL 3.0 Spec 8.2.9.8.4.1 Separate pmem and ram poison requests */ 209 - if (resource_size(&cxlds->pmem_res)) { 210 - offset = cxlds->pmem_res.start; 211 - length = resource_size(&cxlds->pmem_res); 212 - rc = cxl_mem_get_poison(cxlmd, offset, length, NULL); 213 - if (rc) 214 - return rc; 215 - } 216 - if (resource_size(&cxlds->ram_res)) { 217 - offset = cxlds->ram_res.start; 218 - length = resource_size(&cxlds->ram_res); 201 + for (int i = 0; i < cxlds->nr_partitions; i++) { 202 + const struct resource *res = &cxlds->part[i].res; 203 + 204 + offset = res->start; 205 + length = resource_size(res); 219 206 rc = cxl_mem_get_poison(cxlmd, offset, length, NULL); 220 207 /* 221 208 * Invalid Physical Address is not an error for 222 209 * volatile addresses. Device support is optional. 
223 210 */ 224 - if (rc == -EFAULT) 211 + if (rc == -EFAULT && cxlds->part[i].mode == CXL_PARTMODE_RAM) 225 212 rc = 0; 226 213 } 227 214 return rc; ··· 407 404 NULL, 408 405 }; 409 406 407 + static struct cxl_dpa_perf *to_pmem_perf(struct cxl_dev_state *cxlds) 408 + { 409 + for (int i = 0; i < cxlds->nr_partitions; i++) 410 + if (cxlds->part[i].mode == CXL_PARTMODE_PMEM) 411 + return &cxlds->part[i].perf; 412 + return NULL; 413 + } 414 + 410 415 static ssize_t pmem_qos_class_show(struct device *dev, 411 416 struct device_attribute *attr, char *buf) 412 417 { 413 418 struct cxl_memdev *cxlmd = to_cxl_memdev(dev); 414 419 struct cxl_dev_state *cxlds = cxlmd->cxlds; 415 - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); 416 420 417 - return sysfs_emit(buf, "%d\n", mds->pmem_perf.qos_class); 421 + return sysfs_emit(buf, "%d\n", to_pmem_perf(cxlds)->qos_class); 418 422 } 419 423 420 424 static struct device_attribute dev_attr_pmem_qos_class = ··· 433 423 NULL, 434 424 }; 435 425 426 + static struct cxl_dpa_perf *to_ram_perf(struct cxl_dev_state *cxlds) 427 + { 428 + if (cxlds->part[0].mode != CXL_PARTMODE_RAM) 429 + return NULL; 430 + return &cxlds->part[0].perf; 431 + } 432 + 436 433 static ssize_t ram_qos_class_show(struct device *dev, 437 434 struct device_attribute *attr, char *buf) 438 435 { 439 436 struct cxl_memdev *cxlmd = to_cxl_memdev(dev); 440 437 struct cxl_dev_state *cxlds = cxlmd->cxlds; 441 - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); 442 438 443 - return sysfs_emit(buf, "%d\n", mds->ram_perf.qos_class); 439 + return sysfs_emit(buf, "%d\n", to_ram_perf(cxlds)->qos_class); 444 440 } 445 441 446 442 static struct device_attribute dev_attr_ram_qos_class = ··· 482 466 { 483 467 struct device *dev = kobj_to_dev(kobj); 484 468 struct cxl_memdev *cxlmd = to_cxl_memdev(dev); 485 - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); 469 + struct cxl_dpa_perf *perf = to_ram_perf(cxlmd->cxlds); 486 470 487 - if (a == &dev_attr_ram_qos_class.attr) 488 - if (mds->ram_perf.qos_class == CXL_QOS_CLASS_INVALID) 489 - return 0; 471 + if (a == &dev_attr_ram_qos_class.attr && 472 + (!perf || perf->qos_class == CXL_QOS_CLASS_INVALID)) 473 + return 0; 490 474 491 475 return a->mode; 492 476 } ··· 501 485 { 502 486 struct device *dev = kobj_to_dev(kobj); 503 487 struct cxl_memdev *cxlmd = to_cxl_memdev(dev); 504 - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); 488 + struct cxl_dpa_perf *perf = to_pmem_perf(cxlmd->cxlds); 505 489 506 - if (a == &dev_attr_pmem_qos_class.attr) 507 - if (mds->pmem_perf.qos_class == CXL_QOS_CLASS_INVALID) 508 - return 0; 490 + if (a == &dev_attr_pmem_qos_class.attr && 491 + (!perf || perf->qos_class == CXL_QOS_CLASS_INVALID)) 492 + return 0; 509 493 510 494 return a->mode; 511 495 } ··· 582 566 { 583 567 struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; 584 568 585 - down_write(&cxl_memdev_rwsem); 569 + guard(rwsem_write)(&cxl_memdev_rwsem); 586 570 bitmap_or(cxl_mbox->exclusive_cmds, cxl_mbox->exclusive_cmds, 587 571 cmds, CXL_MEM_COMMAND_ID_MAX); 588 - up_write(&cxl_memdev_rwsem); 589 572 } 590 573 EXPORT_SYMBOL_NS_GPL(set_exclusive_cxl_commands, "CXL"); 591 574 ··· 598 583 { 599 584 struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; 600 585 601 - down_write(&cxl_memdev_rwsem); 586 + guard(rwsem_write)(&cxl_memdev_rwsem); 602 587 bitmap_andnot(cxl_mbox->exclusive_cmds, cxl_mbox->exclusive_cmds, 603 588 cmds, CXL_MEM_COMMAND_ID_MAX); 604 - up_write(&cxl_memdev_rwsem); 605 589 } 606 590 
EXPORT_SYMBOL_NS_GPL(clear_exclusive_cxl_commands, "CXL"); 607 591 ··· 608 594 { 609 595 struct cxl_memdev *cxlmd = to_cxl_memdev(dev); 610 596 611 - down_write(&cxl_memdev_rwsem); 597 + guard(rwsem_write)(&cxl_memdev_rwsem); 612 598 cxlmd->cxlds = NULL; 613 - up_write(&cxl_memdev_rwsem); 614 599 } 615 600 616 601 static void cxl_memdev_unregister(void *_cxlmd) ··· 691 678 { 692 679 struct cxl_memdev *cxlmd = file->private_data; 693 680 struct cxl_dev_state *cxlds; 694 - int rc = -ENXIO; 695 681 696 - down_read(&cxl_memdev_rwsem); 682 + guard(rwsem_read)(&cxl_memdev_rwsem); 697 683 cxlds = cxlmd->cxlds; 698 684 if (cxlds && cxlds->type == CXL_DEVTYPE_CLASSMEM) 699 - rc = __cxl_memdev_ioctl(cxlmd, cmd, arg); 700 - up_read(&cxl_memdev_rwsem); 685 + return __cxl_memdev_ioctl(cxlmd, cmd, arg); 701 686 702 - return rc; 687 + return -ENXIO; 703 688 } 704 689 705 690 static int cxl_memdev_open(struct inode *inode, struct file *file)
+97
drivers/cxl/core/pci.c
··· 1054 1054 1055 1055 return 0; 1056 1056 } 1057 + 1058 + /* 1059 + * Set max timeout such that platforms will optimize GPF flow to avoid 1060 + * the implied worst-case scenario delays. On a sane platform, all 1061 + * devices should always complete GPF within the energy budget of 1062 + * the GPF flow. The kernel does not have enough information to pick 1063 + * anything better than "maximize timeouts and hope it works". 1064 + * 1065 + * A misbehaving device could block forward progress of GPF for all 1066 + * the other devices, exhausting the energy budget of the platform. 1067 + * However, the spec seems to assume that moving on from slow to respond 1068 + * devices is a virtue. It is not possible to know that, in actuality, 1069 + * the slow to respond device is *the* most critical device in the 1070 + * system to wait. 1071 + */ 1072 + #define GPF_TIMEOUT_BASE_MAX 2 1073 + #define GPF_TIMEOUT_SCALE_MAX 7 /* 10 seconds */ 1074 + 1075 + u16 cxl_gpf_get_dvsec(struct device *dev, bool is_port) 1076 + { 1077 + u16 dvsec; 1078 + 1079 + if (!dev_is_pci(dev)) 1080 + return 0; 1081 + 1082 + dvsec = pci_find_dvsec_capability(to_pci_dev(dev), PCI_VENDOR_ID_CXL, 1083 + is_port ? CXL_DVSEC_PORT_GPF : CXL_DVSEC_DEVICE_GPF); 1084 + if (!dvsec) 1085 + dev_warn(dev, "%s GPF DVSEC not present\n", 1086 + is_port ? "Port" : "Device"); 1087 + return dvsec; 1088 + } 1089 + EXPORT_SYMBOL_NS_GPL(cxl_gpf_get_dvsec, "CXL"); 1090 + 1091 + static int update_gpf_port_dvsec(struct pci_dev *pdev, int dvsec, int phase) 1092 + { 1093 + u64 base, scale; 1094 + int rc, offset; 1095 + u16 ctrl; 1096 + 1097 + switch (phase) { 1098 + case 1: 1099 + offset = CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET; 1100 + base = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK; 1101 + scale = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK; 1102 + break; 1103 + case 2: 1104 + offset = CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET; 1105 + base = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK; 1106 + scale = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK; 1107 + break; 1108 + default: 1109 + return -EINVAL; 1110 + } 1111 + 1112 + rc = pci_read_config_word(pdev, dvsec + offset, &ctrl); 1113 + if (rc) 1114 + return rc; 1115 + 1116 + if (FIELD_GET(base, ctrl) == GPF_TIMEOUT_BASE_MAX && 1117 + FIELD_GET(scale, ctrl) == GPF_TIMEOUT_SCALE_MAX) 1118 + return 0; 1119 + 1120 + ctrl = FIELD_PREP(base, GPF_TIMEOUT_BASE_MAX); 1121 + ctrl |= FIELD_PREP(scale, GPF_TIMEOUT_SCALE_MAX); 1122 + 1123 + rc = pci_write_config_word(pdev, dvsec + offset, ctrl); 1124 + if (!rc) 1125 + pci_dbg(pdev, "Port GPF phase %d timeout: %d0 secs\n", 1126 + phase, GPF_TIMEOUT_BASE_MAX); 1127 + 1128 + return rc; 1129 + } 1130 + 1131 + int cxl_gpf_port_setup(struct device *dport_dev, struct cxl_port *port) 1132 + { 1133 + struct pci_dev *pdev; 1134 + 1135 + if (!port) 1136 + return -EINVAL; 1137 + 1138 + if (!port->gpf_dvsec) { 1139 + int dvsec; 1140 + 1141 + dvsec = cxl_gpf_get_dvsec(dport_dev, true); 1142 + if (!dvsec) 1143 + return -EINVAL; 1144 + 1145 + port->gpf_dvsec = dvsec; 1146 + } 1147 + 1148 + pdev = to_pci_dev(dport_dev); 1149 + update_gpf_port_dvsec(pdev, port->gpf_dvsec, 1); 1150 + update_gpf_port_dvsec(pdev, port->gpf_dvsec, 2); 1151 + 1152 + return 0; 1153 + }
+27 -11
drivers/cxl/core/port.c
··· 194 194 char *buf) 195 195 { 196 196 struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev); 197 + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); 198 + struct cxl_dev_state *cxlds = cxlmd->cxlds; 199 + /* without @cxl_dpa_rwsem, make sure @part is not reloaded */ 200 + int part = READ_ONCE(cxled->part); 201 + const char *desc; 197 202 198 - return sysfs_emit(buf, "%s\n", cxl_decoder_mode_name(cxled->mode)); 203 + if (part < 0) 204 + desc = "none"; 205 + else 206 + desc = cxlds->part[part].res.name; 207 + 208 + return sysfs_emit(buf, "%s\n", desc); 199 209 } 200 210 201 211 static ssize_t mode_store(struct device *dev, struct device_attribute *attr, 202 212 const char *buf, size_t len) 203 213 { 204 214 struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev); 205 - enum cxl_decoder_mode mode; 215 + enum cxl_partition_mode mode; 206 216 ssize_t rc; 207 217 208 218 if (sysfs_streq(buf, "pmem")) 209 - mode = CXL_DECODER_PMEM; 219 + mode = CXL_PARTMODE_PMEM; 210 220 else if (sysfs_streq(buf, "ram")) 211 - mode = CXL_DECODER_RAM; 221 + mode = CXL_PARTMODE_RAM; 212 222 else 213 223 return -EINVAL; 214 224 215 - rc = cxl_dpa_set_mode(cxled, mode); 225 + rc = cxl_dpa_set_part(cxled, mode); 216 226 if (rc) 217 227 return rc; 218 228 ··· 559 549 struct device_attribute *attr, char *buf) 560 550 { 561 551 struct cxl_port *port = to_cxl_port(dev); 562 - int rc; 563 552 564 - down_read(&cxl_region_rwsem); 565 - rc = sysfs_emit(buf, "%d\n", cxl_num_decoders_committed(port)); 566 - up_read(&cxl_region_rwsem); 567 - 568 - return rc; 553 + guard(rwsem_read)(&cxl_region_rwsem); 554 + return sysfs_emit(buf, "%d\n", cxl_num_decoders_committed(port)); 569 555 } 570 556 571 557 static DEVICE_ATTR_RO(decoders_committed); ··· 1678 1672 if (rc && rc != -EBUSY) 1679 1673 return rc; 1680 1674 1675 + cxl_gpf_port_setup(dport_dev, port); 1676 + 1681 1677 /* Any more ports to add between this one and the root? */ 1682 1678 if (!dev_is_cxl_root_child(&port->dev)) 1683 1679 continue; ··· 1907 1899 return ERR_PTR(-ENOMEM); 1908 1900 1909 1901 cxled->pos = -1; 1902 + cxled->part = -1; 1910 1903 cxld = &cxled->cxld; 1911 1904 rc = cxl_decoder_init(port, cxld); 1912 1905 if (rc) { ··· 2348 2339 if (rc) 2349 2340 goto err_region; 2350 2341 2342 + rc = cxl_ras_init(); 2343 + if (rc) 2344 + goto err_ras; 2345 + 2351 2346 return 0; 2352 2347 2348 + err_ras: 2349 + cxl_region_exit(); 2353 2350 err_region: 2354 2351 bus_unregister(&cxl_bus_type); 2355 2352 err_bus: ··· 2367 2352 2368 2353 static void cxl_core_exit(void) 2369 2354 { 2355 + cxl_ras_exit(); 2370 2356 cxl_region_exit(); 2371 2357 bus_unregister(&cxl_bus_type); 2372 2358 destroy_workqueue(cxl_bus_wq);
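With the switch from cxl_decoder_mode to partition indices, mode_store() still accepts only the strings "ram" and "pmem", while mode_show() now reports the selected partition's resource name (expected to read "ram" or "pmem") or "none" when the decoder has no partition. A rough userspace illustration of driving that attribute; the decoder path below is a placeholder, not something defined by this diff:

  #include <stdio.h>

  int main(void)
  {
          /* placeholder path; substitute a real endpoint decoder */
          const char *attr = "/sys/bus/cxl/devices/decoder3.0/mode";
          char buf[16] = "";
          FILE *f;

          f = fopen(attr, "w");
          if (f) {
                  fputs("ram\n", f);      /* parsed via sysfs_streq() in mode_store() */
                  fclose(f);
          }

          f = fopen(attr, "r");
          if (f) {
                  if (fgets(buf, sizeof(buf), f))
                          printf("decoder mode: %s", buf); /* "ram", "pmem" or "none" */
                  fclose(f);
          }
          return 0;
  }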
+119
drivers/cxl/core/ras.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* Copyright(c) 2025 AMD Corporation. All rights reserved. */ 3 + 4 + #include <linux/pci.h> 5 + #include <linux/aer.h> 6 + #include <cxl/event.h> 7 + #include <cxlmem.h> 8 + #include "trace.h" 9 + 10 + static void cxl_cper_trace_corr_port_prot_err(struct pci_dev *pdev, 11 + struct cxl_ras_capability_regs ras_cap) 12 + { 13 + u32 status = ras_cap.cor_status & ~ras_cap.cor_mask; 14 + 15 + trace_cxl_port_aer_correctable_error(&pdev->dev, status); 16 + } 17 + 18 + static void cxl_cper_trace_uncorr_port_prot_err(struct pci_dev *pdev, 19 + struct cxl_ras_capability_regs ras_cap) 20 + { 21 + u32 status = ras_cap.uncor_status & ~ras_cap.uncor_mask; 22 + u32 fe; 23 + 24 + if (hweight32(status) > 1) 25 + fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, 26 + ras_cap.cap_control)); 27 + else 28 + fe = status; 29 + 30 + trace_cxl_port_aer_uncorrectable_error(&pdev->dev, status, fe, 31 + ras_cap.header_log); 32 + } 33 + 34 + static void cxl_cper_trace_corr_prot_err(struct pci_dev *pdev, 35 + struct cxl_ras_capability_regs ras_cap) 36 + { 37 + u32 status = ras_cap.cor_status & ~ras_cap.cor_mask; 38 + struct cxl_dev_state *cxlds; 39 + 40 + cxlds = pci_get_drvdata(pdev); 41 + if (!cxlds) 42 + return; 43 + 44 + trace_cxl_aer_correctable_error(cxlds->cxlmd, status); 45 + } 46 + 47 + static void cxl_cper_trace_uncorr_prot_err(struct pci_dev *pdev, 48 + struct cxl_ras_capability_regs ras_cap) 49 + { 50 + u32 status = ras_cap.uncor_status & ~ras_cap.uncor_mask; 51 + struct cxl_dev_state *cxlds; 52 + u32 fe; 53 + 54 + cxlds = pci_get_drvdata(pdev); 55 + if (!cxlds) 56 + return; 57 + 58 + if (hweight32(status) > 1) 59 + fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, 60 + ras_cap.cap_control)); 61 + else 62 + fe = status; 63 + 64 + trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe, 65 + ras_cap.header_log); 66 + } 67 + 68 + static void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data) 69 + { 70 + unsigned int devfn = PCI_DEVFN(data->prot_err.agent_addr.device, 71 + data->prot_err.agent_addr.function); 72 + struct pci_dev *pdev __free(pci_dev_put) = 73 + pci_get_domain_bus_and_slot(data->prot_err.agent_addr.segment, 74 + data->prot_err.agent_addr.bus, 75 + devfn); 76 + int port_type; 77 + 78 + if (!pdev) 79 + return; 80 + 81 + guard(device)(&pdev->dev); 82 + 83 + port_type = pci_pcie_type(pdev); 84 + if (port_type == PCI_EXP_TYPE_ROOT_PORT || 85 + port_type == PCI_EXP_TYPE_DOWNSTREAM || 86 + port_type == PCI_EXP_TYPE_UPSTREAM) { 87 + if (data->severity == AER_CORRECTABLE) 88 + cxl_cper_trace_corr_port_prot_err(pdev, data->ras_cap); 89 + else 90 + cxl_cper_trace_uncorr_port_prot_err(pdev, data->ras_cap); 91 + 92 + return; 93 + } 94 + 95 + if (data->severity == AER_CORRECTABLE) 96 + cxl_cper_trace_corr_prot_err(pdev, data->ras_cap); 97 + else 98 + cxl_cper_trace_uncorr_prot_err(pdev, data->ras_cap); 99 + } 100 + 101 + static void cxl_cper_prot_err_work_fn(struct work_struct *work) 102 + { 103 + struct cxl_cper_prot_err_work_data wd; 104 + 105 + while (cxl_cper_prot_err_kfifo_get(&wd)) 106 + cxl_cper_handle_prot_err(&wd); 107 + } 108 + static DECLARE_WORK(cxl_cper_prot_err_work, cxl_cper_prot_err_work_fn); 109 + 110 + int cxl_ras_init(void) 111 + { 112 + return cxl_cper_register_prot_err_work(&cxl_cper_prot_err_work); 113 + } 114 + 115 + void cxl_ras_exit(void) 116 + { 117 + cxl_cper_unregister_prot_err_work(&cxl_cper_prot_err_work); 118 + cancel_work_sync(&cxl_cper_prot_err_work); 119 + }
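cxl_cper_handle_prot_err() above locates the struct pci_dev purely from the segment/bus/device/function carried in the CPER record. A self-contained sketch of that address handling; the struct here only mirrors the agent_addr fields of cxl_cper_sec_prot_err from the include/cxl/event.h hunk later in this diff, and the numbers are made up:

  #include <stdint.h>
  #include <stdio.h>

  struct agent_addr {                     /* subset of cxl_cper_sec_prot_err */
          uint8_t function;
          uint8_t device;
          uint8_t bus;
          uint16_t segment;
  };

  int main(void)
  {
          struct agent_addr a = { .function = 1, .device = 2, .bus = 0x3c, .segment = 0 };
          /* same construction as PCI_DEVFN(device, function) */
          unsigned int devfn = ((a.device & 0x1f) << 3) | (a.function & 0x7);

          printf("%04x:%02x:%02x.%x devfn=%#04x\n",
                 a.segment, a.bus, a.device, a.function, devfn);
          return 0;
  }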
+219 -123
drivers/cxl/core/region.c
··· 144 144 rc = down_read_interruptible(&cxl_region_rwsem); 145 145 if (rc) 146 146 return rc; 147 - if (cxlr->mode != CXL_DECODER_PMEM) 147 + if (cxlr->mode != CXL_PARTMODE_PMEM) 148 148 rc = sysfs_emit(buf, "\n"); 149 149 else 150 150 rc = sysfs_emit(buf, "%pUb\n", &p->uuid); ··· 441 441 * Support tooling that expects to find a 'uuid' attribute for all 442 442 * regions regardless of mode. 443 443 */ 444 - if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_DECODER_PMEM) 444 + if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_PARTMODE_PMEM) 445 445 return 0444; 446 446 return a->mode; 447 447 } ··· 603 603 char *buf) 604 604 { 605 605 struct cxl_region *cxlr = to_cxl_region(dev); 606 + const char *desc; 606 607 607 - return sysfs_emit(buf, "%s\n", cxl_decoder_mode_name(cxlr->mode)); 608 + if (cxlr->mode == CXL_PARTMODE_RAM) 609 + desc = "ram"; 610 + else if (cxlr->mode == CXL_PARTMODE_PMEM) 611 + desc = "pmem"; 612 + else 613 + desc = ""; 614 + 615 + return sysfs_emit(buf, "%s\n", desc); 608 616 } 609 617 static DEVICE_ATTR_RO(mode); 610 618 ··· 638 630 639 631 /* ways, granularity and uuid (if PMEM) need to be set before HPA */ 640 632 if (!p->interleave_ways || !p->interleave_granularity || 641 - (cxlr->mode == CXL_DECODER_PMEM && uuid_is_null(&p->uuid))) 633 + (cxlr->mode == CXL_PARTMODE_PMEM && uuid_is_null(&p->uuid))) 642 634 return -ENXIO; 643 635 644 636 div64_u64_rem(size, (u64)SZ_256M * p->interleave_ways, &remainder); ··· 832 824 return 1; 833 825 } 834 826 827 + static bool region_res_match_cxl_range(const struct cxl_region_params *p, 828 + struct range *range) 829 + { 830 + if (!p->res) 831 + return false; 832 + 833 + /* 834 + * If an extended linear cache region then the CXL range is assumed 835 + * to be fronted by the DRAM range in current known implementation. 836 + * This assumption will be made until a variant implementation exists. 
837 + */ 838 + return p->res->start + p->cache_size == range->start && 839 + p->res->end == range->end; 840 + } 841 + 835 842 static int match_auto_decoder(struct device *dev, const void *data) 836 843 { 837 844 const struct cxl_region_params *p = data; ··· 859 836 cxld = to_cxl_decoder(dev); 860 837 r = &cxld->hpa_range; 861 838 862 - if (p->res && p->res->start == r->start && p->res->end == r->end) 839 + if (region_res_match_cxl_range(p, r)) 863 840 return 1; 864 841 865 842 return 0; ··· 1447 1424 if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) { 1448 1425 if (cxld->interleave_ways != iw || 1449 1426 cxld->interleave_granularity != ig || 1450 - cxld->hpa_range.start != p->res->start || 1451 - cxld->hpa_range.end != p->res->end || 1427 + !region_res_match_cxl_range(p, &cxld->hpa_range) || 1452 1428 ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)) { 1453 1429 dev_err(&cxlr->dev, 1454 1430 "%s:%s %s expected iw: %d ig: %d %pr\n", ··· 1910 1888 { 1911 1889 struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); 1912 1890 struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); 1891 + struct cxl_dev_state *cxlds = cxlmd->cxlds; 1913 1892 struct cxl_region_params *p = &cxlr->params; 1914 1893 struct cxl_port *ep_port, *root_port; 1915 1894 struct cxl_dport *dport; ··· 1925 1902 return rc; 1926 1903 } 1927 1904 1928 - if (cxled->mode != cxlr->mode) { 1929 - dev_dbg(&cxlr->dev, "%s region mode: %d mismatch: %d\n", 1930 - dev_name(&cxled->cxld.dev), cxlr->mode, cxled->mode); 1931 - return -EINVAL; 1932 - } 1933 - 1934 - if (cxled->mode == CXL_DECODER_DEAD) { 1905 + if (cxled->part < 0) { 1935 1906 dev_dbg(&cxlr->dev, "%s dead\n", dev_name(&cxled->cxld.dev)); 1936 1907 return -ENODEV; 1908 + } 1909 + 1910 + if (cxlds->part[cxled->part].mode != cxlr->mode) { 1911 + dev_dbg(&cxlr->dev, "%s region mode: %d mismatch\n", 1912 + dev_name(&cxled->cxld.dev), cxlr->mode); 1913 + return -EINVAL; 1937 1914 } 1938 1915 1939 1916 /* all full of members, or interleave config not established? 
*/ ··· 1974 1951 return -ENXIO; 1975 1952 } 1976 1953 1977 - if (resource_size(cxled->dpa_res) * p->interleave_ways != 1954 + if (resource_size(cxled->dpa_res) * p->interleave_ways + p->cache_size != 1978 1955 resource_size(p->res)) { 1979 1956 dev_dbg(&cxlr->dev, 1980 - "%s:%s: decoder-size-%#llx * ways-%d != region-size-%#llx\n", 1957 + "%s:%s-size-%#llx * ways-%d + cache-%#llx != region-size-%#llx\n", 1981 1958 dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), 1982 1959 (u64)resource_size(cxled->dpa_res), p->interleave_ways, 1983 - (u64)resource_size(p->res)); 1960 + (u64)p->cache_size, (u64)resource_size(p->res)); 1984 1961 return -EINVAL; 1985 1962 } 1986 1963 ··· 2138 2115 void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled) 2139 2116 { 2140 2117 down_write(&cxl_region_rwsem); 2141 - cxled->mode = CXL_DECODER_DEAD; 2118 + cxled->part = -1; 2142 2119 cxl_region_detach(cxled); 2143 2120 up_write(&cxl_region_rwsem); 2144 2121 } ··· 2494 2471 */ 2495 2472 static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd, 2496 2473 int id, 2497 - enum cxl_decoder_mode mode, 2474 + enum cxl_partition_mode mode, 2498 2475 enum cxl_decoder_type type) 2499 2476 { 2500 2477 struct cxl_port *port = to_cxl_port(cxlrd->cxlsd.cxld.dev.parent); ··· 2548 2525 } 2549 2526 2550 2527 static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd, 2551 - enum cxl_decoder_mode mode, int id) 2528 + enum cxl_partition_mode mode, int id) 2552 2529 { 2553 2530 int rc; 2554 2531 2555 2532 switch (mode) { 2556 - case CXL_DECODER_RAM: 2557 - case CXL_DECODER_PMEM: 2533 + case CXL_PARTMODE_RAM: 2534 + case CXL_PARTMODE_PMEM: 2558 2535 break; 2559 2536 default: 2560 2537 dev_err(&cxlrd->cxlsd.cxld.dev, "unsupported mode %d\n", mode); ··· 2574 2551 } 2575 2552 2576 2553 static ssize_t create_region_store(struct device *dev, const char *buf, 2577 - size_t len, enum cxl_decoder_mode mode) 2554 + size_t len, enum cxl_partition_mode mode) 2578 2555 { 2579 2556 struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev); 2580 2557 struct cxl_region *cxlr; ··· 2595 2572 struct device_attribute *attr, 2596 2573 const char *buf, size_t len) 2597 2574 { 2598 - return create_region_store(dev, buf, len, CXL_DECODER_PMEM); 2575 + return create_region_store(dev, buf, len, CXL_PARTMODE_PMEM); 2599 2576 } 2600 2577 DEVICE_ATTR_RW(create_pmem_region); 2601 2578 ··· 2603 2580 struct device_attribute *attr, 2604 2581 const char *buf, size_t len) 2605 2582 { 2606 - return create_region_store(dev, buf, len, CXL_DECODER_RAM); 2583 + return create_region_store(dev, buf, len, CXL_PARTMODE_RAM); 2607 2584 } 2608 2585 DEVICE_ATTR_RW(create_ram_region); 2609 2586 ··· 2701 2678 2702 2679 struct cxl_poison_context { 2703 2680 struct cxl_port *port; 2704 - enum cxl_decoder_mode mode; 2681 + int part; 2705 2682 u64 offset; 2706 2683 }; 2707 2684 ··· 2709 2686 struct cxl_poison_context *ctx) 2710 2687 { 2711 2688 struct cxl_dev_state *cxlds = cxlmd->cxlds; 2689 + const struct resource *res; 2690 + struct resource *p, *last; 2712 2691 u64 offset, length; 2713 2692 int rc = 0; 2714 2693 2715 - /* 2716 - * Collect poison for the remaining unmapped resources 2717 - * after poison is collected by committed endpoints. 
2718 - * 2719 - * Knowing that PMEM must always follow RAM, get poison 2720 - * for unmapped resources based on the last decoder's mode: 2721 - * ram: scan remains of ram range, then any pmem range 2722 - * pmem: scan remains of pmem range 2723 - */ 2724 - 2725 - if (ctx->mode == CXL_DECODER_RAM) { 2726 - offset = ctx->offset; 2727 - length = resource_size(&cxlds->ram_res) - offset; 2728 - rc = cxl_mem_get_poison(cxlmd, offset, length, NULL); 2729 - if (rc == -EFAULT) 2730 - rc = 0; 2731 - if (rc) 2732 - return rc; 2733 - } 2734 - if (ctx->mode == CXL_DECODER_PMEM) { 2735 - offset = ctx->offset; 2736 - length = resource_size(&cxlds->dpa_res) - offset; 2737 - if (!length) 2738 - return 0; 2739 - } else if (resource_size(&cxlds->pmem_res)) { 2740 - offset = cxlds->pmem_res.start; 2741 - length = resource_size(&cxlds->pmem_res); 2742 - } else { 2694 + if (ctx->part < 0) 2743 2695 return 0; 2696 + 2697 + /* 2698 + * Collect poison for the remaining unmapped resources after 2699 + * poison is collected by committed endpoints decoders. 2700 + */ 2701 + for (int i = ctx->part; i < cxlds->nr_partitions; i++) { 2702 + res = &cxlds->part[i].res; 2703 + for (p = res->child, last = NULL; p; p = p->sibling) 2704 + last = p; 2705 + if (last) 2706 + offset = last->end + 1; 2707 + else 2708 + offset = res->start; 2709 + length = res->end - offset + 1; 2710 + if (!length) 2711 + break; 2712 + rc = cxl_mem_get_poison(cxlmd, offset, length, NULL); 2713 + if (rc == -EFAULT && cxlds->part[i].mode == CXL_PARTMODE_RAM) 2714 + continue; 2715 + if (rc) 2716 + break; 2744 2717 } 2745 2718 2746 - return cxl_mem_get_poison(cxlmd, offset, length, NULL); 2719 + return rc; 2747 2720 } 2748 2721 2749 2722 static int poison_by_decoder(struct device *dev, void *arg) 2750 2723 { 2751 2724 struct cxl_poison_context *ctx = arg; 2752 2725 struct cxl_endpoint_decoder *cxled; 2726 + enum cxl_partition_mode mode; 2727 + struct cxl_dev_state *cxlds; 2753 2728 struct cxl_memdev *cxlmd; 2754 2729 u64 offset, length; 2755 2730 int rc = 0; ··· 2756 2735 return rc; 2757 2736 2758 2737 cxled = to_cxl_endpoint_decoder(dev); 2759 - if (!cxled->dpa_res || !resource_size(cxled->dpa_res)) 2738 + if (!cxled->dpa_res) 2760 2739 return rc; 2761 - 2762 - /* 2763 - * Regions are only created with single mode decoders: pmem or ram. 2764 - * Linux does not support mixed mode decoders. This means that 2765 - * reading poison per endpoint decoder adheres to the requirement 2766 - * that poison reads of pmem and ram must be separated. 
2767 - * CXL 3.0 Spec 8.2.9.8.4.1 2768 - */ 2769 - if (cxled->mode == CXL_DECODER_MIXED) { 2770 - dev_dbg(dev, "poison list read unsupported in mixed mode\n"); 2771 - return rc; 2772 - } 2773 2740 2774 2741 cxlmd = cxled_to_memdev(cxled); 2742 + cxlds = cxlmd->cxlds; 2743 + mode = cxlds->part[cxled->part].mode; 2744 + 2775 2745 if (cxled->skip) { 2776 2746 offset = cxled->dpa_res->start - cxled->skip; 2777 2747 length = cxled->skip; 2778 2748 rc = cxl_mem_get_poison(cxlmd, offset, length, NULL); 2779 - if (rc == -EFAULT && cxled->mode == CXL_DECODER_RAM) 2749 + if (rc == -EFAULT && mode == CXL_PARTMODE_RAM) 2780 2750 rc = 0; 2781 2751 if (rc) 2782 2752 return rc; ··· 2776 2764 offset = cxled->dpa_res->start; 2777 2765 length = cxled->dpa_res->end - offset + 1; 2778 2766 rc = cxl_mem_get_poison(cxlmd, offset, length, cxled->cxld.region); 2779 - if (rc == -EFAULT && cxled->mode == CXL_DECODER_RAM) 2767 + if (rc == -EFAULT && mode == CXL_PARTMODE_RAM) 2780 2768 rc = 0; 2781 2769 if (rc) 2782 2770 return rc; ··· 2784 2772 /* Iterate until commit_end is reached */ 2785 2773 if (cxled->cxld.id == ctx->port->commit_end) { 2786 2774 ctx->offset = cxled->dpa_res->end + 1; 2787 - ctx->mode = cxled->mode; 2775 + ctx->part = cxled->part; 2788 2776 return 1; 2789 2777 } 2790 2778 ··· 2797 2785 int rc = 0; 2798 2786 2799 2787 ctx = (struct cxl_poison_context) { 2800 - .port = port 2788 + .port = port, 2789 + .part = -1, 2801 2790 }; 2802 2791 2803 2792 rc = device_for_each_child(&port->dev, &ctx, poison_by_decoder); ··· 2934 2921 hpa_offset |= dpa_offset & GENMASK_ULL(eig + 7, 0); 2935 2922 2936 2923 /* Apply the hpa_offset to the region base address */ 2937 - hpa = hpa_offset + p->res->start; 2924 + hpa = hpa_offset + p->res->start + p->cache_size; 2938 2925 2939 2926 /* Root decoder translation overrides typical modulo decode */ 2940 2927 if (cxlrd->hpa_to_spa) ··· 3051 3038 struct cxl_dax_region *cxlr_dax; 3052 3039 struct device *dev; 3053 3040 3054 - down_read(&cxl_region_rwsem); 3055 - if (p->state != CXL_CONFIG_COMMIT) { 3056 - cxlr_dax = ERR_PTR(-ENXIO); 3057 - goto out; 3058 - } 3041 + guard(rwsem_read)(&cxl_region_rwsem); 3042 + if (p->state != CXL_CONFIG_COMMIT) 3043 + return ERR_PTR(-ENXIO); 3059 3044 3060 3045 cxlr_dax = kzalloc(sizeof(*cxlr_dax), GFP_KERNEL); 3061 - if (!cxlr_dax) { 3062 - cxlr_dax = ERR_PTR(-ENOMEM); 3063 - goto out; 3064 - } 3046 + if (!cxlr_dax) 3047 + return ERR_PTR(-ENOMEM); 3065 3048 3066 3049 cxlr_dax->hpa_range.start = p->res->start; 3067 3050 cxlr_dax->hpa_range.end = p->res->end; ··· 3070 3061 dev->parent = &cxlr->dev; 3071 3062 dev->bus = &cxl_bus_type; 3072 3063 dev->type = &cxl_dax_region_type; 3073 - out: 3074 - up_read(&cxl_region_rwsem); 3075 3064 3076 3065 return cxlr_dax; 3077 3066 } ··· 3215 3208 struct cxl_region_params *p; 3216 3209 struct cxl_region *cxlr; 3217 3210 const struct range *r = data; 3218 - int rc = 0; 3219 3211 3220 3212 if (!is_cxl_region(dev)) 3221 3213 return 0; ··· 3222 3216 cxlr = to_cxl_region(dev); 3223 3217 p = &cxlr->params; 3224 3218 3225 - down_read(&cxl_region_rwsem); 3219 + guard(rwsem_read)(&cxl_region_rwsem); 3226 3220 if (p->res && p->res->start == r->start && p->res->end == r->end) 3227 - rc = 1; 3228 - up_read(&cxl_region_rwsem); 3221 + return 1; 3229 3222 3230 - return rc; 3223 + return 0; 3231 3224 } 3232 3225 3233 - /* Establish an empty region covering the given HPA range */ 3234 - static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, 3235 - struct cxl_endpoint_decoder *cxled) 3226 + static int 
cxl_extended_linear_cache_resize(struct cxl_region *cxlr, 3227 + struct resource *res) 3228 + { 3229 + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); 3230 + struct cxl_region_params *p = &cxlr->params; 3231 + int nid = phys_to_target_node(res->start); 3232 + resource_size_t size = resource_size(res); 3233 + resource_size_t cache_size, start; 3234 + int rc; 3235 + 3236 + rc = cxl_acpi_get_extended_linear_cache_size(res, nid, &cache_size); 3237 + if (rc) 3238 + return rc; 3239 + 3240 + if (!cache_size) 3241 + return 0; 3242 + 3243 + if (size != cache_size) { 3244 + dev_warn(&cxlr->dev, 3245 + "Extended Linear Cache size %pa != CXL size %pa. No Support!", 3246 + &cache_size, &size); 3247 + return -ENXIO; 3248 + } 3249 + 3250 + /* 3251 + * Move the start of the range to where the cache range starts. The 3252 + * implementation assumes that the cache range is in front of the 3253 + * CXL range. This is not dictated by the HMAT spec but is how the 3254 + * current known implementation is configured. 3255 + * 3256 + * The cache range is expected to be within the CFMWS. The adjusted 3257 + * res->start should not be less than cxlrd->res->start. 3258 + */ 3259 + start = res->start - cache_size; 3260 + if (start < cxlrd->res->start) 3261 + return -ENXIO; 3262 + 3263 + res->start = start; 3264 + p->cache_size = cache_size; 3265 + 3266 + return 0; 3267 + } 3268 + 3269 + static int __construct_region(struct cxl_region *cxlr, 3270 + struct cxl_root_decoder *cxlrd, 3271 + struct cxl_endpoint_decoder *cxled) 3236 3272 { 3237 3273 struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); 3238 - struct cxl_port *port = cxlrd_to_port(cxlrd); 3239 3274 struct range *hpa = &cxled->cxld.hpa_range; 3240 3275 struct cxl_region_params *p; 3241 - struct cxl_region *cxlr; 3242 3276 struct resource *res; 3243 3277 int rc; 3244 3278 3245 - do { 3246 - cxlr = __create_region(cxlrd, cxled->mode, 3247 - atomic_read(&cxlrd->region_id)); 3248 - } while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY); 3249 - 3250 - if (IS_ERR(cxlr)) { 3251 - dev_err(cxlmd->dev.parent, 3252 - "%s:%s: %s failed assign region: %ld\n", 3253 - dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), 3254 - __func__, PTR_ERR(cxlr)); 3255 - return cxlr; 3256 - } 3257 - 3258 - down_write(&cxl_region_rwsem); 3279 + guard(rwsem_write)(&cxl_region_rwsem); 3259 3280 p = &cxlr->params; 3260 3281 if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) { 3261 3282 dev_err(cxlmd->dev.parent, 3262 3283 "%s:%s: %s autodiscovery interrupted\n", 3263 3284 dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), 3264 3285 __func__); 3265 - rc = -EBUSY; 3266 - goto err; 3286 + return -EBUSY; 3267 3287 } 3268 3288 3269 3289 set_bit(CXL_REGION_F_AUTO, &cxlr->flags); 3270 3290 3271 3291 res = kmalloc(sizeof(*res), GFP_KERNEL); 3272 - if (!res) { 3273 - rc = -ENOMEM; 3274 - goto err; 3275 - } 3292 + if (!res) 3293 + return -ENOMEM; 3276 3294 3277 3295 *res = DEFINE_RES_MEM_NAMED(hpa->start, range_len(hpa), 3278 3296 dev_name(&cxlr->dev)); 3297 + 3298 + rc = cxl_extended_linear_cache_resize(cxlr, res); 3299 + if (rc && rc != -EOPNOTSUPP) { 3300 + /* 3301 + * Failing to support extended linear cache region resize does not 3302 + * prevent the region from functioning. Only causes cxl list showing 3303 + * incorrect region size. 
3304 + */ 3305 + dev_warn(cxlmd->dev.parent, 3306 + "Extended linear cache calculation failed rc:%d\n", rc); 3307 + } 3308 + 3279 3309 rc = insert_resource(cxlrd->res, res); 3280 3310 if (rc) { 3281 3311 /* ··· 3331 3289 3332 3290 rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group()); 3333 3291 if (rc) 3334 - goto err; 3292 + return rc; 3335 3293 3336 3294 dev_dbg(cxlmd->dev.parent, "%s:%s: %s %s res: %pr iw: %d ig: %d\n", 3337 3295 dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), __func__, ··· 3340 3298 3341 3299 /* ...to match put_device() in cxl_add_to_region() */ 3342 3300 get_device(&cxlr->dev); 3343 - up_write(&cxl_region_rwsem); 3301 + 3302 + return 0; 3303 + } 3304 + 3305 + /* Establish an empty region covering the given HPA range */ 3306 + static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, 3307 + struct cxl_endpoint_decoder *cxled) 3308 + { 3309 + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); 3310 + struct cxl_port *port = cxlrd_to_port(cxlrd); 3311 + struct cxl_dev_state *cxlds = cxlmd->cxlds; 3312 + int rc, part = READ_ONCE(cxled->part); 3313 + struct cxl_region *cxlr; 3314 + 3315 + do { 3316 + cxlr = __create_region(cxlrd, cxlds->part[part].mode, 3317 + atomic_read(&cxlrd->region_id)); 3318 + } while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY); 3319 + 3320 + if (IS_ERR(cxlr)) { 3321 + dev_err(cxlmd->dev.parent, 3322 + "%s:%s: %s failed assign region: %ld\n", 3323 + dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), 3324 + __func__, PTR_ERR(cxlr)); 3325 + return cxlr; 3326 + } 3327 + 3328 + rc = __construct_region(cxlr, cxlrd, cxled); 3329 + if (rc) { 3330 + devm_release_action(port->uport_dev, unregister_region, cxlr); 3331 + return ERR_PTR(rc); 3332 + } 3344 3333 3345 3334 return cxlr; 3346 - 3347 - err: 3348 - up_write(&cxl_region_rwsem); 3349 - devm_release_action(port->uport_dev, unregister_region, cxlr); 3350 - return ERR_PTR(rc); 3351 3335 } 3352 3336 3353 3337 int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled) ··· 3443 3375 } 3444 3376 EXPORT_SYMBOL_NS_GPL(cxl_add_to_region, "CXL"); 3445 3377 3378 + u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, u64 spa) 3379 + { 3380 + struct cxl_region_ref *iter; 3381 + unsigned long index; 3382 + 3383 + if (!endpoint) 3384 + return ~0ULL; 3385 + 3386 + guard(rwsem_write)(&cxl_region_rwsem); 3387 + 3388 + xa_for_each(&endpoint->regions, index, iter) { 3389 + struct cxl_region_params *p = &iter->region->params; 3390 + 3391 + if (p->res->start <= spa && spa <= p->res->end) { 3392 + if (!p->cache_size) 3393 + return ~0ULL; 3394 + 3395 + if (spa >= p->res->start + p->cache_size) 3396 + return spa - p->cache_size; 3397 + 3398 + return spa + p->cache_size; 3399 + } 3400 + } 3401 + 3402 + return ~0ULL; 3403 + } 3404 + EXPORT_SYMBOL_NS_GPL(cxl_port_get_spa_cache_alias, "CXL"); 3405 + 3446 3406 static int is_system_ram(struct resource *res, void *arg) 3447 3407 { 3448 3408 struct cxl_region *cxlr = arg; ··· 3536 3440 return rc; 3537 3441 3538 3442 switch (cxlr->mode) { 3539 - case CXL_DECODER_PMEM: 3443 + case CXL_PARTMODE_PMEM: 3540 3444 return devm_cxl_add_pmem_region(cxlr); 3541 - case CXL_DECODER_RAM: 3445 + case CXL_PARTMODE_RAM: 3542 3446 /* 3543 3447 * The region can not be manged by CXL if any portion of 3544 3448 * it is already online as 'System RAM'
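The extended-linear-cache handling above hinges on the layout assumption spelled out in cxl_extended_linear_cache_resize(): the DRAM cache occupies the first cache_size bytes of the widened region and the CXL-backed range follows it. That is why HPA translation adds p->cache_size and why cxl_port_get_spa_cache_alias() returns spa minus or plus cache_size depending on which half the address falls in. A standalone sketch of that aliasing math, with made-up addresses:

  #include <stdint.h>
  #include <stdio.h>

  /* Mirror of the cxl_port_get_spa_cache_alias() address math. */
  static uint64_t spa_cache_alias(uint64_t start, uint64_t end,
                                  uint64_t cache_size, uint64_t spa)
  {
          if (!cache_size || spa < start || spa > end)
                  return ~0ULL;                   /* no alias */
          if (spa >= start + cache_size)
                  return spa - cache_size;        /* CXL half -> DRAM alias */
          return spa + cache_size;                /* DRAM half -> CXL alias */
  }

  int main(void)
  {
          uint64_t cache = 1ULL << 30;            /* 1 GiB cache == CXL size */
          uint64_t start = 0x1000000000ULL;       /* region (DRAM + CXL) base */
          uint64_t end = start + 2 * cache - 1;
          uint64_t spa = start + cache + 0x1000;  /* lands in the CXL half */

          printf("spa %#llx aliases to %#llx\n", (unsigned long long)spa,
                 (unsigned long long)spa_cache_alias(start, end, cache, spa));
          return 0;
  }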
+71 -10
drivers/cxl/core/trace.h
··· 48 48 { CXL_RAS_UC_IDE_RX_ERR, "IDE Rx Error" } \ 49 49 ) 50 50 51 + TRACE_EVENT(cxl_port_aer_uncorrectable_error, 52 + TP_PROTO(struct device *dev, u32 status, u32 fe, u32 *hl), 53 + TP_ARGS(dev, status, fe, hl), 54 + TP_STRUCT__entry( 55 + __string(device, dev_name(dev)) 56 + __string(host, dev_name(dev->parent)) 57 + __field(u32, status) 58 + __field(u32, first_error) 59 + __array(u32, header_log, CXL_HEADERLOG_SIZE_U32) 60 + ), 61 + TP_fast_assign( 62 + __assign_str(device); 63 + __assign_str(host); 64 + __entry->status = status; 65 + __entry->first_error = fe; 66 + /* 67 + * Embed the 512B headerlog data for user app retrieval and 68 + * parsing, but no need to print this in the trace buffer. 69 + */ 70 + memcpy(__entry->header_log, hl, CXL_HEADERLOG_SIZE); 71 + ), 72 + TP_printk("device=%s host=%s status: '%s' first_error: '%s'", 73 + __get_str(device), __get_str(host), 74 + show_uc_errs(__entry->status), 75 + show_uc_errs(__entry->first_error) 76 + ) 77 + ); 78 + 51 79 TRACE_EVENT(cxl_aer_uncorrectable_error, 52 80 TP_PROTO(const struct cxl_memdev *cxlmd, u32 status, u32 fe, u32 *hl), 53 81 TP_ARGS(cxlmd, status, fe, hl), ··· 123 95 { CXL_RAS_CE_MEM_POISON, "Received Memory Poison From Peer" }, \ 124 96 { CXL_RAS_CE_PHYS_LAYER_ERR, "Received Error From Physical Layer" } \ 125 97 ) 98 + 99 + TRACE_EVENT(cxl_port_aer_correctable_error, 100 + TP_PROTO(struct device *dev, u32 status), 101 + TP_ARGS(dev, status), 102 + TP_STRUCT__entry( 103 + __string(device, dev_name(dev)) 104 + __string(host, dev_name(dev->parent)) 105 + __field(u32, status) 106 + ), 107 + TP_fast_assign( 108 + __assign_str(device); 109 + __assign_str(host); 110 + __entry->status = status; 111 + ), 112 + TP_printk("device=%s host=%s status='%s'", 113 + __get_str(device), __get_str(host), 114 + show_ce_errs(__entry->status) 115 + ) 116 + ); 126 117 127 118 TRACE_EVENT(cxl_aer_correctable_error, 128 119 TP_PROTO(const struct cxl_memdev *cxlmd, u32 status), ··· 439 392 TRACE_EVENT(cxl_general_media, 440 393 441 394 TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log, 442 - struct cxl_region *cxlr, u64 hpa, struct cxl_event_gen_media *rec), 395 + struct cxl_region *cxlr, u64 hpa, u64 hpa_alias0, 396 + struct cxl_event_gen_media *rec), 443 397 444 - TP_ARGS(cxlmd, log, cxlr, hpa, rec), 398 + TP_ARGS(cxlmd, log, cxlr, hpa, hpa_alias0, rec), 445 399 446 400 TP_STRUCT__entry( 447 401 CXL_EVT_TP_entry ··· 456 408 __array(u8, comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE) 457 409 /* Following are out of order to pack trace record */ 458 410 __field(u64, hpa) 411 + __field(u64, hpa_alias0) 459 412 __field_struct(uuid_t, region_uuid) 460 413 __field(u16, validity_flags) 461 414 __field(u8, rank) ··· 487 438 CXL_EVENT_GEN_MED_COMP_ID_SIZE); 488 439 __entry->validity_flags = get_unaligned_le16(&rec->media_hdr.validity_flags); 489 440 __entry->hpa = hpa; 441 + __entry->hpa_alias0 = hpa_alias0; 490 442 if (cxlr) { 491 443 __assign_str(region_name); 492 444 uuid_copy(&__entry->region_uuid, &cxlr->params.uuid); ··· 505 455 "device=%x validity_flags='%s' " \ 506 456 "comp_id=%s comp_id_pldm_valid_flags='%s' " \ 507 457 "pldm_entity_id=%s pldm_resource_id=%s " \ 508 - "hpa=%llx region=%s region_uuid=%pUb " \ 458 + "hpa=%llx hpa_alias0=%llx region=%s region_uuid=%pUb " \ 509 459 "cme_threshold_ev_flags='%s' cme_count=%u", 510 460 __entry->dpa, show_dpa_flags(__entry->dpa_flags), 511 461 show_event_desc_flags(__entry->descriptor), ··· 520 470 CXL_GMER_VALID_COMPONENT_ID_FORMAT, __entry->comp_id), 521 471 
show_pldm_resource_id(__entry->validity_flags, CXL_GMER_VALID_COMPONENT, 522 472 CXL_GMER_VALID_COMPONENT_ID_FORMAT, __entry->comp_id), 523 - __entry->hpa, __get_str(region_name), &__entry->region_uuid, 473 + __entry->hpa, __entry->hpa_alias0, __get_str(region_name), &__entry->region_uuid, 524 474 show_cme_threshold_ev_flags(__entry->cme_threshold_ev_flags), __entry->cme_count 525 475 ) 526 476 ); ··· 579 529 TRACE_EVENT(cxl_dram, 580 530 581 531 TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log, 582 - struct cxl_region *cxlr, u64 hpa, struct cxl_event_dram *rec), 532 + struct cxl_region *cxlr, u64 hpa, u64 hpa_alias0, 533 + struct cxl_event_dram *rec), 583 534 584 - TP_ARGS(cxlmd, log, cxlr, hpa, rec), 535 + TP_ARGS(cxlmd, log, cxlr, hpa, hpa_alias0, rec), 585 536 586 537 TP_STRUCT__entry( 587 538 CXL_EVT_TP_entry ··· 598 547 __field(u32, row) 599 548 __array(u8, cor_mask, CXL_EVENT_DER_CORRECTION_MASK_SIZE) 600 549 __field(u64, hpa) 550 + __field(u64, hpa_alias0) 601 551 __field_struct(uuid_t, region_uuid) 602 552 __field(u8, rank) /* Out of order to pack trace record */ 603 553 __field(u8, bank_group) /* Out of order to pack trace record */ ··· 636 584 memcpy(__entry->cor_mask, &rec->correction_mask, 637 585 CXL_EVENT_DER_CORRECTION_MASK_SIZE); 638 586 __entry->hpa = hpa; 587 + __entry->hpa_alias0 = hpa_alias0; 639 588 if (cxlr) { 640 589 __assign_str(region_name); 641 590 uuid_copy(&__entry->region_uuid, &cxlr->params.uuid); ··· 657 604 "validity_flags='%s' " \ 658 605 "comp_id=%s comp_id_pldm_valid_flags='%s' " \ 659 606 "pldm_entity_id=%s pldm_resource_id=%s " \ 660 - "hpa=%llx region=%s region_uuid=%pUb " \ 607 + "hpa=%llx hpa_alias0=%llx region=%s region_uuid=%pUb " \ 661 608 "sub_channel=%u cme_threshold_ev_flags='%s' cvme_count=%u", 662 609 __entry->dpa, show_dpa_flags(__entry->dpa_flags), 663 610 show_event_desc_flags(__entry->descriptor), ··· 675 622 CXL_DER_VALID_COMPONENT_ID_FORMAT, __entry->comp_id), 676 623 show_pldm_resource_id(__entry->validity_flags, CXL_DER_VALID_COMPONENT, 677 624 CXL_DER_VALID_COMPONENT_ID_FORMAT, __entry->comp_id), 678 - __entry->hpa, __get_str(region_name), &__entry->region_uuid, 625 + __entry->hpa, __entry->hpa_alias0, __get_str(region_name), &__entry->region_uuid, 679 626 __entry->sub_channel, show_cme_threshold_ev_flags(__entry->cme_threshold_ev_flags), 680 627 __entry->cvme_count 681 628 ) ··· 923 870 __string(region, cxlr ? 
dev_name(&cxlr->dev) : "") 924 871 __field(u64, overflow_ts) 925 872 __field(u64, hpa) 873 + __field(u64, hpa_alias0) 926 874 __field(u64, dpa) 927 875 __field(u32, dpa_length) 928 876 __array(char, uuid, 16) ··· 946 892 memcpy(__entry->uuid, &cxlr->params.uuid, 16); 947 893 __entry->hpa = cxl_dpa_to_hpa(cxlr, cxlmd, 948 894 __entry->dpa); 895 + if (__entry->hpa != ULLONG_MAX && cxlr->params.cache_size) 896 + __entry->hpa_alias0 = __entry->hpa + 897 + cxlr->params.cache_size; 898 + else 899 + __entry->hpa_alias0 = ULLONG_MAX; 949 900 } else { 950 901 __assign_str(region); 951 902 memset(__entry->uuid, 0, 16); 952 903 __entry->hpa = ULLONG_MAX; 904 + __entry->hpa_alias0 = ULLONG_MAX; 953 905 } 954 906 ), 955 907 956 908 TP_printk("memdev=%s host=%s serial=%lld trace_type=%s region=%s " \ 957 - "region_uuid=%pU hpa=0x%llx dpa=0x%llx dpa_length=0x%x " \ 958 - "source=%s flags=%s overflow_time=%llu", 909 + "region_uuid=%pU hpa=0x%llx hpa_alias0=0x%llx dpa=0x%llx " \ 910 + "dpa_length=0x%x source=%s flags=%s overflow_time=%llu", 959 911 __get_str(memdev), 960 912 __get_str(host), 961 913 __entry->serial, ··· 969 909 __get_str(region), 970 910 __entry->uuid, 971 911 __entry->hpa, 912 + __entry->hpa_alias0, 972 913 __entry->dpa, 973 914 __entry->dpa_length, 974 915 show_poison_source(__entry->source),
+22 -30
drivers/cxl/cxl.h
··· 373 373 }; 374 374 375 375 /* 376 - * CXL_DECODER_DEAD prevents endpoints from being reattached to regions 377 - * while cxld_unregister() is running 378 - */ 379 - enum cxl_decoder_mode { 380 - CXL_DECODER_NONE, 381 - CXL_DECODER_RAM, 382 - CXL_DECODER_PMEM, 383 - CXL_DECODER_MIXED, 384 - CXL_DECODER_DEAD, 385 - }; 386 - 387 - static inline const char *cxl_decoder_mode_name(enum cxl_decoder_mode mode) 388 - { 389 - static const char * const names[] = { 390 - [CXL_DECODER_NONE] = "none", 391 - [CXL_DECODER_RAM] = "ram", 392 - [CXL_DECODER_PMEM] = "pmem", 393 - [CXL_DECODER_MIXED] = "mixed", 394 - }; 395 - 396 - if (mode >= CXL_DECODER_NONE && mode <= CXL_DECODER_MIXED) 397 - return names[mode]; 398 - return "mixed"; 399 - } 400 - 401 - /* 402 376 * Track whether this decoder is reserved for region autodiscovery, or 403 377 * free for userspace provisioning. 404 378 */ ··· 386 412 * @cxld: base cxl_decoder_object 387 413 * @dpa_res: actively claimed DPA span of this decoder 388 414 * @skip: offset into @dpa_res where @cxld.hpa_range maps 389 - * @mode: which memory type / access-mode-partition this decoder targets 390 415 * @state: autodiscovery state 416 + * @part: partition index this decoder maps 391 417 * @pos: interleave position in @cxld.region 392 418 */ 393 419 struct cxl_endpoint_decoder { 394 420 struct cxl_decoder cxld; 395 421 struct resource *dpa_res; 396 422 resource_size_t skip; 397 - enum cxl_decoder_mode mode; 398 423 enum cxl_decoder_state state; 424 + int part; 399 425 int pos; 400 426 }; 401 427 ··· 467 493 * @res: allocated iomem capacity for this region 468 494 * @targets: active ordered targets in current decoder configuration 469 495 * @nr_targets: number of targets 496 + * @cache_size: extended linear cache size if exists, otherwise zero. 470 497 * 471 498 * State transitions are protected by the cxl_region_rwsem 472 499 */ ··· 479 504 struct resource *res; 480 505 struct cxl_endpoint_decoder *targets[CXL_DECODER_MAX_INTERLEAVE]; 481 506 int nr_targets; 507 + resource_size_t cache_size; 508 + }; 509 + 510 + enum cxl_partition_mode { 511 + CXL_PARTMODE_RAM, 512 + CXL_PARTMODE_PMEM, 482 513 }; 483 514 484 515 /* ··· 506 525 * struct cxl_region - CXL region 507 526 * @dev: This region's device 508 527 * @id: This region's id. 
Id is globally unique across all regions 509 - * @mode: Endpoint decoder allocation / access mode 528 + * @mode: Operational mode of the mapped capacity 510 529 * @type: Endpoint decoder target type 511 530 * @cxl_nvb: nvdimm bridge for coordinating @cxlr_pmem setup / shutdown 512 531 * @cxlr_pmem: (for pmem regions) cached copy of the nvdimm bridge ··· 519 538 struct cxl_region { 520 539 struct device dev; 521 540 int id; 522 - enum cxl_decoder_mode mode; 541 + enum cxl_partition_mode mode; 523 542 enum cxl_decoder_type type; 524 543 struct cxl_nvdimm_bridge *cxl_nvb; 525 544 struct cxl_pmem_region *cxlr_pmem; ··· 544 563 struct device dev; 545 564 struct cxl_memdev *cxlmd; 546 565 u8 dev_id[CXL_DEV_ID_LEN]; /* for nvdimm, string of 'serial' */ 566 + u64 dirty_shutdowns; 547 567 }; 548 568 549 569 struct cxl_pmem_region_mapping { ··· 592 610 * @cdat: Cached CDAT data 593 611 * @cdat_available: Should a CDAT attribute be available in sysfs 594 612 * @pci_latency: Upstream latency in picoseconds 613 + * @gpf_dvsec: Cached GPF port DVSEC 595 614 */ 596 615 struct cxl_port { 597 616 struct device dev; ··· 616 633 } cdat; 617 634 bool cdat_available; 618 635 long pci_latency; 636 + int gpf_dvsec; 619 637 }; 620 638 621 639 /** ··· 859 875 int cxl_add_to_region(struct cxl_port *root, 860 876 struct cxl_endpoint_decoder *cxled); 861 877 struct cxl_dax_region *to_cxl_dax_region(struct device *dev); 878 + u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, u64 spa); 862 879 #else 863 880 static inline bool is_cxl_pmem_region(struct device *dev) 864 881 { ··· 877 892 static inline struct cxl_dax_region *to_cxl_dax_region(struct device *dev) 878 893 { 879 894 return NULL; 895 + } 896 + static inline u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, 897 + u64 spa) 898 + { 899 + return 0; 880 900 } 881 901 #endif 882 902 ··· 909 919 #ifndef __mock 910 920 #define __mock static 911 921 #endif 922 + 923 + u16 cxl_gpf_get_dvsec(struct device *dev, bool is_port); 912 924 913 925 #endif /* __CXL_H__ */
+63 -14
drivers/cxl/cxlmem.h
··· 97 97 resource_size_t base, resource_size_t len, 98 98 resource_size_t skipped); 99 99 100 + #define CXL_NR_PARTITIONS_MAX 2 101 + 102 + struct cxl_dpa_info { 103 + u64 size; 104 + struct cxl_dpa_part_info { 105 + struct range range; 106 + enum cxl_partition_mode mode; 107 + } part[CXL_NR_PARTITIONS_MAX]; 108 + int nr_partitions; 109 + }; 110 + 111 + int cxl_dpa_setup(struct cxl_dev_state *cxlds, const struct cxl_dpa_info *info); 112 + 100 113 static inline struct cxl_ep *cxl_ep_load(struct cxl_port *port, 101 114 struct cxl_memdev *cxlmd) 102 115 { ··· 386 373 }; 387 374 388 375 /** 376 + * struct cxl_dpa_partition - DPA partition descriptor 377 + * @res: shortcut to the partition in the DPA resource tree (cxlds->dpa_res) 378 + * @perf: performance attributes of the partition from CDAT 379 + * @mode: operation mode for the DPA capacity, e.g. ram, pmem, dynamic... 380 + */ 381 + struct cxl_dpa_partition { 382 + struct resource res; 383 + struct cxl_dpa_perf perf; 384 + enum cxl_partition_mode mode; 385 + }; 386 + 387 + /** 389 388 * struct cxl_dev_state - The driver device state 390 389 * 391 390 * cxl_dev_state represents the CXL driver/device state. It provides an ··· 412 387 * @rcd: operating in RCD mode (CXL 3.0 9.11.8 CXL Devices Attached to an RCH) 413 388 * @media_ready: Indicate whether the device media is usable 414 389 * @dpa_res: Overall DPA resource tree for the device 415 - * @pmem_res: Active Persistent memory capacity configuration 416 - * @ram_res: Active Volatile memory capacity configuration 390 + * @part: DPA partition array 391 + * @nr_partitions: Number of DPA partitions 417 392 * @serial: PCIe Device Serial Number 418 393 * @type: Generic Memory Class device or Vendor Specific Memory device 419 394 * @cxl_mbox: CXL mailbox context ··· 428 403 bool rcd; 429 404 bool media_ready; 430 405 struct resource dpa_res; 431 - struct resource pmem_res; 432 - struct resource ram_res; 406 + struct cxl_dpa_partition part[CXL_NR_PARTITIONS_MAX]; 407 + unsigned int nr_partitions; 433 408 u64 serial; 434 409 enum cxl_devtype type; 435 410 struct cxl_mailbox cxl_mbox; ··· 437 412 struct cxl_features_state *cxlfs; 438 413 #endif 439 414 }; 415 + 416 + static inline resource_size_t cxl_pmem_size(struct cxl_dev_state *cxlds) 417 + { 418 + /* 419 + * Static PMEM may be at partition index 0 when there is no static RAM 420 + * capacity. 421 + */ 422 + for (int i = 0; i < cxlds->nr_partitions; i++) 423 + if (cxlds->part[i].mode == CXL_PARTMODE_PMEM) 424 + return resource_size(&cxlds->part[i].res); 425 + return 0; 426 + } 440 427 441 428 static inline struct cxl_dev_state *mbox_to_cxlds(struct cxl_mailbox *cxl_mbox) 442 429 { ··· 472 435 * @partition_align_bytes: alignment size for partition-able capacity 473 436 * @active_volatile_bytes: sum of hard + soft volatile 474 437 * @active_persistent_bytes: sum of hard + soft persistent 475 - * @next_volatile_bytes: volatile capacity change pending device reset 476 - * @next_persistent_bytes: persistent capacity change pending device reset 477 - * @ram_perf: performance data entry matched to RAM partition 478 - * @pmem_perf: performance data entry matched to PMEM partition 479 438 * @event: event log driver state 480 439 * @poison: poison driver state info 481 440 * @security: security driver state info 482 441 * @fw: firmware upload / activation state 442 + * @mce_notifier: MCE notifier 483 443 * 484 444 * See CXL 3.0 8.2.9.8.2 Capacity Configuration and Label Storage for 485 445 * details on capacity parameters. 
··· 491 457 u64 partition_align_bytes; 492 458 u64 active_volatile_bytes; 493 459 u64 active_persistent_bytes; 494 - u64 next_volatile_bytes; 495 - u64 next_persistent_bytes; 496 - 497 - struct cxl_dpa_perf ram_perf; 498 - struct cxl_dpa_perf pmem_perf; 499 460 500 461 struct cxl_event_state event; 501 462 struct cxl_poison_state poison; 502 463 struct cxl_security_state security; 503 464 struct cxl_fw_state fw; 465 + struct notifier_block mce_notifier; 504 466 }; 505 467 506 468 static inline struct cxl_memdev_state * ··· 690 660 691 661 #define CXL_SET_PARTITION_IMMEDIATE_FLAG BIT(0) 692 662 663 + /* Get Health Info Output Payload CXL 3.2 Spec 8.2.10.9.3.1 Table 8-148 */ 664 + struct cxl_mbox_get_health_info_out { 665 + u8 health_status; 666 + u8 media_status; 667 + u8 additional_status; 668 + u8 life_used; 669 + __le16 device_temperature; 670 + __le32 dirty_shutdown_cnt; 671 + __le32 corrected_volatile_error_cnt; 672 + __le32 corrected_persistent_error_cnt; 673 + } __packed; 674 + 675 + /* Set Shutdown State Input Payload CXL 3.2 Spec 8.2.10.9.3.5 Table 8-152 */ 676 + struct cxl_mbox_set_shutdown_state_in { 677 + u8 state; 678 + } __packed; 679 + 693 680 /* Set Timestamp CXL 3.0 Spec 8.2.9.4.2 */ 694 681 struct cxl_mbox_set_timestamp_in { 695 682 __le64 timestamp; ··· 832 785 int cxl_dev_state_identify(struct cxl_memdev_state *mds); 833 786 int cxl_await_media_ready(struct cxl_dev_state *cxlds); 834 787 int cxl_enumerate_cmds(struct cxl_memdev_state *mds); 835 - int cxl_mem_create_range_info(struct cxl_memdev_state *mds); 788 + int cxl_mem_dpa_fetch(struct cxl_memdev_state *mds, struct cxl_dpa_info *info); 836 789 struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev); 837 790 void set_exclusive_cxl_commands(struct cxl_memdev_state *mds, 838 791 unsigned long *cmds); ··· 843 796 enum cxl_event_log_type type, 844 797 enum cxl_event_type event_type, 845 798 const uuid_t *uuid, union cxl_event *evt); 799 + int cxl_get_dirty_count(struct cxl_memdev_state *mds, u32 *count); 800 + int cxl_arm_dirty_shutdown(struct cxl_memdev_state *mds); 846 801 int cxl_set_timestamp(struct cxl_memdev_state *mds); 847 802 int cxl_poison_state_init(struct cxl_memdev_state *mds); 848 803 int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
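The dirty shutdown count that the pmem changes below expose comes from the Get Health Info payload declared above; since the struct is packed and little-endian, dirty_shutdown_cnt sits at byte offset 6. A small sketch of pulling it out of a raw payload buffer (the bytes are invented):

  #include <stdint.h>
  #include <stdio.h>

  static uint32_t get_le32(const uint8_t *p)
  {
          return (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
                 ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
  }

  int main(void)
  {
          /* mirrors cxl_mbox_get_health_info_out: 4 x u8, one le16, then le32s */
          uint8_t payload[18] = { 0 };

          payload[6] = 5;                 /* dirty_shutdown_cnt = 5 */
          printf("dirty shutdowns: %u\n", get_le32(&payload[6]));
          return 0;
  }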
+6
drivers/cxl/cxlpci.h
··· 40 40 41 41 /* CXL 2.0 8.1.6: GPF DVSEC for CXL Port */ 42 42 #define CXL_DVSEC_PORT_GPF 4 43 + #define CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET 0x0C 44 + #define CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK GENMASK(3, 0) 45 + #define CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK GENMASK(11, 8) 46 + #define CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET 0xE 47 + #define CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK GENMASK(3, 0) 48 + #define CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK GENMASK(11, 8) 43 49 44 50 /* CXL 2.0 8.1.7: GPF DVSEC for CXL Device */ 45 51 #define CXL_DVSEC_DEVICE_GPF 5
+1 -1
drivers/cxl/mem.c
··· 152 152 return -ENXIO; 153 153 } 154 154 155 - if (resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM)) { 155 + if (cxl_pmem_size(cxlds) && IS_ENABLED(CONFIG_CXL_PMEM)) { 156 156 rc = devm_cxl_add_nvdimm(parent_port, cxlmd); 157 157 if (rc) { 158 158 if (rc == -ENODEV)
+6 -1
drivers/cxl/pci.c
··· 903 903 static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) 904 904 { 905 905 struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus); 906 + struct cxl_dpa_info range_info = { 0 }; 906 907 struct cxl_memdev_state *mds; 907 908 struct cxl_dev_state *cxlds; 908 909 struct cxl_register_map map; ··· 994 993 if (rc) 995 994 return rc; 996 995 997 - rc = cxl_mem_create_range_info(mds); 996 + rc = cxl_mem_dpa_fetch(mds, &range_info); 997 + if (rc) 998 + return rc; 999 + 1000 + rc = cxl_dpa_setup(cxlds, &range_info); 998 1001 if (rc) 999 1002 return rc; 1000 1003
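The probe flow is now split in two: cxl_mem_dpa_fetch() fills a struct cxl_dpa_info (declared in the cxlmem.h hunk above) and cxl_dpa_setup() turns it into the partition array on cxl_dev_state. Purely as a toy model of that intermediate structure, and not a claim about how the driver derives the sizes, here is one way a RAM-then-PMEM layout could be described; every name below is local to the sketch:

  #include <stdint.h>
  #include <stdio.h>

  struct range { uint64_t start, end; };
  enum part_mode { PART_RAM, PART_PMEM };

  struct dpa_info {                       /* loose stand-in for cxl_dpa_info */
          uint64_t size;
          struct { struct range range; enum part_mode mode; } part[2];
          int nr_partitions;
  };

  static void dpa_info_fill(struct dpa_info *info, uint64_t ram, uint64_t pmem)
  {
          int i = 0;

          if (ram) {
                  info->part[i].range = (struct range){ 0, ram - 1 };
                  info->part[i++].mode = PART_RAM;
          }
          if (pmem) {                     /* PMEM follows any RAM capacity */
                  info->part[i].range = (struct range){ ram, ram + pmem - 1 };
                  info->part[i++].mode = PART_PMEM;
          }
          info->nr_partitions = i;
          info->size = ram + pmem;
  }

  int main(void)
  {
          struct dpa_info info = { 0 };

          dpa_info_fill(&info, 1ULL << 30, 1ULL << 30);
          printf("%d partitions, %llu bytes of DPA\n", info.nr_partitions,
                 (unsigned long long)info.size);
          return 0;
  }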
+79 -2
drivers/cxl/pmem.c
··· 42 42 } 43 43 static DEVICE_ATTR_RO(id); 44 44 45 + static ssize_t dirty_shutdown_show(struct device *dev, 46 + struct device_attribute *attr, char *buf) 47 + { 48 + struct nvdimm *nvdimm = to_nvdimm(dev); 49 + struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm); 50 + 51 + return sysfs_emit(buf, "%llu\n", cxl_nvd->dirty_shutdowns); 52 + } 53 + static DEVICE_ATTR_RO(dirty_shutdown); 54 + 45 55 static struct attribute *cxl_dimm_attributes[] = { 46 56 &dev_attr_id.attr, 47 57 &dev_attr_provider.attr, 58 + &dev_attr_dirty_shutdown.attr, 48 59 NULL 49 60 }; 61 + 62 + #define CXL_INVALID_DIRTY_SHUTDOWN_COUNT ULLONG_MAX 63 + static umode_t cxl_dimm_visible(struct kobject *kobj, 64 + struct attribute *a, int n) 65 + { 66 + if (a == &dev_attr_dirty_shutdown.attr) { 67 + struct device *dev = kobj_to_dev(kobj); 68 + struct nvdimm *nvdimm = to_nvdimm(dev); 69 + struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm); 70 + 71 + if (cxl_nvd->dirty_shutdowns == 72 + CXL_INVALID_DIRTY_SHUTDOWN_COUNT) 73 + return 0; 74 + } 75 + 76 + return a->mode; 77 + } 50 78 51 79 static const struct attribute_group cxl_dimm_attribute_group = { 52 80 .name = "cxl", 53 81 .attrs = cxl_dimm_attributes, 82 + .is_visible = cxl_dimm_visible 54 83 }; 55 84 56 85 static const struct attribute_group *cxl_dimm_attribute_groups[] = { 57 86 &cxl_dimm_attribute_group, 58 87 NULL 59 88 }; 89 + 90 + static void cxl_nvdimm_arm_dirty_shutdown_tracking(struct cxl_nvdimm *cxl_nvd) 91 + { 92 + struct cxl_memdev *cxlmd = cxl_nvd->cxlmd; 93 + struct cxl_dev_state *cxlds = cxlmd->cxlds; 94 + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); 95 + struct device *dev = &cxl_nvd->dev; 96 + u32 count; 97 + 98 + /* 99 + * Dirty tracking is enabled and exposed to the user, only when: 100 + * - dirty shutdown on the device can be set, and, 101 + * - the device has a Device GPF DVSEC (albeit unused), and, 102 + * - the Get Health Info cmd can retrieve the device's dirty count. 103 + */ 104 + cxl_nvd->dirty_shutdowns = CXL_INVALID_DIRTY_SHUTDOWN_COUNT; 105 + 106 + if (cxl_arm_dirty_shutdown(mds)) { 107 + dev_warn(dev, "GPF: could not set dirty shutdown state\n"); 108 + return; 109 + } 110 + 111 + if (!cxl_gpf_get_dvsec(cxlds->dev, false)) 112 + return; 113 + 114 + if (cxl_get_dirty_count(mds, &count)) { 115 + dev_warn(dev, "GPF: could not retrieve dirty count\n"); 116 + return; 117 + } 118 + 119 + cxl_nvd->dirty_shutdowns = count; 120 + } 60 121 61 122 static int cxl_nvdimm_probe(struct device *dev) 62 123 { ··· 139 78 set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask); 140 79 set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask); 141 80 set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask); 81 + 82 + /* 83 + * Set dirty shutdown now, with the expectation that the device 84 + * clear it upon a successful GPF flow. The exception to this 85 + * is upon Viral detection, per CXL 3.2 section 12.4.2. 86 + */ 87 + cxl_nvdimm_arm_dirty_shutdown_tracking(cxl_nvd); 88 + 142 89 nvdimm = __nvdimm_create(cxl_nvb->nvdimm_bus, cxl_nvd, 143 90 cxl_dimm_attribute_groups, flags, 144 91 cmd_mask, 0, NULL, cxl_nvd->dev_id, ··· 444 375 goto out_nvd; 445 376 } 446 377 378 + if (cxlds->serial == 0) { 379 + /* include missing alongside invalid in this error message. 
*/ 380 + dev_err(dev, "%s: invalid or missing serial number\n", 381 + dev_name(&cxlmd->dev)); 382 + rc = -ENXIO; 383 + goto out_nvd; 384 + } 385 + info[i].serial = cxlds->serial; 386 + info[i].offset = m->start; 387 + 447 388 m->cxl_nvd = cxl_nvd; 448 389 mappings[i] = (struct nd_mapping_desc) { 449 390 .nvdimm = nvdimm, ··· 461 382 .size = m->size, 462 383 .position = i, 463 384 }; 464 - info[i].offset = m->start; 465 - info[i].serial = cxlds->serial; 466 385 } 467 386 ndr_desc.num_mappings = cxlr_pmem->nr_mappings; 468 387 ndr_desc.mapping = mappings;
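Once armed, the count surfaces as a read-only attribute in the nvdimm's "cxl" group, and cxl_dimm_visible() hides it whenever the count could not be established. An illustrative reader; the nmem0 path is a guess at the usual libnvdimm device naming, not something this diff defines:

  #include <stdio.h>

  int main(void)
  {
          /* placeholder path under the nvdimm bus device */
          const char *attr = "/sys/bus/nd/devices/nmem0/cxl/dirty_shutdown";
          unsigned long long count;
          FILE *f = fopen(attr, "r");

          if (!f) {
                  perror("dirty_shutdown not visible");
                  return 1;
          }
          if (fscanf(f, "%llu", &count) == 1)
                  printf("dirty shutdowns: %llu\n", count);
          fclose(f);
          return 0;
  }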
+3 -3
drivers/firmware/efi/cper.c
··· 24 24 #include <linux/bcd.h> 25 25 #include <acpi/ghes.h> 26 26 #include <ras/ras_event.h> 27 - #include "cper_cxl.h" 27 + #include <cxl/event.h> 28 28 29 29 /* 30 30 * CPER record ID need to be unique even after reboot, because record ··· 624 624 else 625 625 goto err_section_too_small; 626 626 } else if (guid_equal(sec_type, &CPER_SEC_CXL_PROT_ERR)) { 627 - struct cper_sec_prot_err *prot_err = acpi_hest_get_payload(gdata); 627 + struct cxl_cper_sec_prot_err *prot_err = acpi_hest_get_payload(gdata); 628 628 629 629 printk("%ssection_type: CXL Protocol Error\n", newpfx); 630 630 if (gdata->error_data_length >= sizeof(*prot_err)) 631 - cper_print_prot_err(newpfx, prot_err); 631 + cxl_cper_print_prot_err(newpfx, prot_err); 632 632 else 633 633 goto err_section_too_small; 634 634 } else {
+3 -36
drivers/firmware/efi/cper_cxl.c
··· 8 8 */ 9 9 10 10 #include <linux/cper.h> 11 - #include "cper_cxl.h" 12 - 13 - #define PROT_ERR_VALID_AGENT_TYPE BIT_ULL(0) 14 - #define PROT_ERR_VALID_AGENT_ADDRESS BIT_ULL(1) 15 - #define PROT_ERR_VALID_DEVICE_ID BIT_ULL(2) 16 - #define PROT_ERR_VALID_SERIAL_NUMBER BIT_ULL(3) 17 - #define PROT_ERR_VALID_CAPABILITY BIT_ULL(4) 18 - #define PROT_ERR_VALID_DVSEC BIT_ULL(5) 19 - #define PROT_ERR_VALID_ERROR_LOG BIT_ULL(6) 20 - 21 - /* CXL RAS Capability Structure, CXL v3.0 sec 8.2.4.16 */ 22 - struct cxl_ras_capability_regs { 23 - u32 uncor_status; 24 - u32 uncor_mask; 25 - u32 uncor_severity; 26 - u32 cor_status; 27 - u32 cor_mask; 28 - u32 cap_control; 29 - u32 header_log[16]; 30 - }; 11 + #include <cxl/event.h> 31 12 32 13 static const char * const prot_err_agent_type_strs[] = { 33 14 "Restricted CXL Device", ··· 21 40 "CXL Upstream Switch Port", 22 41 }; 23 42 24 - /* 25 - * The layout of the enumeration and the values matches CXL Agent Type 26 - * field in the UEFI 2.10 Section N.2.13, 27 - */ 28 - enum { 29 - RCD, /* Restricted CXL Device */ 30 - RCH_DP, /* Restricted CXL Host Downstream Port */ 31 - DEVICE, /* CXL Device */ 32 - LD, /* CXL Logical Device */ 33 - FMLD, /* CXL Fabric Manager managed Logical Device */ 34 - RP, /* CXL Root Port */ 35 - DSP, /* CXL Downstream Switch Port */ 36 - USP, /* CXL Upstream Switch Port */ 37 - }; 38 - 39 - void cper_print_prot_err(const char *pfx, const struct cper_sec_prot_err *prot_err) 43 + void cxl_cper_print_prot_err(const char *pfx, 44 + const struct cxl_cper_sec_prot_err *prot_err) 40 45 { 41 46 if (prot_err->valid_bits & PROT_ERR_VALID_AGENT_TYPE) 42 47 pr_info("%s agent_type: %d, %s\n", pfx, prot_err->agent_type,
-66
drivers/firmware/efi/cper_cxl.h
··· 1 - /* SPDX-License-Identifier: GPL-2.0-only */ 2 - /* 3 - * UEFI Common Platform Error Record (CPER) support for CXL Section. 4 - * 5 - * Copyright (C) 2022 Advanced Micro Devices, Inc. 6 - * 7 - * Author: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com> 8 - */ 9 - 10 - #ifndef LINUX_CPER_CXL_H 11 - #define LINUX_CPER_CXL_H 12 - 13 - /* CXL Protocol Error Section */ 14 - #define CPER_SEC_CXL_PROT_ERR \ 15 - GUID_INIT(0x80B9EFB4, 0x52B5, 0x4DE3, 0xA7, 0x77, 0x68, 0x78, \ 16 - 0x4B, 0x77, 0x10, 0x48) 17 - 18 - #pragma pack(1) 19 - 20 - /* Compute Express Link Protocol Error Section, UEFI v2.10 sec N.2.13 */ 21 - struct cper_sec_prot_err { 22 - u64 valid_bits; 23 - u8 agent_type; 24 - u8 reserved[7]; 25 - 26 - /* 27 - * Except for RCH Downstream Port, all the remaining CXL Agent 28 - * types are uniquely identified by the PCIe compatible SBDF number. 29 - */ 30 - union { 31 - u64 rcrb_base_addr; 32 - struct { 33 - u8 function; 34 - u8 device; 35 - u8 bus; 36 - u16 segment; 37 - u8 reserved_1[3]; 38 - }; 39 - } agent_addr; 40 - 41 - struct { 42 - u16 vendor_id; 43 - u16 device_id; 44 - u16 subsystem_vendor_id; 45 - u16 subsystem_id; 46 - u8 class_code[2]; 47 - u16 slot; 48 - u8 reserved_1[4]; 49 - } device_id; 50 - 51 - struct { 52 - u32 lower_dw; 53 - u32 upper_dw; 54 - } dev_serial_num; 55 - 56 - u8 capability[60]; 57 - u16 dvsec_len; 58 - u16 err_len; 59 - u8 reserved_2[4]; 60 - }; 61 - 62 - #pragma pack() 63 - 64 - void cper_print_prot_err(const char *pfx, const struct cper_sec_prot_err *prot_err); 65 - 66 - #endif //__CPER_CXL_
+101
include/cxl/event.h
··· 164 164 struct cxl_cper_event_rec rec; 165 165 }; 166 166 167 + #define PROT_ERR_VALID_AGENT_TYPE BIT_ULL(0) 168 + #define PROT_ERR_VALID_AGENT_ADDRESS BIT_ULL(1) 169 + #define PROT_ERR_VALID_DEVICE_ID BIT_ULL(2) 170 + #define PROT_ERR_VALID_SERIAL_NUMBER BIT_ULL(3) 171 + #define PROT_ERR_VALID_CAPABILITY BIT_ULL(4) 172 + #define PROT_ERR_VALID_DVSEC BIT_ULL(5) 173 + #define PROT_ERR_VALID_ERROR_LOG BIT_ULL(6) 174 + 175 + /* 176 + * The layout of the enumeration and the values matches CXL Agent Type 177 + * field in the UEFI 2.10 Section N.2.13, 178 + */ 179 + enum { 180 + RCD, /* Restricted CXL Device */ 181 + RCH_DP, /* Restricted CXL Host Downstream Port */ 182 + DEVICE, /* CXL Device */ 183 + LD, /* CXL Logical Device */ 184 + FMLD, /* CXL Fabric Manager managed Logical Device */ 185 + RP, /* CXL Root Port */ 186 + DSP, /* CXL Downstream Switch Port */ 187 + USP, /* CXL Upstream Switch Port */ 188 + }; 189 + 190 + #pragma pack(1) 191 + 192 + /* Compute Express Link Protocol Error Section, UEFI v2.10 sec N.2.13 */ 193 + struct cxl_cper_sec_prot_err { 194 + u64 valid_bits; 195 + u8 agent_type; 196 + u8 reserved[7]; 197 + 198 + /* 199 + * Except for RCH Downstream Port, all the remaining CXL Agent 200 + * types are uniquely identified by the PCIe compatible SBDF number. 201 + */ 202 + union { 203 + u64 rcrb_base_addr; 204 + struct { 205 + u8 function; 206 + u8 device; 207 + u8 bus; 208 + u16 segment; 209 + u8 reserved_1[3]; 210 + }; 211 + } agent_addr; 212 + 213 + struct { 214 + u16 vendor_id; 215 + u16 device_id; 216 + u16 subsystem_vendor_id; 217 + u16 subsystem_id; 218 + u8 class_code[2]; 219 + u16 slot; 220 + u8 reserved_1[4]; 221 + } device_id; 222 + 223 + struct { 224 + u32 lower_dw; 225 + u32 upper_dw; 226 + } dev_serial_num; 227 + 228 + u8 capability[60]; 229 + u16 dvsec_len; 230 + u16 err_len; 231 + u8 reserved_2[4]; 232 + }; 233 + 234 + #pragma pack() 235 + 236 + /* CXL RAS Capability Structure, CXL v3.0 sec 8.2.4.16 */ 237 + struct cxl_ras_capability_regs { 238 + u32 uncor_status; 239 + u32 uncor_mask; 240 + u32 uncor_severity; 241 + u32 cor_status; 242 + u32 cor_mask; 243 + u32 cap_control; 244 + u32 header_log[16]; 245 + }; 246 + 247 + struct cxl_cper_prot_err_work_data { 248 + struct cxl_cper_sec_prot_err prot_err; 249 + struct cxl_ras_capability_regs ras_cap; 250 + int severity; 251 + }; 252 + 167 253 #ifdef CONFIG_ACPI_APEI_GHES 168 254 int cxl_cper_register_work(struct work_struct *work); 169 255 int cxl_cper_unregister_work(struct work_struct *work); 170 256 int cxl_cper_kfifo_get(struct cxl_cper_work_data *wd); 257 + int cxl_cper_register_prot_err_work(struct work_struct *work); 258 + int cxl_cper_unregister_prot_err_work(struct work_struct *work); 259 + int cxl_cper_prot_err_kfifo_get(struct cxl_cper_prot_err_work_data *wd); 171 260 #else 172 261 static inline int cxl_cper_register_work(struct work_struct *work) 173 262 { ··· 268 179 return 0; 269 180 } 270 181 static inline int cxl_cper_kfifo_get(struct cxl_cper_work_data *wd) 182 + { 183 + return 0; 184 + } 185 + static inline int cxl_cper_register_prot_err_work(struct work_struct *work) 186 + { 187 + return 0; 188 + } 189 + static inline int cxl_cper_unregister_prot_err_work(struct work_struct *work) 190 + { 191 + return 0; 192 + } 193 + static inline int cxl_cper_prot_err_kfifo_get(struct cxl_cper_prot_err_work_data *wd) 271 194 { 272 195 return 0; 273 196 }
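Since cxl_cper_sec_prot_err is now shared between the CPER decoder and the CXL core, its layout is fixed by #pragma pack(1): summing the fields above gives a 116-byte fixed portion, with the trailing data sized by dvsec_len/err_len following it. A userspace mirror with a compile-time check of that arithmetic:

  #include <stdint.h>

  #pragma pack(1)
  struct prot_err_mirror {                /* userspace copy, for layout checking only */
          uint64_t valid_bits;
          uint8_t agent_type;
          uint8_t reserved[7];
          union {
                  uint64_t rcrb_base_addr;
                  struct {
                          uint8_t function;
                          uint8_t device;
                          uint8_t bus;
                          uint16_t segment;
                          uint8_t reserved_1[3];
                  };
          } agent_addr;
          struct {
                  uint16_t vendor_id;
                  uint16_t device_id;
                  uint16_t subsystem_vendor_id;
                  uint16_t subsystem_id;
                  uint8_t class_code[2];
                  uint16_t slot;
                  uint8_t reserved_1[4];
          } device_id;
          struct {
                  uint32_t lower_dw;
                  uint32_t upper_dw;
          } dev_serial_num;
          uint8_t capability[60];
          uint16_t dvsec_len;
          uint16_t err_len;
          uint8_t reserved_2[4];
  };
  #pragma pack()

  _Static_assert(sizeof(struct prot_err_mirror) == 116,
                 "fixed portion of the CXL protocol error section");

  int main(void)
  {
          return 0;
  }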
+11
include/linux/acpi.h
··· 1094 1094 1095 1095 #endif /* !CONFIG_ACPI */ 1096 1096 1097 + #ifdef CONFIG_ACPI_HMAT 1098 + int hmat_get_extended_linear_cache_size(struct resource *backing_res, int nid, 1099 + resource_size_t *size); 1100 + #else 1101 + static inline int hmat_get_extended_linear_cache_size(struct resource *backing_res, 1102 + int nid, resource_size_t *size) 1103 + { 1104 + return -EOPNOTSUPP; 1105 + } 1106 + #endif 1107 + 1097 1108 extern void arch_post_acpi_subsys_init(void); 1098 1109 1099 1110 #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
+8
include/linux/cper.h
··· 89 89 #define CPER_NOTIFY_DMAR \ 90 90 GUID_INIT(0x667DD791, 0xC6B3, 0x4c27, 0x8A, 0x6B, 0x0F, 0x8E, \ 91 91 0x72, 0x2D, 0xEB, 0x41) 92 + /* CXL Protocol Error Section */ 93 + #define CPER_SEC_CXL_PROT_ERR \ 94 + GUID_INIT(0x80B9EFB4, 0x52B5, 0x4DE3, 0xA7, 0x77, 0x68, 0x78, \ 95 + 0x4B, 0x77, 0x10, 0x48) 92 96 93 97 /* CXL Event record UUIDs are formatted as GUIDs and reported in section type */ 94 98 /* ··· 604 600 const struct acpi_hest_generic_status *estatus); 605 601 int cper_estatus_check_header(const struct acpi_hest_generic_status *estatus); 606 602 int cper_estatus_check(const struct acpi_hest_generic_status *estatus); 603 + 604 + struct cxl_cper_sec_prot_err; 605 + void cxl_cper_print_prot_err(const char *pfx, 606 + const struct cxl_cper_sec_prot_err *prot_err); 607 607 608 608 #endif
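
The new GUID is how CPER/GHES code recognizes a CXL Protocol Error section before handing the payload to cxl_cper_print_prot_err(). The dispatch below is only a sketch with an invented helper name, assuming the caller already holds the generic data entry; the real hook lives in the CPER/GHES code itself.

#include <linux/cper.h>
#include <acpi/ghes.h>

static void demo_handle_cper_section(struct acpi_hest_generic_data *gdata)
{
        guid_t *sec_type = (guid_t *)gdata->section_type;

        if (guid_equal(sec_type, &CPER_SEC_CXL_PROT_ERR)) {
                struct cxl_cper_sec_prot_err *prot_err =
                        acpi_hest_get_payload(gdata);

                cxl_cper_print_prot_err("CPER: ", prot_err);
        }
}
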
+7
include/linux/node.h
··· 57 57 NODE_CACHE_WRITE_OTHER, 58 58 }; 59 59 60 + enum cache_mode { 61 + NODE_CACHE_ADDR_MODE_RESERVED, 62 + NODE_CACHE_ADDR_MODE_EXTENDED_LINEAR, 63 + }; 64 + 60 65 /** 61 66 * struct node_cache_attrs - system memory caching attributes 62 67 * ··· 70 65 * @size: Total size of cache in bytes 71 66 * @line_size: Number of bytes fetched on a cache miss 72 67 * @level: The cache hierarchy level 68 + * @address_mode: The address mode 73 69 */ 74 70 struct node_cache_attrs { 75 71 enum cache_indexing indexing; ··· 78 72 u64 size; 79 73 u16 line_size; 80 74 u8 level; 75 + u16 address_mode; 81 76 }; 82 77 83 78 #ifdef CONFIG_HMEM_REPORTING
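
The address_mode member rides along with the existing node_cache_attrs fields and backs the new .../memory_side_cache/indexY/address_mode sysfs file. Below is a sketch of how a provider such as the HMAT parser might fill it in; the size, line size and level values are invented, and it assumes the node_add_cache() registration helper declared in this same header.

#include <linux/node.h>
#include <linux/sizes.h>

static void demo_register_memory_side_cache(unsigned int nid)
{
        struct node_cache_attrs cache = {
                .indexing       = NODE_CACHE_DIRECT_MAP,
                .write_policy   = NODE_CACHE_WRITE_BACK,
                .size           = SZ_16G,
                .line_size      = 64,
                .level          = 1,
                /* New attribute, exported as .../indexY/address_mode */
                .address_mode   = NODE_CACHE_ADDR_MODE_EXTENDED_LINEAR,
        };

        node_add_cache(nid, &cache);
}
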
+3
tools/testing/cxl/Kbuild
··· 61 61 cxl_core-y += $(CXL_CORE_SRC)/hdm.o 62 62 cxl_core-y += $(CXL_CORE_SRC)/pmu.o 63 63 cxl_core-y += $(CXL_CORE_SRC)/cdat.o 64 + cxl_core-y += $(CXL_CORE_SRC)/ras.o 65 + cxl_core-y += $(CXL_CORE_SRC)/acpi.o 64 66 cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o 65 67 cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o 68 + cxl_core-$(CONFIG_CXL_MCE) += $(CXL_CORE_SRC)/mce.o 66 69 cxl_core-$(CONFIG_CXL_FEATURES) += $(CXL_CORE_SRC)/features.o 67 70 cxl_core-y += config_check.o 68 71 cxl_core-y += cxl_core_test.o
+14 -18
tools/testing/cxl/test/cxl.c
··· 155 155 } cfmws7; 156 156 struct { 157 157 struct acpi_cedt_cfmws cfmws; 158 - u32 target[4]; 158 + u32 target[3]; 159 159 } cfmws8; 160 160 struct { 161 161 struct acpi_cedt_cxims cxims; ··· 331 331 .length = sizeof(mock_cedt.cfmws8), 332 332 }, 333 333 .interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR, 334 - .interleave_ways = 2, 335 - .granularity = 0, 334 + .interleave_ways = 8, 335 + .granularity = 1, 336 336 .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | 337 337 ACPI_CEDT_CFMWS_RESTRICT_PMEM, 338 338 .qtg_id = FAKE_QTG_ID, 339 - .window_size = SZ_256M * 16UL, 339 + .window_size = SZ_512M * 6UL, 340 340 }, 341 - .target = { 0, 1, 0, 1, }, 341 + .target = { 0, 1, 2, }, 342 342 }, 343 343 .cxims0 = { 344 344 .cxims = { ··· 1000 1000 find_cxl_root(port); 1001 1001 struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev); 1002 1002 struct cxl_dev_state *cxlds = cxlmd->cxlds; 1003 - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); 1004 1003 struct access_coordinate ep_c[ACCESS_COORDINATE_MAX]; 1005 - struct range pmem_range = { 1006 - .start = cxlds->pmem_res.start, 1007 - .end = cxlds->pmem_res.end, 1008 - }; 1009 - struct range ram_range = { 1010 - .start = cxlds->ram_res.start, 1011 - .end = cxlds->ram_res.end, 1012 - }; 1013 1004 1014 1005 if (!cxl_root) 1015 1006 return; 1016 1007 1017 - if (range_len(&ram_range)) 1018 - dpa_perf_setup(port, &ram_range, &mds->ram_perf); 1008 + for (int i = 0; i < cxlds->nr_partitions; i++) { 1009 + struct resource *res = &cxlds->part[i].res; 1010 + struct cxl_dpa_perf *perf = &cxlds->part[i].perf; 1011 + struct range range = { 1012 + .start = res->start, 1013 + .end = res->end, 1014 + }; 1019 1015 1020 - if (range_len(&pmem_range)) 1021 - dpa_perf_setup(port, &pmem_range, &mds->pmem_perf); 1016 + dpa_perf_setup(port, &range, perf); 1017 + } 1022 1018 1023 1019 cxl_memdev_update_perf(cxlmd); 1024 1020
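
cfmws8 now describes a window that a 3-way host bridge interleave can claim. CFMWS stores ways and granularity in their encoded forms, so interleave_ways = 8 decodes to 3 ways and granularity = 1 to 512 bytes, and the 6 * SZ_512M window size stays divisible by 3. The endpoint perf setup likewise walks the generic cxlds->part[] array instead of the fixed ram/pmem ranges, following the DPA partition rework. A small decoder sketch for these encodings, with hypothetical helper names (the CXL core has its own equivalents):

#include <linux/errno.h>
#include <linux/sizes.h>
#include <linux/types.h>

static int demo_eiw_to_ways(u8 eiw, unsigned int *ways)
{
        switch (eiw) {
        case 0 ... 4:
                *ways = 1 << eiw;       /* 1, 2, 4, 8, 16 ways */
                return 0;
        case 8 ... 10:
                *ways = 3 << (eiw - 8); /* 3, 6, 12 ways */
                return 0;
        default:
                return -EINVAL;
        }
}

static int demo_eig_to_granularity(u8 eig, unsigned int *granularity)
{
        if (eig > 6)
                return -EINVAL;

        *granularity = SZ_256 << eig;   /* 0 -> 256B, 1 -> 512B, ... 6 -> 16K */
        return 0;
}
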
+30 -2
tools/testing/cxl/test/mem.c
··· 78 78 .effect = CXL_CMD_EFFECT_NONE, 79 79 }, 80 80 { 81 + .opcode = cpu_to_le16(CXL_MBOX_OP_SET_SHUTDOWN_STATE), 82 + .effect = POLICY_CHANGE_IMMEDIATE, 83 + }, 84 + { 81 85 .opcode = cpu_to_le16(CXL_MBOX_OP_GET_POISON), 82 86 .effect = CXL_CMD_EFFECT_NONE, 83 87 }, ··· 182 178 u64 timestamp; 183 179 unsigned long sanitize_timeout; 184 180 struct vendor_test_feat test_feat; 181 + u8 shutdown_state; 185 182 }; 186 183 187 184 static struct mock_event_log *event_find_log(struct device *dev, int log_type) ··· 1110 1105 return 0; 1111 1106 } 1112 1107 1108 + static int mock_set_shutdown_state(struct cxl_mockmem_data *mdata, 1109 + struct cxl_mbox_cmd *cmd) 1110 + { 1111 + struct cxl_mbox_set_shutdown_state_in *ss = cmd->payload_in; 1112 + 1113 + if (cmd->size_in != sizeof(*ss)) 1114 + return -EINVAL; 1115 + 1116 + if (cmd->size_out != 0) 1117 + return -EINVAL; 1118 + 1119 + mdata->shutdown_state = ss->state; 1120 + return 0; 1121 + } 1122 + 1113 1123 static struct mock_poison { 1114 1124 struct cxl_dev_state *cxlds; 1115 1125 u64 dpa; ··· 1603 1583 case CXL_MBOX_OP_PASSPHRASE_SECURE_ERASE: 1604 1584 rc = mock_passphrase_secure_erase(mdata, cmd); 1605 1585 break; 1586 + case CXL_MBOX_OP_SET_SHUTDOWN_STATE: 1587 + rc = mock_set_shutdown_state(mdata, cmd); 1588 + break; 1606 1589 case CXL_MBOX_OP_GET_POISON: 1607 1590 rc = mock_get_poison(cxlds, cmd); 1608 1591 break; ··· 1693 1670 struct cxl_dev_state *cxlds; 1694 1671 struct cxl_mockmem_data *mdata; 1695 1672 struct cxl_mailbox *cxl_mbox; 1673 + struct cxl_dpa_info range_info = { 0 }; 1696 1674 int rc; 1697 1675 1698 1676 mdata = devm_kzalloc(dev, sizeof(*mdata), GFP_KERNEL); ··· 1733 1709 mds->event.buf = (struct cxl_get_event_payload *) mdata->event_buf; 1734 1710 INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mockmem_sanitize_work); 1735 1711 1736 - cxlds->serial = pdev->id; 1712 + cxlds->serial = pdev->id + 1; 1737 1713 if (is_rcd(pdev)) 1738 1714 cxlds->rcd = true; 1739 1715 ··· 1754 1730 if (rc) 1755 1731 return rc; 1756 1732 1757 - rc = cxl_mem_create_range_info(mds); 1733 + rc = cxl_mem_dpa_fetch(mds, &range_info); 1734 + if (rc) 1735 + return rc; 1736 + 1737 + rc = cxl_dpa_setup(cxlds, &range_info); 1758 1738 if (rc) 1759 1739 return rc; 1760 1740
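
The mock now services Set Shutdown State (advertised above with an immediate policy-change effect), bumps the fake serial to pdev->id + 1, presumably so the first device no longer reports a serial of 0 now that serial data is sanity checked, and moves probe over to the cxl_mem_dpa_fetch()/cxl_dpa_setup() pair from the partition rework. For reference, a hedged sketch of the driver-side command this mock exercises, assuming the usual cxl_internal_send_cmd() mailbox path inside drivers/cxl; the real helper in this pull is cxl_arm_dirty_shutdown(), and demo_arm_dirty_shutdown() is only illustrative.

/* Builds inside drivers/cxl/, where "cxlmem.h" provides the mailbox types */
#include "cxlmem.h"

static int demo_arm_dirty_shutdown(struct cxl_mailbox *cxl_mbox)
{
        struct cxl_mbox_set_shutdown_state_in in = {
                .state = 1,     /* 1 == Dirty, per the Set Shutdown State payload */
        };
        struct cxl_mbox_cmd mbox_cmd = {
                .opcode = CXL_MBOX_OP_SET_SHUTDOWN_STATE,
                .size_in = sizeof(in),
                .payload_in = &in,
        };

        return cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
}
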