cxl: Restore XOR'd position bits during address translation

When a device reports a DPA in events like poison, general_media,
and dram, the driver translates that DPA back to an HPA. Presently,
the CXL driver's translation only considers the Modulo position and
will report the wrong HPA for XOR-configured root decoders.

Add a helper function that restores the XOR'd bits during DPA->HPA
address translation. Plumb a root decoder callback to the new helper
when XOR interleave arithmetic is in use. For Modulo arithmetic, leave
the callback NULL since no extra work is required.
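
To make the arithmetic concrete, here is a standalone userspace sketch
(illustration only, not the driver code; the xormap value, addresses,
and function names below are made up) of how XORing the HPA bits
selected by a xormap both encodes the interleave selector and, applied
a second time, restores the original position bit:

/*
 * Standalone model of the XOR position-bit restore (illustration only,
 * not kernel code). Build with: cc -o xor-restore xor-restore.c
 */
#include <stdint.h>
#include <stdio.h>

/* XORALLBITS: XOR of all the bits selected by the map, i.e. their parity */
static uint64_t xor_all_bits(uint64_t addr, uint64_t map)
{
	return __builtin_popcountll(addr & map) & 1;
}

int main(void)
{
	/* Hypothetical xormap: selector at bit 8 (lowest set bit),
	 * folded with address bits 16 and 24 */
	uint64_t xormap = (1ULL << 8) | (1ULL << 16) | (1ULL << 24);
	int pos = __builtin_ctzll(xormap);	/* like __ffs() in the kernel */
	uint64_t spa = 0x40010100;		/* made-up "true" address */

	/* Forward direction: bit 'pos' is replaced by the XOR of all the
	 * bits the xormap selects */
	uint64_t sel = xor_all_bits(spa, xormap);
	uint64_t hpa = (spa & ~(1ULL << pos)) | (sel << pos);

	/* Restore: XORing the same selected bits of the reconstructed hpa
	 * cancels the folded-in upper bits and yields the original bit */
	uint64_t val = xor_all_bits(hpa, xormap);
	uint64_t restored = (hpa & ~(1ULL << pos)) | (val << pos);

	printf("spa %#llx hpa %#llx restored %#llx\n",
	       (unsigned long long)spa, (unsigned long long)hpa,
	       (unsigned long long)restored);

	return restored == spa ? 0 : 1;
}

With the values above the selector works out to 0, so the encoded hpa
(0x40010000) differs from the true address in bit 8, and the second
XOR pass puts that bit back.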

Upon completion of a DPA->HPA translation, two checks are performed
on the result. One simply confirms that the calculated
HPA is within the address range of the region. That test is useful
for both Modulo and XOR interleave arithmetic decodes.

A second check confirms that the HPA is within an expected chunk
based on the endpoint's position in the region and the region
granularity. An XOR decode disrupts the Modulo pattern, making the
chunk check useless.

To align the checks with the proper decode, pull the region range
check inline and use the helper to do the chunk check for Modulo
decodes only.
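
The resulting ordering of the checks can be sketched as follows. This
is illustrative only; the names and the chunk math are paraphrased
rather than the exact kernel helpers. Under Modulo interleave, the
endpoint at position 'pos' owns every granularity-sized chunk whose
index modulo 'ways' equals 'pos':

/* Illustrative ordering of the post-translation checks (not kernel code) */
#include <stdbool.h>
#include <stdint.h>

#define ADDR_INVALID UINT64_MAX

/* Modulo interleave: chunk index modulo ways must match the position */
static bool hpa_in_expected_chunk(uint64_t offset, uint64_t gran,
				  int ways, int pos)
{
	return (offset / gran) % ways == (uint64_t)pos;
}

/* xor_restore stands in for the root decoder callback; NULL for Modulo */
static uint64_t check_translated_hpa(uint64_t hpa, uint64_t start,
				     uint64_t end, uint64_t gran,
				     int ways, int pos,
				     uint64_t (*xor_restore)(uint64_t hpa))
{
	if (xor_restore)
		hpa = xor_restore(hpa);

	/* Region range check applies to both Modulo and XOR decodes */
	if (hpa < start || hpa > end)
		return ADDR_INVALID;

	/* Chunk check only holds when no XOR restore was needed */
	if (!xor_restore &&
	    !hpa_in_expected_chunk(hpa - start, gran, ways, pos))
		return ADDR_INVALID;

	return hpa;
}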

A cxl-test unit test is posted for upstream review here:
https://lore.kernel.org/20240624210644.495563-1-alison.schofield@intel.com/

Fixes: 28a3ae4ff66c ("cxl/trace: Add an HPA to cxl_poison trace events")
Signed-off-by: Alison Schofield <alison.schofield@intel.com>
Tested-by: Diego Garcia Rodriguez <diego.garcia.rodriguez@intel.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Link: https://patch.msgid.link/1a1ac880d9f889bd6384e657e810431b9a0a72e5.1719980933.git.alison.schofield@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>


3 files changed, 57 insertions(+), 9 deletions(-)

drivers/cxl/acpi.c (+40)
···
 	return cxlrd->cxlsd.target[n];
 }
 
+static u64 cxl_xor_hpa_to_spa(struct cxl_root_decoder *cxlrd, u64 hpa)
+{
+	struct cxl_cxims_data *cximsd = cxlrd->platform_data;
+	int hbiw = cxlrd->cxlsd.nr_targets;
+	u64 val;
+	int pos;
+
+	/* No xormaps for host bridge interleave ways of 1 or 3 */
+	if (hbiw == 1 || hbiw == 3)
+		return hpa;
+
+	/*
+	 * For root decoders using xormaps (hbiw: 2,4,6,8,12,16) restore
+	 * the position bit to its value before the xormap was applied at
+	 * HPA->DPA translation.
+	 *
+	 * pos is the lowest set bit in an XORMAP
+	 * val is the XORALLBITS(HPA & XORMAP)
+	 *
+	 * XORALLBITS: The CXL spec (3.1 Table 9-22) defines XORALLBITS
+	 * as an operation that outputs a single bit by XORing all the
+	 * bits in the input (hpa & xormap). Implement XORALLBITS using
+	 * hweight64(). If the hamming weight is even the XOR of those
+	 * bits results in val==0, if odd the XOR result is val==1.
+	 */
+
+	for (int i = 0; i < cximsd->nr_maps; i++) {
+		if (!cximsd->xormaps[i])
+			continue;
+		pos = __ffs(cximsd->xormaps[i]);
+		val = (hweight64(hpa & cximsd->xormaps[i]) & 1);
+		hpa = (hpa & ~(1ULL << pos)) | (val << pos);
+	}
+
+	return hpa;
+}
+
 struct cxl_cxims_context {
 	struct device *dev;
 	struct cxl_root_decoder *cxlrd;
···
 	}
 
 	cxlrd->qos_class = cfmws->qtg_id;
+
+	if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_XOR)
+		cxlrd->hpa_to_spa = cxl_xor_hpa_to_spa;
 
 	rc = cxl_decoder_add(cxld, target_map);
 	if (rc)
drivers/cxl/core/region.c (+14, -9)
···
 	return ctx.cxlr;
 }
 
-static bool cxl_is_hpa_in_range(u64 hpa, struct cxl_region *cxlr, int pos)
+static bool cxl_is_hpa_in_chunk(u64 hpa, struct cxl_region *cxlr, int pos)
 {
 	struct cxl_region_params *p = &cxlr->params;
 	int gran = p->interleave_granularity;
 	int ways = p->interleave_ways;
 	u64 offset;
-
-	/* Is the hpa within this region at all */
-	if (hpa < p->res->start || hpa > p->res->end) {
-		dev_dbg(&cxlr->dev,
-			"Addr trans fail: hpa 0x%llx not in region\n", hpa);
-		return false;
-	}
 
 	/* Is the hpa in an expected chunk for its pos(-ition) */
 	offset = hpa - p->res->start;
···
 u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
 		   u64 dpa)
 {
+	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
 	u64 dpa_offset, hpa_offset, bits_upper, mask_upper, hpa;
 	struct cxl_region_params *p = &cxlr->params;
 	struct cxl_endpoint_decoder *cxled = NULL;
···
 	/* Apply the hpa_offset to the region base address */
 	hpa = hpa_offset + p->res->start;
 
-	if (!cxl_is_hpa_in_range(hpa, cxlr, cxled->pos))
+	/* Root decoder translation overrides typical modulo decode */
+	if (cxlrd->hpa_to_spa)
+		hpa = cxlrd->hpa_to_spa(cxlrd, hpa);
+
+	if (hpa < p->res->start || hpa > p->res->end) {
+		dev_dbg(&cxlr->dev,
+			"Addr trans fail: hpa 0x%llx not in region\n", hpa);
+		return ULLONG_MAX;
+	}
+
+	/* Simple chunk check, by pos & gran, only applies to modulo decodes */
+	if (!cxlrd->hpa_to_spa && (!cxl_is_hpa_in_chunk(hpa, cxlr, pos)))
 		return ULLONG_MAX;
 
 	return hpa;
drivers/cxl/cxl.h (+3)
···
 struct cxl_root_decoder;
 typedef struct cxl_dport *(*cxl_calc_hb_fn)(struct cxl_root_decoder *cxlrd,
 					    int pos);
+typedef u64 (*cxl_hpa_to_spa_fn)(struct cxl_root_decoder *cxlrd, u64 hpa);
 
 /**
  * struct cxl_root_decoder - Static platform CXL address decoder
  * @res: host / parent resource for region allocations
  * @region_id: region id for next region provisioning event
  * @calc_hb: which host bridge covers the n'th position by granularity
+ * @hpa_to_spa: translate CXL host-physical-address to Platform system-physical-address
  * @platform_data: platform specific configuration data
  * @range_lock: sync region autodiscovery by address range
  * @qos_class: QoS performance class cookie
···
 	struct resource *res;
 	atomic_t region_id;
 	cxl_calc_hb_fn calc_hb;
+	cxl_hpa_to_spa_fn hpa_to_spa;
 	void *platform_data;
 	struct mutex range_lock;
 	int qos_class;