Merge branch 'for-6.11/xor_fixes' into cxl-for-next

Series to fix XOR math for DPA to SPA translation
- Refactor and fold cxl_trace_hpa() into cxl_dpa_to_hpa()
- Complete DPA->HPA->SPA translation and correct XOR translation issue
- Add new method to verify a CXL target position
- Remove old method of CXL target position verification

+77 -113
+34 -50
drivers/cxl/acpi.c
··· 22 22 GUID_INIT(0xF365F9A6, 0xA7DE, 0x4071, 23 23 0xA6, 0x6A, 0xB4, 0x0C, 0x0B, 0x4F, 0x8E, 0x52); 24 24 25 - /* 26 - * Find a targets entry (n) in the host bridge interleave list. 27 - * CXL Specification 3.0 Table 9-22 28 - */ 29 - static int cxl_xor_calc_n(u64 hpa, struct cxl_cxims_data *cximsd, int iw, 30 - int ig) 31 - { 32 - int i = 0, n = 0; 33 - u8 eiw; 34 25 35 - /* IW: 2,4,6,8,12,16 begin building 'n' using xormaps */ 36 - if (iw != 3) { 37 - for (i = 0; i < cximsd->nr_maps; i++) 38 - n |= (hweight64(hpa & cximsd->xormaps[i]) & 1) << i; 39 - } 40 - /* IW: 3,6,12 add a modulo calculation to 'n' */ 41 - if (!is_power_of_2(iw)) { 42 - if (ways_to_eiw(iw, &eiw)) 43 - return -1; 44 - hpa &= GENMASK_ULL(51, eiw + ig); 45 - n |= do_div(hpa, 3) << i; 46 - } 47 - return n; 48 - } 49 - 50 - static struct cxl_dport *cxl_hb_xor(struct cxl_root_decoder *cxlrd, int pos) 26 + static u64 cxl_xor_hpa_to_spa(struct cxl_root_decoder *cxlrd, u64 hpa) 51 27 { 52 28 struct cxl_cxims_data *cximsd = cxlrd->platform_data; 53 - struct cxl_switch_decoder *cxlsd = &cxlrd->cxlsd; 54 - struct cxl_decoder *cxld = &cxlsd->cxld; 55 - int ig = cxld->interleave_granularity; 56 - int iw = cxld->interleave_ways; 57 - int n = 0; 58 - u64 hpa; 29 + int hbiw = cxlrd->cxlsd.nr_targets; 30 + u64 val; 31 + int pos; 59 32 60 - if (dev_WARN_ONCE(&cxld->dev, 61 - cxld->interleave_ways != cxlsd->nr_targets, 62 - "misconfigured root decoder\n")) 63 - return NULL; 33 + /* No xormaps for host bridge interleave ways of 1 or 3 */ 34 + if (hbiw == 1 || hbiw == 3) 35 + return hpa; 64 36 65 - hpa = cxlrd->res->start + pos * ig; 37 + /* 38 + * For root decoders using xormaps (hbiw: 2,4,6,8,12,16) restore 39 + * the position bit to its value before the xormap was applied at 40 + * HPA->DPA translation. 
41 + * 42 + * pos is the lowest set bit in an XORMAP 43 + * val is the XORALLBITS(HPA & XORMAP) 44 + * 45 + * XORALLBITS: The CXL spec (3.1 Table 9-22) defines XORALLBITS 46 + * as an operation that outputs a single bit by XORing all the 47 + * bits in the input (hpa & xormap). Implement XORALLBITS using 48 + * hweight64(). If the hamming weight is even the XOR of those 49 + * bits results in val==0, if odd the XOR result is val==1. 50 + */ 66 51 67 - /* Entry (n) is 0 for no interleave (iw == 1) */ 68 - if (iw != 1) 69 - n = cxl_xor_calc_n(hpa, cximsd, iw, ig); 52 + for (int i = 0; i < cximsd->nr_maps; i++) { 53 + if (!cximsd->xormaps[i]) 54 + continue; 55 + pos = __ffs(cximsd->xormaps[i]); 56 + val = (hweight64(hpa & cximsd->xormaps[i]) & 1); 57 + hpa = (hpa & ~(1ULL << pos)) | (val << pos); 58 + } 70 59 71 - if (n < 0) 72 - return NULL; 73 - 74 - return cxlrd->cxlsd.target[n]; 60 + return hpa; 75 61 } 76 62 77 63 struct cxl_cxims_context { ··· 347 361 struct cxl_port *root_port = ctx->root_port; 348 362 struct cxl_cxims_context cxims_ctx; 349 363 struct device *dev = ctx->dev; 350 - cxl_calc_hb_fn cxl_calc_hb; 351 364 struct cxl_decoder *cxld; 352 365 unsigned int ways, i, ig; 353 366 int rc; ··· 374 389 if (rc) 375 390 return rc; 376 391 377 - if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_MODULO) 378 - cxl_calc_hb = cxl_hb_modulo; 379 - else 380 - cxl_calc_hb = cxl_hb_xor; 381 - 382 392 struct cxl_root_decoder *cxlrd __free(put_cxlrd) = 383 - cxl_root_decoder_alloc(root_port, ways, cxl_calc_hb); 393 + cxl_root_decoder_alloc(root_port, ways); 394 + 384 395 if (IS_ERR(cxlrd)) 385 396 return PTR_ERR(cxlrd); 386 397 ··· 414 433 } 415 434 416 435 cxlrd->qos_class = cfmws->qtg_id; 436 + 437 + if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_XOR) 438 + cxlrd->hpa_to_spa = cxl_xor_hpa_to_spa; 417 439 418 440 rc = cxl_decoder_add(cxld, target_map); 419 441 if (rc)
+4 -4
drivers/cxl/core/core.h
··· 28 28 void cxl_region_exit(void); 29 29 int cxl_get_poison_by_endpoint(struct cxl_port *port); 30 30 struct cxl_region *cxl_dpa_to_region(const struct cxl_memdev *cxlmd, u64 dpa); 31 - u64 cxl_trace_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, 32 - u64 dpa); 31 + u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, 32 + u64 dpa); 33 33 34 34 #else 35 - static inline u64 36 - cxl_trace_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, u64 dpa) 35 + static inline u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, 36 + const struct cxl_memdev *cxlmd, u64 dpa) 37 37 { 38 38 return ULLONG_MAX; 39 39 }
+1 -1
drivers/cxl/core/mbox.c
··· 878 878 dpa = le64_to_cpu(evt->media_hdr.phys_addr) & CXL_DPA_MASK; 879 879 cxlr = cxl_dpa_to_region(cxlmd, dpa); 880 880 if (cxlr) 881 - hpa = cxl_trace_hpa(cxlr, cxlmd, dpa); 881 + hpa = cxl_dpa_to_hpa(cxlr, cxlmd, dpa); 882 882 883 883 if (event_type == CXL_CPER_EVENT_GEN_MEDIA) 884 884 trace_cxl_general_media(cxlmd, type, cxlr, hpa,
+1 -19
drivers/cxl/core/port.c
··· 1733 1733 return 0; 1734 1734 } 1735 1735 1736 - struct cxl_dport *cxl_hb_modulo(struct cxl_root_decoder *cxlrd, int pos) 1737 - { 1738 - struct cxl_switch_decoder *cxlsd = &cxlrd->cxlsd; 1739 - struct cxl_decoder *cxld = &cxlsd->cxld; 1740 - int iw; 1741 - 1742 - iw = cxld->interleave_ways; 1743 - if (dev_WARN_ONCE(&cxld->dev, iw != cxlsd->nr_targets, 1744 - "misconfigured root decoder\n")) 1745 - return NULL; 1746 - 1747 - return cxlrd->cxlsd.target[pos % iw]; 1748 - } 1749 - EXPORT_SYMBOL_NS_GPL(cxl_hb_modulo, CXL); 1750 - 1751 1736 static struct lock_class_key cxl_decoder_key; 1752 1737 1753 1738 /** ··· 1792 1807 * cxl_root_decoder_alloc - Allocate a root level decoder 1793 1808 * @port: owning CXL root of this decoder 1794 1809 * @nr_targets: static number of downstream targets 1795 - * @calc_hb: which host bridge covers the n'th position by granularity 1796 1810 * 1797 1811 * Return: A new cxl decoder to be registered by cxl_decoder_add(). A 1798 1812 * 'CXL root' decoder is one that decodes from a top-level / static platform ··· 1799 1815 * topology. 1800 1816 */ 1801 1817 struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port, 1802 - unsigned int nr_targets, 1803 - cxl_calc_hb_fn calc_hb) 1818 + unsigned int nr_targets) 1804 1819 { 1805 1820 struct cxl_root_decoder *cxlrd; 1806 1821 struct cxl_switch_decoder *cxlsd; ··· 1821 1838 return ERR_PTR(rc); 1822 1839 } 1823 1840 1824 - cxlrd->calc_hb = calc_hb; 1825 1841 mutex_init(&cxlrd->range_lock); 1826 1842 1827 1843 cxld = &cxlsd->cxld;
+31 -30
drivers/cxl/core/region.c
··· 1560 1560 const struct cxl_dport *dport, int pos) 1561 1561 { 1562 1562 struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); 1563 + struct cxl_switch_decoder *cxlsd = &cxlrd->cxlsd; 1564 + struct cxl_decoder *cxld = &cxlsd->cxld; 1565 + int iw = cxld->interleave_ways; 1563 1566 struct cxl_port *iter; 1564 1567 int rc; 1565 1568 1566 - if (cxlrd->calc_hb(cxlrd, pos) != dport) { 1569 + if (dport != cxlrd->cxlsd.target[pos % iw]) { 1567 1570 dev_dbg(&cxlr->dev, "%s:%s invalid target position for %s\n", 1568 1571 dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), 1569 1572 dev_name(&cxlrd->cxlsd.cxld.dev)); ··· 2762 2759 return ctx.cxlr; 2763 2760 } 2764 2761 2765 - static bool cxl_is_hpa_in_range(u64 hpa, struct cxl_region *cxlr, int pos) 2762 + static bool cxl_is_hpa_in_chunk(u64 hpa, struct cxl_region *cxlr, int pos) 2766 2763 { 2767 2764 struct cxl_region_params *p = &cxlr->params; 2768 2765 int gran = p->interleave_granularity; 2769 2766 int ways = p->interleave_ways; 2770 2767 u64 offset; 2771 - 2772 - /* Is the hpa within this region at all */ 2773 - if (hpa < p->res->start || hpa > p->res->end) { 2774 - dev_dbg(&cxlr->dev, 2775 - "Addr trans fail: hpa 0x%llx not in region\n", hpa); 2776 - return false; 2777 - } 2778 2768 2779 2769 /* Is the hpa in an expected chunk for its pos(-ition) */ 2780 2770 offset = hpa - p->res->start; ··· 2781 2785 return false; 2782 2786 } 2783 2787 2784 - static u64 cxl_dpa_to_hpa(u64 dpa, struct cxl_region *cxlr, 2785 - struct cxl_endpoint_decoder *cxled) 2788 + u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, 2789 + u64 dpa) 2786 2790 { 2791 + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); 2787 2792 u64 dpa_offset, hpa_offset, bits_upper, mask_upper, hpa; 2788 2793 struct cxl_region_params *p = &cxlr->params; 2789 - int pos = cxled->pos; 2794 + struct cxl_endpoint_decoder *cxled = NULL; 2790 2795 u16 eig = 0; 2791 2796 u8 eiw = 0; 2797 + int pos; 2792 2798 2799 + for (int i = 0; 
i < p->nr_targets; i++) { 2800 + cxled = p->targets[i]; 2801 + if (cxlmd == cxled_to_memdev(cxled)) 2802 + break; 2803 + } 2804 + if (!cxled || cxlmd != cxled_to_memdev(cxled)) 2805 + return ULLONG_MAX; 2806 + 2807 + pos = cxled->pos; 2793 2808 ways_to_eiw(p->interleave_ways, &eiw); 2794 2809 granularity_to_eig(p->interleave_granularity, &eig); 2795 2810 ··· 2834 2827 /* Apply the hpa_offset to the region base address */ 2835 2828 hpa = hpa_offset + p->res->start; 2836 2829 2837 - if (!cxl_is_hpa_in_range(hpa, cxlr, cxled->pos)) 2830 + /* Root decoder translation overrides typical modulo decode */ 2831 + if (cxlrd->hpa_to_spa) 2832 + hpa = cxlrd->hpa_to_spa(cxlrd, hpa); 2833 + 2834 + if (hpa < p->res->start || hpa > p->res->end) { 2835 + dev_dbg(&cxlr->dev, 2836 + "Addr trans fail: hpa 0x%llx not in region\n", hpa); 2837 + return ULLONG_MAX; 2838 + } 2839 + 2840 + /* Simple chunk check, by pos & gran, only applies to modulo decodes */ 2841 + if (!cxlrd->hpa_to_spa && (!cxl_is_hpa_in_chunk(hpa, cxlr, pos))) 2838 2842 return ULLONG_MAX; 2839 2843 2840 2844 return hpa; 2841 - } 2842 - 2843 - u64 cxl_trace_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, 2844 - u64 dpa) 2845 - { 2846 - struct cxl_region_params *p = &cxlr->params; 2847 - struct cxl_endpoint_decoder *cxled = NULL; 2848 - 2849 - for (int i = 0; i < p->nr_targets; i++) { 2850 - cxled = p->targets[i]; 2851 - if (cxlmd == cxled_to_memdev(cxled)) 2852 - break; 2853 - } 2854 - if (!cxled || cxlmd != cxled_to_memdev(cxled)) 2855 - return ULLONG_MAX; 2856 - 2857 - return cxl_dpa_to_hpa(dpa, cxlr, cxled); 2858 2845 } 2859 2846 2860 2847 static struct lock_class_key cxl_pmem_region_key;
+2 -2
drivers/cxl/core/trace.h
··· 704 704 if (cxlr) { 705 705 __assign_str(region); 706 706 memcpy(__entry->uuid, &cxlr->params.uuid, 16); 707 - __entry->hpa = cxl_trace_hpa(cxlr, cxlmd, 708 - __entry->dpa); 707 + __entry->hpa = cxl_dpa_to_hpa(cxlr, cxlmd, 708 + __entry->dpa); 709 709 } else { 710 710 __assign_str(region); 711 711 memset(__entry->uuid, 0, 16);
+4 -7
drivers/cxl/cxl.h
··· 432 432 }; 433 433 434 434 struct cxl_root_decoder; 435 - typedef struct cxl_dport *(*cxl_calc_hb_fn)(struct cxl_root_decoder *cxlrd, 436 - int pos); 435 + typedef u64 (*cxl_hpa_to_spa_fn)(struct cxl_root_decoder *cxlrd, u64 hpa); 437 436 438 437 /** 439 438 * struct cxl_root_decoder - Static platform CXL address decoder 440 439 * @res: host / parent resource for region allocations 441 440 * @region_id: region id for next region provisioning event 442 - * @calc_hb: which host bridge covers the n'th position by granularity 441 + * @hpa_to_spa: translate CXL host-physical-address to Platform system-physical-address 443 442 * @platform_data: platform specific configuration data 444 443 * @range_lock: sync region autodiscovery by address range 445 444 * @qos_class: QoS performance class cookie ··· 447 448 struct cxl_root_decoder { 448 449 struct resource *res; 449 450 atomic_t region_id; 450 - cxl_calc_hb_fn calc_hb; 451 + cxl_hpa_to_spa_fn hpa_to_spa; 451 452 void *platform_data; 452 453 struct mutex range_lock; 453 454 int qos_class; ··· 773 774 bool is_switch_decoder(struct device *dev); 774 775 bool is_endpoint_decoder(struct device *dev); 775 776 struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port, 776 - unsigned int nr_targets, 777 - cxl_calc_hb_fn calc_hb); 778 - struct cxl_dport *cxl_hb_modulo(struct cxl_root_decoder *cxlrd, int pos); 777 + unsigned int nr_targets); 779 778 struct cxl_switch_decoder *cxl_switch_decoder_alloc(struct cxl_port *port, 780 779 unsigned int nr_targets); 781 780 int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map);