Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

cxl: Check for invalid addresses returned from translation functions on errors

Translation functions may return an invalid address in case of errors.
If the address is not checked, further use of the invalid value
will cause address corruption.

Consistently check for a valid address returned by translation
functions. Use RESOURCE_SIZE_MAX to indicate an invalid address for
type resource_size_t. Depending on the type, either RESOURCE_SIZE_MAX
or ULLONG_MAX is used to indicate an address error.

Propagating an invalid address from a failed translation may cause
userspace to think it has received a valid SPA, when in fact it is
wrong. The CXL userspace API, using trace events, expects ULLONG_MAX
to indicate a translation failure. If ULLONG_MAX is not returned
immediately, subsequent calculations can transform that bad address
into a different value (!= ULLONG_MAX), and an invalid SPA may be
returned to userspace. This can lead to incorrect diagnostics and
erroneous corrective actions.

[ dj: Added user impact statement from Alison. ]
[ dj: Fixed checkpatch tab alignment issue. ]

Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Robert Richter <rrichter@amd.com>
Fixes: c3dd67681c70 ("cxl/region: Add inject and clear poison by region offset")
Fixes: b78b9e7b7979 ("cxl/region: Refactor address translation funcs for testing")
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Link: https://patch.msgid.link/20260107120544.410993-1-rrichter@amd.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>

authored by

Robert Richter and committed by
Dave Jiang
8441c7d3 d4026a44

+43 -19
+1 -1
drivers/cxl/core/hdm.c
··· 530 530 531 531 resource_size_t cxl_dpa_resource_start(struct cxl_endpoint_decoder *cxled) 532 532 { 533 - resource_size_t base = -1; 533 + resource_size_t base = RESOURCE_SIZE_MAX; 534 534 535 535 lockdep_assert_held(&cxl_rwsem.dpa); 536 536 if (cxled->dpa_res)
+26 -8
drivers/cxl/core/region.c
··· 3118 3118 struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); 3119 3119 struct cxl_region_params *p = &cxlr->params; 3120 3120 struct cxl_endpoint_decoder *cxled = NULL; 3121 - u64 dpa_offset, hpa_offset, hpa; 3121 + u64 base, dpa_offset, hpa_offset, hpa; 3122 3122 u16 eig = 0; 3123 3123 u8 eiw = 0; 3124 3124 int pos; ··· 3136 3136 ways_to_eiw(p->interleave_ways, &eiw); 3137 3137 granularity_to_eig(p->interleave_granularity, &eig); 3138 3138 3139 - dpa_offset = dpa - cxl_dpa_resource_start(cxled); 3139 + base = cxl_dpa_resource_start(cxled); 3140 + if (base == RESOURCE_SIZE_MAX) 3141 + return ULLONG_MAX; 3142 + 3143 + dpa_offset = dpa - base; 3140 3144 hpa_offset = cxl_calculate_hpa_offset(dpa_offset, pos, eiw, eig); 3145 + if (hpa_offset == ULLONG_MAX) 3146 + return ULLONG_MAX; 3141 3147 3142 3148 /* Apply the hpa_offset to the region base address */ 3143 3149 hpa = hpa_offset + p->res->start + p->cache_size; ··· 3151 3145 /* Root decoder translation overrides typical modulo decode */ 3152 3146 if (cxlrd->ops.hpa_to_spa) 3153 3147 hpa = cxlrd->ops.hpa_to_spa(cxlrd, hpa); 3148 + 3149 + if (hpa == ULLONG_MAX) 3150 + return ULLONG_MAX; 3154 3151 3155 3152 if (!cxl_resource_contains_addr(p->res, hpa)) { 3156 3153 dev_dbg(&cxlr->dev, ··· 3179 3170 struct cxl_region_params *p = &cxlr->params; 3180 3171 struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); 3181 3172 struct cxl_endpoint_decoder *cxled; 3182 - u64 hpa, hpa_offset, dpa_offset; 3173 + u64 hpa_offset = offset; 3174 + u64 dpa, dpa_offset; 3183 3175 u16 eig = 0; 3184 3176 u8 eiw = 0; 3185 3177 int pos; ··· 3197 3187 * CXL HPA is assumed to equal SPA. 
3198 3188 */ 3199 3189 if (cxlrd->ops.spa_to_hpa) { 3200 - hpa = cxlrd->ops.spa_to_hpa(cxlrd, p->res->start + offset); 3201 - hpa_offset = hpa - p->res->start; 3202 - } else { 3203 - hpa_offset = offset; 3190 + hpa_offset = cxlrd->ops.spa_to_hpa(cxlrd, p->res->start + offset); 3191 + if (hpa_offset == ULLONG_MAX) { 3192 + dev_dbg(&cxlr->dev, "HPA not found for %pr offset %#llx\n", 3193 + p->res, offset); 3194 + return -ENXIO; 3195 + } 3196 + hpa_offset -= p->res->start; 3204 3197 } 3205 3198 3206 3199 pos = cxl_calculate_position(hpa_offset, eiw, eig); ··· 3220 3207 cxled = p->targets[i]; 3221 3208 if (cxled->pos != pos) 3222 3209 continue; 3210 + 3211 + dpa = cxl_dpa_resource_start(cxled); 3212 + if (dpa != RESOURCE_SIZE_MAX) 3213 + dpa += dpa_offset; 3214 + 3223 3215 result->cxlmd = cxled_to_memdev(cxled); 3224 - result->dpa = cxl_dpa_resource_start(cxled) + dpa_offset; 3216 + result->dpa = dpa; 3225 3217 3226 3218 return 0; 3227 3219 }
+16 -10
tools/testing/cxl/test/cxl_translate.c
··· 68 68 69 69 /* Calculate base HPA offset from DPA and position */ 70 70 hpa_offset = cxl_calculate_hpa_offset(dpa_offset, pos, r_eiw, r_eig); 71 + if (hpa_offset == ULLONG_MAX) 72 + return ULLONG_MAX; 71 73 72 74 if (math == XOR_MATH) { 73 75 cximsd->nr_maps = hbiw_to_nr_maps[hb_ways]; ··· 260 258 pos = get_random_u32() % ways; 261 259 dpa = get_random_u64() >> 12; 262 260 261 + reverse_dpa = ULLONG_MAX; 262 + reverse_pos = -1; 263 + 263 264 hpa = cxl_calculate_hpa_offset(dpa, pos, eiw, eig); 264 - reverse_dpa = cxl_calculate_dpa_offset(hpa, eiw, eig); 265 - reverse_pos = cxl_calculate_position(hpa, eiw, eig); 265 + if (hpa != ULLONG_MAX) { 266 + reverse_dpa = cxl_calculate_dpa_offset(hpa, eiw, eig); 267 + reverse_pos = cxl_calculate_position(hpa, eiw, eig); 268 + if (reverse_dpa == dpa && reverse_pos == pos) 269 + continue; 270 + } 266 271 267 - if (reverse_dpa != dpa || reverse_pos != pos) { 268 - pr_err("test random iter %d FAIL hpa=%llu, dpa=%llu reverse_dpa=%llu, pos=%d reverse_pos=%d eiw=%u eig=%u\n", 269 - i, hpa, dpa, reverse_dpa, pos, reverse_pos, eiw, 270 - eig); 272 + pr_err("test random iter %d FAIL hpa=%llu, dpa=%llu reverse_dpa=%llu, pos=%d reverse_pos=%d eiw=%u eig=%u\n", 273 + i, hpa, dpa, reverse_dpa, pos, reverse_pos, eiw, eig); 271 274 272 - if (failures++ > 10) { 273 - pr_err("test random too many failures, stop\n"); 274 - break; 275 - } 275 + if (failures++ > 10) { 276 + pr_err("test random too many failures, stop\n"); 277 + break; 276 278 } 277 279 } 278 280 pr_info("..... test random: PASS %d FAIL %d\n", i - failures, failures);