Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

cxl/region: Allocate HPA capacity to regions

After a region's interleave parameters (ways and granularity) are set,
add a way for regions to allocate HPA (host physical address space) from
the free capacity in their parent root-decoder. The allocator for this
capacity reuses the 'struct resource' based allocator used for
CONFIG_DEVICE_PRIVATE.

Once the tuple of "ways, granularity, [uuid], and size" is set, the
region configuration transitions to the CXL_CONFIG_INTERLEAVE_ACTIVE
state, which is a precursor to allowing endpoint decoders to be added to
a region.

Co-developed-by: Ben Widawsky <bwidawsk@kernel.org>
Signed-off-by: Ben Widawsky <bwidawsk@kernel.org>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Link: https://lore.kernel.org/r/165784335630.1758207.420216490941955417.stgit@dwillia2-xfh.jf.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>

+183 -1
+29
Documentation/ABI/testing/sysfs-bus-cxl
··· 324 324 (RW) Configures the number of devices participating in the 325 325 region is set by writing this value. Each device will provide 326 326 1/interleave_ways of storage for the region. 327 + 328 + 329 + What: /sys/bus/cxl/devices/regionZ/size 330 + Date: May, 2022 331 + KernelVersion: v5.20 332 + Contact: linux-cxl@vger.kernel.org 333 + Description: 334 + (RW) System physical address space to be consumed by the region. 335 + When written, triggers the driver to allocate space out of the 336 + parent root decoder's address space. When read, the size of the 337 + address space is reported and should match the span of the 338 + region's resource attribute. Size shall be set after the 339 + interleave configuration parameters. Once set, it cannot be 340 + changed, only freed by writing 0. The kernel makes no guarantees 341 + that data is maintained over an address space freeing event, and 342 + there is no guarantee that a free followed by an allocate 343 + results in the same address being allocated. 344 + 345 + 346 + What: /sys/bus/cxl/devices/regionZ/resource 347 + Date: May, 2022 348 + KernelVersion: v5.20 349 + Contact: linux-cxl@vger.kernel.org 350 + Description: 351 + (RO) A region is a contiguous partition of a CXL root decoder 352 + address space. Region capacity is allocated by writing to the 353 + size attribute; the resulting physical address space determined 354 + by the driver is reflected here. It is therefore not useful to 355 + read this before writing a value to the size attribute.
+3
drivers/cxl/Kconfig
··· 106 106 config CXL_REGION 107 107 bool 108 108 default CXL_BUS 109 + # For MAX_PHYSMEM_BITS 110 + depends on SPARSEMEM 109 111 select MEMREGION 112 + select GET_FREE_REGION 110 113 111 114 endif
+149 -1
drivers/cxl/core/region.c
··· 250 250 } 251 251 static DEVICE_ATTR_RW(interleave_granularity); 252 252 253 + static ssize_t resource_show(struct device *dev, struct device_attribute *attr, 254 + char *buf) 255 + { 256 + struct cxl_region *cxlr = to_cxl_region(dev); 257 + struct cxl_region_params *p = &cxlr->params; 258 + u64 resource = -1ULL; 259 + ssize_t rc; 260 + 261 + rc = down_read_interruptible(&cxl_region_rwsem); 262 + if (rc) 263 + return rc; 264 + if (p->res) 265 + resource = p->res->start; 266 + rc = sysfs_emit(buf, "%#llx\n", resource); 267 + up_read(&cxl_region_rwsem); 268 + 269 + return rc; 270 + } 271 + static DEVICE_ATTR_RO(resource); 272 + 273 + static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size) 274 + { 275 + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); 276 + struct cxl_region_params *p = &cxlr->params; 277 + struct resource *res; 278 + u32 remainder = 0; 279 + 280 + lockdep_assert_held_write(&cxl_region_rwsem); 281 + 282 + /* Nothing to do... */ 283 + if (p->res && resource_size(res) == size) 284 + return 0; 285 + 286 + /* To change size the old size must be freed first */ 287 + if (p->res) 288 + return -EBUSY; 289 + 290 + if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) 291 + return -EBUSY; 292 + 293 + /* ways, granularity and uuid (if PMEM) need to be set before HPA */ 294 + if (!p->interleave_ways || !p->interleave_granularity || 295 + (cxlr->mode == CXL_DECODER_PMEM && uuid_is_null(&p->uuid))) 296 + return -ENXIO; 297 + 298 + div_u64_rem(size, SZ_256M * p->interleave_ways, &remainder); 299 + if (remainder) 300 + return -EINVAL; 301 + 302 + res = alloc_free_mem_region(cxlrd->res, size, SZ_256M, 303 + dev_name(&cxlr->dev)); 304 + if (IS_ERR(res)) { 305 + dev_dbg(&cxlr->dev, "failed to allocate HPA: %ld\n", 306 + PTR_ERR(res)); 307 + return PTR_ERR(res); 308 + } 309 + 310 + p->res = res; 311 + p->state = CXL_CONFIG_INTERLEAVE_ACTIVE; 312 + 313 + return 0; 314 + } 315 + 316 + static void cxl_region_iomem_release(struct cxl_region 
*cxlr) 317 + { 318 + struct cxl_region_params *p = &cxlr->params; 319 + 320 + if (device_is_registered(&cxlr->dev)) 321 + lockdep_assert_held_write(&cxl_region_rwsem); 322 + if (p->res) { 323 + remove_resource(p->res); 324 + kfree(p->res); 325 + p->res = NULL; 326 + } 327 + } 328 + 329 + static int free_hpa(struct cxl_region *cxlr) 330 + { 331 + struct cxl_region_params *p = &cxlr->params; 332 + 333 + lockdep_assert_held_write(&cxl_region_rwsem); 334 + 335 + if (!p->res) 336 + return 0; 337 + 338 + if (p->state >= CXL_CONFIG_ACTIVE) 339 + return -EBUSY; 340 + 341 + cxl_region_iomem_release(cxlr); 342 + p->state = CXL_CONFIG_IDLE; 343 + return 0; 344 + } 345 + 346 + static ssize_t size_store(struct device *dev, struct device_attribute *attr, 347 + const char *buf, size_t len) 348 + { 349 + struct cxl_region *cxlr = to_cxl_region(dev); 350 + u64 val; 351 + int rc; 352 + 353 + rc = kstrtou64(buf, 0, &val); 354 + if (rc) 355 + return rc; 356 + 357 + rc = down_write_killable(&cxl_region_rwsem); 358 + if (rc) 359 + return rc; 360 + 361 + if (val) 362 + rc = alloc_hpa(cxlr, val); 363 + else 364 + rc = free_hpa(cxlr); 365 + up_write(&cxl_region_rwsem); 366 + 367 + if (rc) 368 + return rc; 369 + 370 + return len; 371 + } 372 + 373 + static ssize_t size_show(struct device *dev, struct device_attribute *attr, 374 + char *buf) 375 + { 376 + struct cxl_region *cxlr = to_cxl_region(dev); 377 + struct cxl_region_params *p = &cxlr->params; 378 + u64 size = 0; 379 + ssize_t rc; 380 + 381 + rc = down_read_interruptible(&cxl_region_rwsem); 382 + if (rc) 383 + return rc; 384 + if (p->res) 385 + size = resource_size(p->res); 386 + rc = sysfs_emit(buf, "%#llx\n", size); 387 + up_read(&cxl_region_rwsem); 388 + 389 + return rc; 390 + } 391 + static DEVICE_ATTR_RW(size); 392 + 253 393 static struct attribute *cxl_region_attrs[] = { 254 394 &dev_attr_uuid.attr, 255 395 &dev_attr_interleave_ways.attr, 256 396 &dev_attr_interleave_granularity.attr, 397 + &dev_attr_resource.attr, 398 + 
&dev_attr_size.attr, 257 399 NULL, 258 400 }; 259 401 ··· 441 299 442 300 static void unregister_region(void *dev) 443 301 { 444 - device_unregister(dev); 302 + struct cxl_region *cxlr = to_cxl_region(dev); 303 + 304 + device_del(dev); 305 + cxl_region_iomem_release(cxlr); 306 + put_device(dev); 445 307 } 446 308 447 309 static struct lock_class_key cxl_region_key; ··· 597 451 return len; 598 452 } 599 453 DEVICE_ATTR_WO(delete_region); 454 + 455 + MODULE_IMPORT_NS(CXL);
+2
drivers/cxl/cxl.h
··· 341 341 * @uuid: unique id for persistent regions 342 342 * @interleave_ways: number of endpoints in the region 343 343 * @interleave_granularity: capacity each endpoint contributes to a stripe 344 + * @res: allocated iomem capacity for this region 344 345 * 345 346 * State transitions are protected by the cxl_region_rwsem 346 347 */ ··· 350 349 uuid_t uuid; 351 350 int interleave_ways; 352 351 int interleave_granularity; 352 + struct resource *res; 353 353 }; 354 354 355 355 /**