Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

cxl/region: Attach endpoint decoders

CXL regions (interleave sets) are made up of a set of memory devices
where each device maps a portion of the interleave with one of its
decoders (see CXL 2.0 8.2.5.12 CXL HDM Decoder Capability Structure).
As endpoint decoders are identified by a provisioning tool they can be
added to a region provided the region interleave properties are set
(way, granularity, HPA) and DPA has been assigned to the decoder.

The attach event triggers several validation checks, for example:
- is the DPA sized appropriately for the region
- is the decoder reachable via the host-bridges identified by the
region's root decoder
- is the device already active in a different region position slot
- are there already regions with a higher HPA active on a given port
(per CXL 2.0 8.2.5.12.20 Committing Decoder Programming)

...and the attach event affords an opportunity to collect data and
resources relevant to later programming the target lists in switch
decoders, for example:
- allocate a decoder at each cxl_port in the decode chain
- for a given switch port, how many of the region's endpoints are hosted
through the port
- how many unique targets (next hops) does a port need to map to reach
those endpoints

The act of reconciling this information and deploying it to the decoder
configuration is saved for a follow-on patch.

Co-developed-by: Ben Widawsky <bwidawsk@kernel.org>
Signed-off-by: Ben Widawsky <bwidawsk@kernel.org>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Link: https://lore.kernel.org/r/165784337277.1758207.4108508181328528703.stgit@dwillia2-xfh.jf.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>

+394 -12
+7
drivers/cxl/core/core.h
··· 41 41 resource_size_t cxl_dpa_resource_start(struct cxl_endpoint_decoder *cxled); 42 42 extern struct rw_semaphore cxl_dpa_rwsem; 43 43 44 + bool is_switch_decoder(struct device *dev); 45 + static inline struct cxl_ep *cxl_ep_load(struct cxl_port *port, 46 + struct cxl_memdev *cxlmd) 47 + { 48 + return xa_load(&port->endpoints, (unsigned long)&cxlmd->dev); 49 + } 50 + 44 51 int cxl_memdev_init(void); 45 52 void cxl_memdev_exit(void); 46 53 void cxl_mbox_init(void);
+3 -7
drivers/cxl/core/port.c
··· 448 448 } 449 449 EXPORT_SYMBOL_NS_GPL(is_root_decoder, CXL); 450 450 451 - static bool is_switch_decoder(struct device *dev) 451 + bool is_switch_decoder(struct device *dev) 452 452 { 453 453 return is_root_decoder(dev) || dev->type == &cxl_decoder_switch_type; 454 454 } ··· 504 504 cxl_ep_remove(port, ep); 505 505 xa_destroy(&port->endpoints); 506 506 xa_destroy(&port->dports); 507 + xa_destroy(&port->regions); 507 508 ida_free(&cxl_port_ida, port->id); 508 509 kfree(port); 509 510 } ··· 636 635 port->hdm_end = -1; 637 636 xa_init(&port->dports); 638 637 xa_init(&port->endpoints); 638 + xa_init(&port->regions); 639 639 640 640 device_initialize(dev); 641 641 lockdep_set_class_and_subclass(&dev->mutex, &cxl_port_key, port->depth); ··· 1109 1107 devm_release_action(&port->dev, cxl_dport_remove, dport); 1110 1108 devm_kfree(&port->dev, dport); 1111 1109 } 1112 - } 1113 - 1114 - static struct cxl_ep *cxl_ep_load(struct cxl_port *port, 1115 - struct cxl_memdev *cxlmd) 1116 - { 1117 - return xa_load(&port->endpoints, (unsigned long)&cxlmd->dev); 1118 1110 } 1119 1111 1120 1112 int devm_cxl_add_endpoint(struct cxl_memdev *cxlmd,
+359 -5
drivers/cxl/core/region.c
··· 439 439 return rc; 440 440 } 441 441 442 - /* 443 - * - Check that the given endpoint is attached to a host-bridge identified 444 - * in the root interleave. 442 + static int match_free_decoder(struct device *dev, void *data) 443 + { 444 + struct cxl_decoder *cxld; 445 + int *id = data; 446 + 447 + if (!is_switch_decoder(dev)) 448 + return 0; 449 + 450 + cxld = to_cxl_decoder(dev); 451 + 452 + /* enforce ordered allocation */ 453 + if (cxld->id != *id) 454 + return 0; 455 + 456 + if (!cxld->region) 457 + return 1; 458 + 459 + (*id)++; 460 + 461 + return 0; 462 + } 463 + 464 + static struct cxl_decoder *cxl_region_find_decoder(struct cxl_port *port, 465 + struct cxl_region *cxlr) 466 + { 467 + struct device *dev; 468 + int id = 0; 469 + 470 + dev = device_find_child(&port->dev, &id, match_free_decoder); 471 + if (!dev) 472 + return NULL; 473 + /* 474 + * This decoder is pinned registered as long as the endpoint decoder is 475 + * registered, and endpoint decoder unregistration holds the 476 + * cxl_region_rwsem over unregister events, so no need to hold on to 477 + * this extra reference. 
478 + */ 479 + put_device(dev); 480 + return to_cxl_decoder(dev); 481 + } 482 + 483 + static struct cxl_region_ref *alloc_region_ref(struct cxl_port *port, 484 + struct cxl_region *cxlr) 485 + { 486 + struct cxl_region_ref *cxl_rr; 487 + int rc; 488 + 489 + cxl_rr = kzalloc(sizeof(*cxl_rr), GFP_KERNEL); 490 + if (!cxl_rr) 491 + return NULL; 492 + cxl_rr->port = port; 493 + cxl_rr->region = cxlr; 494 + xa_init(&cxl_rr->endpoints); 495 + 496 + rc = xa_insert(&port->regions, (unsigned long)cxlr, cxl_rr, GFP_KERNEL); 497 + if (rc) { 498 + dev_dbg(&cxlr->dev, 499 + "%s: failed to track region reference: %d\n", 500 + dev_name(&port->dev), rc); 501 + kfree(cxl_rr); 502 + return NULL; 503 + } 504 + 505 + return cxl_rr; 506 + } 507 + 508 + static void free_region_ref(struct cxl_region_ref *cxl_rr) 509 + { 510 + struct cxl_port *port = cxl_rr->port; 511 + struct cxl_region *cxlr = cxl_rr->region; 512 + struct cxl_decoder *cxld = cxl_rr->decoder; 513 + 514 + dev_WARN_ONCE(&cxlr->dev, cxld->region != cxlr, "region mismatch\n"); 515 + if (cxld->region == cxlr) { 516 + cxld->region = NULL; 517 + put_device(&cxlr->dev); 518 + } 519 + 520 + xa_erase(&port->regions, (unsigned long)cxlr); 521 + xa_destroy(&cxl_rr->endpoints); 522 + kfree(cxl_rr); 523 + } 524 + 525 + static int cxl_rr_ep_add(struct cxl_region_ref *cxl_rr, 526 + struct cxl_endpoint_decoder *cxled) 527 + { 528 + int rc; 529 + struct cxl_port *port = cxl_rr->port; 530 + struct cxl_region *cxlr = cxl_rr->region; 531 + struct cxl_decoder *cxld = cxl_rr->decoder; 532 + struct cxl_ep *ep = cxl_ep_load(port, cxled_to_memdev(cxled)); 533 + 534 + rc = xa_insert(&cxl_rr->endpoints, (unsigned long)cxled, ep, 535 + GFP_KERNEL); 536 + if (rc) 537 + return rc; 538 + cxl_rr->nr_eps++; 539 + 540 + if (!cxld->region) { 541 + cxld->region = cxlr; 542 + get_device(&cxlr->dev); 543 + } 544 + 545 + return 0; 546 + } 547 + 548 + /** 549 + * cxl_port_attach_region() - track a region's interest in a port by endpoint 550 + * @port: port to 
add a new region reference 'struct cxl_region_ref' 551 + * @cxlr: region to attach to @port 552 + * @cxled: endpoint decoder used to create or further pin a region reference 553 + * @pos: interleave position of @cxled in @cxlr 554 + * 555 + * The attach event is an opportunity to validate CXL decode setup 556 + * constraints and record metadata needed for programming HDM decoders, 557 + * in particular decoder target lists. 558 + * 559 + * The steps are: 560 + * - validate that there are no other regions with a higher HPA already 561 + * associated with @port 562 + * - establish a region reference if one is not already present 563 + * - additionally allocate a decoder instance that will host @cxlr on 564 + * @port 565 + * - pin the region reference by the endpoint 566 + * - account for how many entries in @port's target list are needed to 567 + * cover all of the added endpoints. 445 568 */ 569 + static int cxl_port_attach_region(struct cxl_port *port, 570 + struct cxl_region *cxlr, 571 + struct cxl_endpoint_decoder *cxled, int pos) 572 + { 573 + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); 574 + struct cxl_ep *ep = cxl_ep_load(port, cxlmd); 575 + struct cxl_region_ref *cxl_rr = NULL, *iter; 576 + struct cxl_region_params *p = &cxlr->params; 577 + struct cxl_decoder *cxld = NULL; 578 + unsigned long index; 579 + int rc = -EBUSY; 580 + 581 + lockdep_assert_held_write(&cxl_region_rwsem); 582 + 583 + xa_for_each(&port->regions, index, iter) { 584 + struct cxl_region_params *ip = &iter->region->params; 585 + 586 + if (iter->region == cxlr) 587 + cxl_rr = iter; 588 + if (ip->res->start > p->res->start) { 589 + dev_dbg(&cxlr->dev, 590 + "%s: HPA order violation %s:%pr vs %pr\n", 591 + dev_name(&port->dev), 592 + dev_name(&iter->region->dev), ip->res, p->res); 593 + return -EBUSY; 594 + } 595 + } 596 + 597 + if (cxl_rr) { 598 + struct cxl_ep *ep_iter; 599 + int found = 0; 600 + 601 + cxld = cxl_rr->decoder; 602 + xa_for_each(&cxl_rr->endpoints, index, ep_iter) { 603 
+ if (ep_iter == ep) 604 + continue; 605 + if (ep_iter->next == ep->next) { 606 + found++; 607 + break; 608 + } 609 + } 610 + 611 + /* 612 + * If this is a new target or if this port is direct connected 613 + * to this endpoint then add to the target count. 614 + */ 615 + if (!found || !ep->next) 616 + cxl_rr->nr_targets++; 617 + } else { 618 + cxl_rr = alloc_region_ref(port, cxlr); 619 + if (!cxl_rr) { 620 + dev_dbg(&cxlr->dev, 621 + "%s: failed to allocate region reference\n", 622 + dev_name(&port->dev)); 623 + return -ENOMEM; 624 + } 625 + } 626 + 627 + if (!cxld) { 628 + if (port == cxled_to_port(cxled)) 629 + cxld = &cxled->cxld; 630 + else 631 + cxld = cxl_region_find_decoder(port, cxlr); 632 + if (!cxld) { 633 + dev_dbg(&cxlr->dev, "%s: no decoder available\n", 634 + dev_name(&port->dev)); 635 + goto out_erase; 636 + } 637 + 638 + if (cxld->region) { 639 + dev_dbg(&cxlr->dev, "%s: %s already attached to %s\n", 640 + dev_name(&port->dev), dev_name(&cxld->dev), 641 + dev_name(&cxld->region->dev)); 642 + rc = -EBUSY; 643 + goto out_erase; 644 + } 645 + 646 + cxl_rr->decoder = cxld; 647 + } 648 + 649 + rc = cxl_rr_ep_add(cxl_rr, cxled); 650 + if (rc) { 651 + dev_dbg(&cxlr->dev, 652 + "%s: failed to track endpoint %s:%s reference\n", 653 + dev_name(&port->dev), dev_name(&cxlmd->dev), 654 + dev_name(&cxld->dev)); 655 + goto out_erase; 656 + } 657 + 658 + return 0; 659 + out_erase: 660 + if (cxl_rr->nr_eps == 0) 661 + free_region_ref(cxl_rr); 662 + return rc; 663 + } 664 + 665 + static struct cxl_region_ref *cxl_rr_load(struct cxl_port *port, 666 + struct cxl_region *cxlr) 667 + { 668 + return xa_load(&port->regions, (unsigned long)cxlr); 669 + } 670 + 671 + static void cxl_port_detach_region(struct cxl_port *port, 672 + struct cxl_region *cxlr, 673 + struct cxl_endpoint_decoder *cxled) 674 + { 675 + struct cxl_region_ref *cxl_rr; 676 + struct cxl_ep *ep; 677 + 678 + lockdep_assert_held_write(&cxl_region_rwsem); 679 + 680 + cxl_rr = cxl_rr_load(port, cxlr); 681 + 
if (!cxl_rr) 682 + return; 683 + 684 + ep = xa_erase(&cxl_rr->endpoints, (unsigned long)cxled); 685 + if (ep) { 686 + struct cxl_ep *ep_iter; 687 + unsigned long index; 688 + int found = 0; 689 + 690 + cxl_rr->nr_eps--; 691 + xa_for_each(&cxl_rr->endpoints, index, ep_iter) { 692 + if (ep_iter->next == ep->next) { 693 + found++; 694 + break; 695 + } 696 + } 697 + if (!found) 698 + cxl_rr->nr_targets--; 699 + } 700 + 701 + if (cxl_rr->nr_eps == 0) 702 + free_region_ref(cxl_rr); 703 + } 704 + 446 705 static int cxl_region_attach(struct cxl_region *cxlr, 447 706 struct cxl_endpoint_decoder *cxled, int pos) 448 707 { 708 + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); 709 + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); 710 + struct cxl_port *ep_port, *root_port, *iter; 449 711 struct cxl_region_params *p = &cxlr->params; 712 + struct cxl_dport *dport; 713 + int i, rc = -ENXIO; 450 714 451 715 if (cxled->mode == CXL_DECODER_DEAD) { 452 716 dev_dbg(&cxlr->dev, "%s dead\n", dev_name(&cxled->cxld.dev)); 453 717 return -ENODEV; 454 718 } 455 719 456 - if (pos >= p->interleave_ways) { 720 + /* all full of members, or interleave config not established? 
*/ 721 + if (p->state > CXL_CONFIG_INTERLEAVE_ACTIVE) { 722 + dev_dbg(&cxlr->dev, "region already active\n"); 723 + return -EBUSY; 724 + } else if (p->state < CXL_CONFIG_INTERLEAVE_ACTIVE) { 725 + dev_dbg(&cxlr->dev, "interleave config missing\n"); 726 + return -ENXIO; 727 + } 728 + 729 + if (pos < 0 || pos >= p->interleave_ways) { 457 730 dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos, 458 731 p->interleave_ways); 459 732 return -ENXIO; ··· 745 472 return -EBUSY; 746 473 } 747 474 475 + for (i = 0; i < p->interleave_ways; i++) { 476 + struct cxl_endpoint_decoder *cxled_target; 477 + struct cxl_memdev *cxlmd_target; 478 + 479 + cxled_target = p->targets[pos]; 480 + if (!cxled_target) 481 + continue; 482 + 483 + cxlmd_target = cxled_to_memdev(cxled_target); 484 + if (cxlmd_target == cxlmd) { 485 + dev_dbg(&cxlr->dev, 486 + "%s already specified at position %d via: %s\n", 487 + dev_name(&cxlmd->dev), pos, 488 + dev_name(&cxled_target->cxld.dev)); 489 + return -EBUSY; 490 + } 491 + } 492 + 493 + ep_port = cxled_to_port(cxled); 494 + root_port = cxlrd_to_port(cxlrd); 495 + dport = cxl_find_dport_by_dev(root_port, ep_port->host_bridge); 496 + if (!dport) { 497 + dev_dbg(&cxlr->dev, "%s:%s invalid target for %s\n", 498 + dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), 499 + dev_name(cxlr->dev.parent)); 500 + return -ENXIO; 501 + } 502 + 503 + if (cxlrd->calc_hb(cxlrd, pos) != dport) { 504 + dev_dbg(&cxlr->dev, "%s:%s invalid target position for %s\n", 505 + dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), 506 + dev_name(&cxlrd->cxlsd.cxld.dev)); 507 + return -ENXIO; 508 + } 509 + 510 + if (cxled->cxld.target_type != cxlr->type) { 511 + dev_dbg(&cxlr->dev, "%s:%s type mismatch: %d vs %d\n", 512 + dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), 513 + cxled->cxld.target_type, cxlr->type); 514 + return -ENXIO; 515 + } 516 + 517 + if (!cxled->dpa_res) { 518 + dev_dbg(&cxlr->dev, "%s:%s: missing DPA allocation.\n", 519 + dev_name(&cxlmd->dev), 
dev_name(&cxled->cxld.dev)); 520 + return -ENXIO; 521 + } 522 + 523 + if (resource_size(cxled->dpa_res) * p->interleave_ways != 524 + resource_size(p->res)) { 525 + dev_dbg(&cxlr->dev, 526 + "%s:%s: decoder-size-%#llx * ways-%d != region-size-%#llx\n", 527 + dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), 528 + (u64)resource_size(cxled->dpa_res), p->interleave_ways, 529 + (u64)resource_size(p->res)); 530 + return -EINVAL; 531 + } 532 + 533 + for (iter = ep_port; !is_cxl_root(iter); 534 + iter = to_cxl_port(iter->dev.parent)) { 535 + rc = cxl_port_attach_region(iter, cxlr, cxled, pos); 536 + if (rc) 537 + goto err; 538 + } 539 + 748 540 p->targets[pos] = cxled; 749 541 cxled->pos = pos; 750 542 p->nr_targets++; 751 543 544 + if (p->nr_targets == p->interleave_ways) 545 + p->state = CXL_CONFIG_ACTIVE; 546 + 752 547 return 0; 548 + 549 + err: 550 + for (iter = ep_port; !is_cxl_root(iter); 551 + iter = to_cxl_port(iter->dev.parent)) 552 + cxl_port_detach_region(iter, cxlr, cxled); 553 + return rc; 753 554 } 754 555 755 556 static void cxl_region_detach(struct cxl_endpoint_decoder *cxled) 756 557 { 558 + struct cxl_port *iter, *ep_port = cxled_to_port(cxled); 757 559 struct cxl_region *cxlr = cxled->cxld.region; 758 560 struct cxl_region_params *p; 759 561 ··· 840 492 p = &cxlr->params; 841 493 get_device(&cxlr->dev); 842 494 495 + for (iter = ep_port; !is_cxl_root(iter); 496 + iter = to_cxl_port(iter->dev.parent)) 497 + cxl_port_detach_region(iter, cxlr, cxled); 498 + 843 499 if (cxled->pos < 0 || cxled->pos >= p->interleave_ways || 844 500 p->targets[cxled->pos] != cxled) { 845 501 struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); ··· 854 502 goto out; 855 503 } 856 504 505 + if (p->state == CXL_CONFIG_ACTIVE) 506 + p->state = CXL_CONFIG_INTERLEAVE_ACTIVE; 857 507 p->targets[cxled->pos] = NULL; 858 508 p->nr_targets--; 859 509 860 - /* notify the region driver that one of its targets has deparated */ 510 + /* notify the region driver that one of its targets has 
departed */ 861 511 up_write(&cxl_region_rwsem); 862 512 device_release_driver(&cxlr->dev); 863 513 down_write(&cxl_region_rwsem);
+20
drivers/cxl/cxl.h
··· 421 421 * @id: id for port device-name 422 422 * @dports: cxl_dport instances referenced by decoders 423 423 * @endpoints: cxl_ep instances, endpoints that are a descendant of this port 424 + * @regions: cxl_region_ref instances, regions mapped by this port 424 425 * @parent_dport: dport that points to this port in the parent 425 426 * @decoder_ida: allocator for decoder ids 426 427 * @hdm_end: track last allocated HDM decoder instance for allocation ordering ··· 438 437 int id; 439 438 struct xarray dports; 440 439 struct xarray endpoints; 440 + struct xarray regions; 441 441 struct cxl_dport *parent_dport; 442 442 struct ida decoder_ida; 443 443 int hdm_end; ··· 483 481 struct device *ep; 484 482 struct cxl_dport *dport; 485 483 struct cxl_port *next; 484 + }; 485 + 486 + /** 487 + * struct cxl_region_ref - track a region's interest in a port 488 + * @port: point in topology to install this reference 489 + * @decoder: decoder assigned for @region in @port 490 + * @region: region for this reference 491 + * @endpoints: cxl_ep references for region members beneath @port 492 + * @nr_eps: number of endpoints beneath @port 493 + * @nr_targets: number of distinct targets needed to reach @nr_eps 494 + */ 495 + struct cxl_region_ref { 496 + struct cxl_port *port; 497 + struct cxl_decoder *decoder; 498 + struct cxl_region *region; 499 + struct xarray endpoints; 500 + int nr_eps; 501 + int nr_targets; 486 502 }; 487 503 488 504 /*
+5
drivers/cxl/cxlmem.h
··· 55 55 return to_cxl_port(cxled->cxld.dev.parent); 56 56 } 57 57 58 + static inline struct cxl_port *cxlrd_to_port(struct cxl_root_decoder *cxlrd) 59 + { 60 + return to_cxl_port(cxlrd->cxlsd.cxld.dev.parent); 61 + } 62 + 58 63 static inline struct cxl_memdev * 59 64 cxled_to_memdev(struct cxl_endpoint_decoder *cxled) 60 65 {