Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

cxl/mem: Register CXL memX devices

Create the /sys/bus/cxl hierarchy to enumerate:

* Memory Devices (per-endpoint control devices)

* Memory Address Space Devices (platform address ranges with
interleaving, performance, and persistence attributes)

* Memory Regions (active provisioned memory from an address space device
that is in use as System RAM or delegated to libnvdimm as Persistent
Memory regions).

For now, only the per-endpoint control devices are registered on the
'cxl' bus. However, going forward it will provide a mechanism to
coordinate cross-device interleave.

Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> (v2)
Link: https://lore.kernel.org/r/20210217040958.1354670-4-ben.widawsky@intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>

+349 -2
+26
Documentation/ABI/testing/sysfs-bus-cxl
What:		/sys/bus/cxl/devices/memX/firmware_version
Date:		December, 2020
KernelVersion:	v5.12
Contact:	linux-cxl@vger.kernel.org
Description:
		(RO) "FW Revision" string as reported by the Identify
		Memory Device Output Payload in the CXL-2.0
		specification.

What:		/sys/bus/cxl/devices/memX/ram/size
Date:		December, 2020
KernelVersion:	v5.12
Contact:	linux-cxl@vger.kernel.org
Description:
		(RO) "Volatile Only Capacity" as bytes. Represents the
		identically named field in the Identify Memory Device Output
		Payload in the CXL-2.0 specification.

What:		/sys/bus/cxl/devices/memX/pmem/size
Date:		December, 2020
KernelVersion:	v5.12
Contact:	linux-cxl@vger.kernel.org
Description:
		(RO) "Persistent Only Capacity" as bytes. Represents the
		identically named field in the Identify Memory Device Output
		Payload in the CXL-2.0 specification.
+5
Documentation/driver-api/cxl/memory-devices.rst
···

.. kernel-doc:: drivers/cxl/mem.c
   :internal:

CXL Bus
-------
.. kernel-doc:: drivers/cxl/bus.c
   :doc: cxl bus
+3
drivers/cxl/Makefile
# SPDX-License-Identifier: GPL-2.0
# Each module object is only added to the build when its Kconfig symbol is set.
obj-$(CONFIG_CXL_BUS) += cxl_bus.o
obj-$(CONFIG_CXL_MEM) += cxl_mem.o

# Defines DEFAULT_SYMBOL_NAMESPACE=CXL for every object built in this directory.
# NOTE(review): presumably this is what puts the cxl_bus_type export into the
# "CXL" namespace that mem.c pulls in via MODULE_IMPORT_NS(CXL) - confirm
# against the kernel's symbol-namespace documentation.
ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=CXL
# Composite objects: cxl_bus.o is made from bus.o, cxl_mem.o from mem.o.
cxl_bus-y := bus.o
cxl_mem-y := mem.o
+29
drivers/cxl/bus.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* Copyright(c) 2020 Intel Corporation. All rights reserved. */ 3 + #include <linux/device.h> 4 + #include <linux/module.h> 5 + 6 + /** 7 + * DOC: cxl bus 8 + * 9 + * The CXL bus provides namespace for control devices and a rendezvous 10 + * point for cross-device interleave coordination. 11 + */ 12 + struct bus_type cxl_bus_type = { 13 + .name = "cxl", 14 + }; 15 + EXPORT_SYMBOL_GPL(cxl_bus_type); 16 + 17 + static __init int cxl_bus_init(void) 18 + { 19 + return bus_register(&cxl_bus_type); 20 + } 21 + 22 + static void cxl_bus_exit(void) 23 + { 24 + bus_unregister(&cxl_bus_type); 25 + } 26 + 27 + module_init(cxl_bus_init); 28 + module_exit(cxl_bus_exit); 29 + MODULE_LICENSE("GPL v2");
+3
drivers/cxl/cxl.h
··· 57 57 (FIELD_GET(CXLMDEV_RESET_NEEDED_MASK, status) != \ 58 58 CXLMDEV_RESET_NEEDED_NOT) 59 59 60 + struct cxl_memdev; 60 61 /** 61 62 * struct cxl_mem - A CXL memory device 62 63 * @pdev: The PCI device associated with this CXL device. ··· 75 74 struct cxl_mem { 76 75 struct pci_dev *pdev; 77 76 void __iomem *regs; 77 + struct cxl_memdev *cxlmd; 78 78 79 79 void __iomem *status_regs; 80 80 void __iomem *mbox_regs; ··· 89 87 struct range ram_range; 90 88 }; 91 89 90 + extern struct bus_type cxl_bus_type; 92 91 #endif /* __CXL_H__ */
+283 -2
drivers/cxl/mem.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0-only 2 2 /* Copyright(c) 2020 Intel Corporation. All rights reserved. */ 3 3 #include <linux/module.h> 4 + #include <linux/mutex.h> 5 + #include <linux/cdev.h> 6 + #include <linux/idr.h> 4 7 #include <linux/pci.h> 5 8 #include <linux/io.h> 6 9 #include <linux/io-64-nonatomic-lo-hi.h> ··· 25 22 * - Support management of interleave sets. 26 23 * - Handle and manage error conditions. 27 24 */ 25 + 26 + /* 27 + * An entire PCI topology full of devices should be enough for any 28 + * config 29 + */ 30 + #define CXL_MEM_MAX_DEVS 65536 28 31 29 32 #define cxl_doorbell_busy(cxlm) \ 30 33 (readl((cxlm)->mbox_regs + CXLDEV_MBOX_CTRL_OFFSET) & \ ··· 73 64 u16 return_code; 74 65 #define CXL_MBOX_SUCCESS 0 75 66 }; 67 + 68 + /** 69 + * struct cxl_memdev - CXL bus object representing a Type-3 Memory Device 70 + * @dev: driver core device object 71 + * @cdev: char dev core object for ioctl operations 72 + * @cxlm: pointer to the parent device driver data 73 + * @ops_active: active user of @cxlm in ops handlers 74 + * @ops_dead: completion when all @cxlm ops users have exited 75 + * @id: id number of this memdev instance. 
76 + */ 77 + struct cxl_memdev { 78 + struct device dev; 79 + struct cdev cdev; 80 + struct cxl_mem *cxlm; 81 + struct percpu_ref ops_active; 82 + struct completion ops_dead; 83 + int id; 84 + }; 85 + 86 + static int cxl_mem_major; 87 + static DEFINE_IDA(cxl_memdev_ida); 76 88 77 89 static int cxl_mem_wait_for_doorbell(struct cxl_mem *cxlm) 78 90 { ··· 324 294 mutex_unlock(&cxlm->mbox_mutex); 325 295 } 326 296 297 + static long cxl_memdev_ioctl(struct file *file, unsigned int cmd, 298 + unsigned long arg) 299 + { 300 + struct cxl_memdev *cxlmd; 301 + struct inode *inode; 302 + int rc = -ENOTTY; 303 + 304 + inode = file_inode(file); 305 + cxlmd = container_of(inode->i_cdev, typeof(*cxlmd), cdev); 306 + 307 + if (!percpu_ref_tryget_live(&cxlmd->ops_active)) 308 + return -ENXIO; 309 + 310 + /* TODO: ioctl body */ 311 + 312 + percpu_ref_put(&cxlmd->ops_active); 313 + 314 + return rc; 315 + } 316 + 317 + static const struct file_operations cxl_memdev_fops = { 318 + .owner = THIS_MODULE, 319 + .unlocked_ioctl = cxl_memdev_ioctl, 320 + .compat_ioctl = compat_ptr_ioctl, 321 + .llseek = noop_llseek, 322 + }; 323 + 327 324 /** 328 325 * cxl_mem_mbox_send_cmd() - Send a mailbox command to a memory device. 329 326 * @cxlm: The CXL memory device to communicate with. 
··· 569 512 return 0; 570 513 } 571 514 515 + static struct cxl_memdev *to_cxl_memdev(struct device *dev) 516 + { 517 + return container_of(dev, struct cxl_memdev, dev); 518 + } 519 + 520 + static void cxl_memdev_release(struct device *dev) 521 + { 522 + struct cxl_memdev *cxlmd = to_cxl_memdev(dev); 523 + 524 + percpu_ref_exit(&cxlmd->ops_active); 525 + ida_free(&cxl_memdev_ida, cxlmd->id); 526 + kfree(cxlmd); 527 + } 528 + 529 + static char *cxl_memdev_devnode(struct device *dev, umode_t *mode, kuid_t *uid, 530 + kgid_t *gid) 531 + { 532 + return kasprintf(GFP_KERNEL, "cxl/%s", dev_name(dev)); 533 + } 534 + 535 + static ssize_t firmware_version_show(struct device *dev, 536 + struct device_attribute *attr, char *buf) 537 + { 538 + struct cxl_memdev *cxlmd = to_cxl_memdev(dev); 539 + struct cxl_mem *cxlm = cxlmd->cxlm; 540 + 541 + return sprintf(buf, "%.16s\n", cxlm->firmware_version); 542 + } 543 + static DEVICE_ATTR_RO(firmware_version); 544 + 545 + static ssize_t payload_max_show(struct device *dev, 546 + struct device_attribute *attr, char *buf) 547 + { 548 + struct cxl_memdev *cxlmd = to_cxl_memdev(dev); 549 + struct cxl_mem *cxlm = cxlmd->cxlm; 550 + 551 + return sprintf(buf, "%zu\n", cxlm->payload_size); 552 + } 553 + static DEVICE_ATTR_RO(payload_max); 554 + 555 + static ssize_t ram_size_show(struct device *dev, struct device_attribute *attr, 556 + char *buf) 557 + { 558 + struct cxl_memdev *cxlmd = to_cxl_memdev(dev); 559 + struct cxl_mem *cxlm = cxlmd->cxlm; 560 + unsigned long long len = range_len(&cxlm->ram_range); 561 + 562 + return sprintf(buf, "%#llx\n", len); 563 + } 564 + 565 + static struct device_attribute dev_attr_ram_size = 566 + __ATTR(size, 0444, ram_size_show, NULL); 567 + 568 + static ssize_t pmem_size_show(struct device *dev, struct device_attribute *attr, 569 + char *buf) 570 + { 571 + struct cxl_memdev *cxlmd = to_cxl_memdev(dev); 572 + struct cxl_mem *cxlm = cxlmd->cxlm; 573 + unsigned long long len = range_len(&cxlm->pmem_range); 574 + 
575 + return sprintf(buf, "%#llx\n", len); 576 + } 577 + 578 + static struct device_attribute dev_attr_pmem_size = 579 + __ATTR(size, 0444, pmem_size_show, NULL); 580 + 581 + static struct attribute *cxl_memdev_attributes[] = { 582 + &dev_attr_firmware_version.attr, 583 + &dev_attr_payload_max.attr, 584 + NULL, 585 + }; 586 + 587 + static struct attribute *cxl_memdev_pmem_attributes[] = { 588 + &dev_attr_pmem_size.attr, 589 + NULL, 590 + }; 591 + 592 + static struct attribute *cxl_memdev_ram_attributes[] = { 593 + &dev_attr_ram_size.attr, 594 + NULL, 595 + }; 596 + 597 + static struct attribute_group cxl_memdev_attribute_group = { 598 + .attrs = cxl_memdev_attributes, 599 + }; 600 + 601 + static struct attribute_group cxl_memdev_ram_attribute_group = { 602 + .name = "ram", 603 + .attrs = cxl_memdev_ram_attributes, 604 + }; 605 + 606 + static struct attribute_group cxl_memdev_pmem_attribute_group = { 607 + .name = "pmem", 608 + .attrs = cxl_memdev_pmem_attributes, 609 + }; 610 + 611 + static const struct attribute_group *cxl_memdev_attribute_groups[] = { 612 + &cxl_memdev_attribute_group, 613 + &cxl_memdev_ram_attribute_group, 614 + &cxl_memdev_pmem_attribute_group, 615 + NULL, 616 + }; 617 + 618 + static const struct device_type cxl_memdev_type = { 619 + .name = "cxl_memdev", 620 + .release = cxl_memdev_release, 621 + .devnode = cxl_memdev_devnode, 622 + .groups = cxl_memdev_attribute_groups, 623 + }; 624 + 625 + static void cxlmdev_unregister(void *_cxlmd) 626 + { 627 + struct cxl_memdev *cxlmd = _cxlmd; 628 + struct device *dev = &cxlmd->dev; 629 + 630 + percpu_ref_kill(&cxlmd->ops_active); 631 + cdev_device_del(&cxlmd->cdev, dev); 632 + wait_for_completion(&cxlmd->ops_dead); 633 + cxlmd->cxlm = NULL; 634 + put_device(dev); 635 + } 636 + 637 + static void cxlmdev_ops_active_release(struct percpu_ref *ref) 638 + { 639 + struct cxl_memdev *cxlmd = 640 + container_of(ref, typeof(*cxlmd), ops_active); 641 + 642 + complete(&cxlmd->ops_dead); 643 + } 644 + 645 + static 
int cxl_mem_add_memdev(struct cxl_mem *cxlm) 646 + { 647 + struct pci_dev *pdev = cxlm->pdev; 648 + struct cxl_memdev *cxlmd; 649 + struct device *dev; 650 + struct cdev *cdev; 651 + int rc; 652 + 653 + cxlmd = kzalloc(sizeof(*cxlmd), GFP_KERNEL); 654 + if (!cxlmd) 655 + return -ENOMEM; 656 + init_completion(&cxlmd->ops_dead); 657 + 658 + /* 659 + * @cxlm is deallocated when the driver unbinds so operations 660 + * that are using it need to hold a live reference. 661 + */ 662 + cxlmd->cxlm = cxlm; 663 + rc = percpu_ref_init(&cxlmd->ops_active, cxlmdev_ops_active_release, 0, 664 + GFP_KERNEL); 665 + if (rc) 666 + goto err_ref; 667 + 668 + rc = ida_alloc_range(&cxl_memdev_ida, 0, CXL_MEM_MAX_DEVS, GFP_KERNEL); 669 + if (rc < 0) 670 + goto err_id; 671 + cxlmd->id = rc; 672 + 673 + dev = &cxlmd->dev; 674 + device_initialize(dev); 675 + dev->parent = &pdev->dev; 676 + dev->bus = &cxl_bus_type; 677 + dev->devt = MKDEV(cxl_mem_major, cxlmd->id); 678 + dev->type = &cxl_memdev_type; 679 + dev_set_name(dev, "mem%d", cxlmd->id); 680 + 681 + cdev = &cxlmd->cdev; 682 + cdev_init(cdev, &cxl_memdev_fops); 683 + 684 + rc = cdev_device_add(cdev, dev); 685 + if (rc) 686 + goto err_add; 687 + 688 + return devm_add_action_or_reset(dev->parent, cxlmdev_unregister, cxlmd); 689 + 690 + err_add: 691 + ida_free(&cxl_memdev_ida, cxlmd->id); 692 + err_id: 693 + /* 694 + * Theoretically userspace could have already entered the fops, 695 + * so flush ops_active. 696 + */ 697 + percpu_ref_kill(&cxlmd->ops_active); 698 + wait_for_completion(&cxlmd->ops_dead); 699 + percpu_ref_exit(&cxlmd->ops_active); 700 + err_ref: 701 + kfree(cxlmd); 702 + 703 + return rc; 704 + } 705 + 572 706 /** 573 707 * cxl_mem_identify() - Send the IDENTIFY command to the device. 574 708 * @cxlm: The device to identify. 
··· 860 612 if (rc) 861 613 return rc; 862 614 863 - return cxl_mem_identify(cxlm); 615 + rc = cxl_mem_identify(cxlm); 616 + if (rc) 617 + return rc; 618 + 619 + return cxl_mem_add_memdev(cxlm); 864 620 } 865 621 866 622 static const struct pci_device_id cxl_mem_pci_tbl[] = { ··· 883 631 }, 884 632 }; 885 633 634 + static __init int cxl_mem_init(void) 635 + { 636 + dev_t devt; 637 + int rc; 638 + 639 + rc = alloc_chrdev_region(&devt, 0, CXL_MEM_MAX_DEVS, "cxl"); 640 + if (rc) 641 + return rc; 642 + 643 + cxl_mem_major = MAJOR(devt); 644 + 645 + rc = pci_register_driver(&cxl_mem_driver); 646 + if (rc) { 647 + unregister_chrdev_region(MKDEV(cxl_mem_major, 0), 648 + CXL_MEM_MAX_DEVS); 649 + return rc; 650 + } 651 + 652 + return 0; 653 + } 654 + 655 + static __exit void cxl_mem_exit(void) 656 + { 657 + pci_unregister_driver(&cxl_mem_driver); 658 + unregister_chrdev_region(MKDEV(cxl_mem_major, 0), CXL_MEM_MAX_DEVS); 659 + } 660 + 886 661 MODULE_LICENSE("GPL v2"); 887 - module_pci_driver(cxl_mem_driver); 662 + module_init(cxl_mem_init); 663 + module_exit(cxl_mem_exit); 664 + MODULE_IMPORT_NS(CXL);