Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'drm-xe-next-2025-07-10' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-next

UAPI Changes:
- Documentation fixes (Shuicheng)

Cross-subsystem Changes:
- MTD intel-dg driver for dgfx non-volatile memory device (Sasha)
- i2c: designware changes to allow i2c integration with BMG (Heikki)

Core Changes:
- Restructure migration in preparation for multi-device (Brost, Thomas)
- Expose fan control and voltage regulator version on sysfs (Raag)

Driver Changes:
- Add WildCat Lake support (Roper)
- Add aux bus child device driver for NVM on DGFX (Sasha)
- Some refactoring and fixes to allow cleaner BMG w/a (Lucas, Maarten, Auld)
- BMG w/a (Vinay)
- Improve handling of aborted probe (Michal)
- Do not wedge device on killed exec queues (Brost)
- Init changes for flicker-free boot (Maarten)
- Fix out-of-bounds field write in MI_STORE_DATA_IMM (Jia)
- Enable the GuC Dynamic Inhibit Context Switch optimization (Daniele)
- Drop bo->size (Brost)
- Build and Kconfig fixes (Harry, Maarten)
- Consolidate LRC offset calculations (Tvrtko)
- Fix potential leak in hw_engine_group (Michal)
- Future-proof for multi-tile + multi-GT cases (Roper)
- Validate gt in pmu event (Riana)
- SRIOV PF: Clear all LMTT pages on alloc (Michal)
- Allocate PF queue size on pow2 boundary (Brost)
- SRIOV VF: Make multi-GT migration less error prone (Tomasz)
- Revert indirect ring state patch to fix random LRC context switch failures (Brost)
- Fix compressed VRAM handling (Auld)
- Add one additional BMG PCI ID (Ravi)
- Recommend GuC v70.46.2 for BMG, LNL, DG2 (Julia)
- Add GuC and HuC to PTL (Daniele)
- Drop PTL force_probe requirement (Atwood)
- Fix error flow in display suspend (Shuicheng)
- Disable GuC communication on hardware initialization error (Zhanjun)
- Devcoredump fixes and clean up (Shuicheng)
- SRIOV PF: Downgrade some info to debug (Michal)
- Don't allocate temporary GuC policies object (Michal)
- Support for I2C attached MCUs (Heikki, Raag, Riana)
- Add GPU memory bo trace points (Juston)
- SRIOV VF: Skip some W/a (Michal)
- Correct comment of xe_pm_set_vram_threshold (Shuicheng)
- Cancel ongoing H2G requests when stopping CT (Michal)

Signed-off-by: Simona Vetter <simona.vetter@ffwll.ch>
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/aHA7184UnWlONORU@intel.com

+3981 -1630
+9 -3
Documentation/gpu/rfc/gpusvm.rst
@@ -74 +74 @@
    :doc: Locking
 
 .. kernel-doc:: drivers/gpu/drm/drm_gpusvm.c
-   :doc: Migration
-
-.. kernel-doc:: drivers/gpu/drm/drm_gpusvm.c
    :doc: Partial Unmapping of Ranges
 
 .. kernel-doc:: drivers/gpu/drm/drm_gpusvm.c
    :doc: Examples
+
+Overview of drm_pagemap design
+==============================
+
+.. kernel-doc:: drivers/gpu/drm/drm_pagemap.c
+   :doc: Overview
+
+.. kernel-doc:: drivers/gpu/drm/drm_pagemap.c
+   :doc: Migration
 
 Possible future design features
 ===============================
+7
MAINTAINERS
@@ -12089 +12089 @@
 S:	Supported
 F:	arch/x86/include/asm/intel-family.h
 
+INTEL DISCRETE GRAPHICS NVM MTD DRIVER
+M:	Alexander Usyskin <alexander.usyskin@intel.com>
+L:	linux-mtd@lists.infradead.org
+S:	Supported
+F:	drivers/mtd/devices/mtd_intel_dg.c
+F:	include/linux/intel_dg_nvm_aux.h
+
 INTEL DRM DISPLAY FOR XE AND I915 DRIVERS
 M:	Jani Nikula <jani.nikula@linux.intel.com>
 M:	Rodrigo Vivi <rodrigo.vivi@intel.com>
+5 -1
drivers/gpu/drm/Makefile
@@ -104 +104 @@
 #
 obj-$(CONFIG_DRM_EXEC) += drm_exec.o
 obj-$(CONFIG_DRM_GPUVM) += drm_gpuvm.o
-obj-$(CONFIG_DRM_GPUSVM) += drm_gpusvm.o
+
+drm_gpusvm_helper-y := \
+	drm_gpusvm.o\
+	drm_pagemap.o
+obj-$(CONFIG_DRM_GPUSVM) += drm_gpusvm_helper.o
 
 obj-$(CONFIG_DRM_BUDDY) += drm_buddy.o
 
+13 -749
drivers/gpu/drm/drm_gpusvm.c
@@ -9 +9 @@
 #include <linux/dma-mapping.h>
 #include <linux/export.h>
 #include <linux/hmm.h>
+#include <linux/hugetlb_inline.h>
 #include <linux/memremap.h>
-#include <linux/migrate.h>
 #include <linux/mm_types.h>
-#include <linux/pagemap.h>
 #include <linux/slab.h>
 
 #include <drm/drm_device.h>
@@ -108 +109 @@
  */
 
 /**
- * DOC: Migration
- *
- * The migration support is quite simple, allowing migration between RAM and
- * device memory at the range granularity. For example, GPU SVM currently does
- * not support mixing RAM and device memory pages within a range. This means
- * that upon GPU fault, the entire range can be migrated to device memory, and
- * upon CPU fault, the entire range is migrated to RAM. Mixed RAM and device
- * memory storage within a range could be added in the future if required.
- *
- * The reasoning for only supporting range granularity is as follows: it
- * simplifies the implementation, and range sizes are driver-defined and should
- * be relatively small.
- */
-
-/**
  * DOC: Partial Unmapping of Ranges
  *
  * Partial unmapping of ranges (e.g., 1M out of 2M is unmapped by CPU resulting
@@ -176 +192 @@
  *	}
  *
  *	if (driver_migration_policy(range)) {
- *		mmap_read_lock(mm);
- *		devmem = driver_alloc_devmem();
- *		err = drm_gpusvm_migrate_to_devmem(gpusvm, range,
- *						   devmem_allocation,
- *						   &ctx);
- *		mmap_read_unlock(mm);
+ *		err = drm_pagemap_populate_mm(driver_choose_drm_pagemap(),
+ *					      gpuva_start, gpuva_end, gpusvm->mm,
+ *					      ctx->timeslice_ms);
  *		if (err)	// CPU mappings may have changed
  *			goto retry;
  *	}
@@ -268 +287 @@
 npages_in_range(unsigned long start, unsigned long end)
 {
 	return (end - start) >> PAGE_SHIFT;
-}
-
-/**
- * struct drm_gpusvm_zdd - GPU SVM zone device data
- *
- * @refcount: Reference count for the zdd
- * @devmem_allocation: device memory allocation
- * @device_private_page_owner: Device private pages owner
- *
- * This structure serves as a generic wrapper installed in
- * page->zone_device_data. It provides infrastructure for looking up a device
- * memory allocation upon CPU page fault and asynchronously releasing device
- * memory once the CPU has no page references. Asynchronous release is useful
- * because CPU page references can be dropped in IRQ contexts, while releasing
- * device memory likely requires sleeping locks.
- */
-struct drm_gpusvm_zdd {
-	struct kref refcount;
-	struct drm_gpusvm_devmem *devmem_allocation;
-	void *device_private_page_owner;
-};
-
-/**
- * drm_gpusvm_zdd_alloc() - Allocate a zdd structure.
- * @device_private_page_owner: Device private pages owner
- *
- * This function allocates and initializes a new zdd structure. It sets up the
- * reference count and initializes the destroy work.
- *
- * Return: Pointer to the allocated zdd on success, ERR_PTR() on failure.
- */
-static struct drm_gpusvm_zdd *
-drm_gpusvm_zdd_alloc(void *device_private_page_owner)
-{
-	struct drm_gpusvm_zdd *zdd;
-
-	zdd = kmalloc(sizeof(*zdd), GFP_KERNEL);
-	if (!zdd)
-		return NULL;
-
-	kref_init(&zdd->refcount);
-	zdd->devmem_allocation = NULL;
-	zdd->device_private_page_owner = device_private_page_owner;
-
-	return zdd;
-}
-
-/**
- * drm_gpusvm_zdd_get() - Get a reference to a zdd structure.
- * @zdd: Pointer to the zdd structure.
- *
- * This function increments the reference count of the provided zdd structure.
- *
- * Return: Pointer to the zdd structure.
- */
-static struct drm_gpusvm_zdd *drm_gpusvm_zdd_get(struct drm_gpusvm_zdd *zdd)
-{
-	kref_get(&zdd->refcount);
-	return zdd;
-}
-
-/**
- * drm_gpusvm_zdd_destroy() - Destroy a zdd structure.
- * @ref: Pointer to the reference count structure.
- *
- * This function queues the destroy_work of the zdd for asynchronous destruction.
- */
-static void drm_gpusvm_zdd_destroy(struct kref *ref)
-{
-	struct drm_gpusvm_zdd *zdd =
-		container_of(ref, struct drm_gpusvm_zdd, refcount);
-	struct drm_gpusvm_devmem *devmem = zdd->devmem_allocation;
-
-	if (devmem) {
-		complete_all(&devmem->detached);
-		if (devmem->ops->devmem_release)
-			devmem->ops->devmem_release(devmem);
-	}
-	kfree(zdd);
-}
-
-/**
- * drm_gpusvm_zdd_put() - Put a zdd reference.
- * @zdd: Pointer to the zdd structure.
- *
- * This function decrements the reference count of the provided zdd structure
- * and schedules its destruction if the count drops to zero.
- */
-static void drm_gpusvm_zdd_put(struct drm_gpusvm_zdd *zdd)
-{
-	kref_put(&zdd->refcount, drm_gpusvm_zdd_destroy);
 }
@@ -836 +946 @@
 	 * process-many-malloc' fails. In the failure case, each process
 	 * mallocs 16k but the CPU VMA is ~128k which results in 64k SVM
 	 * ranges. When migrating the SVM ranges, some processes fail in
-	 * drm_gpusvm_migrate_to_devmem with 'migrate.cpages != npages'
+	 * drm_pagemap_migrate_to_devmem with 'migrate.cpages != npages'
 	 * and then upon drm_gpusvm_range_get_pages device pages from
 	 * other processes are collected + faulted in which creates all
 	 * sorts of problems. Unsure exactly how this happening, also
@@ -1254 +1364 @@
 		.dev_private_owner = gpusvm->device_private_page_owner,
 	};
 	struct mm_struct *mm = gpusvm->mm;
-	struct drm_gpusvm_zdd *zdd;
+	void *zdd;
 	unsigned long timeout =
 		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
 	unsigned long i, j;
@@ -1337 +1447 @@
 	}
 
 	zdd = NULL;
+	pagemap = NULL;
 	num_dma_mapped = 0;
 	for (i = 0, j = 0; i < npages; ++j) {
 		struct page *page = hmm_pfn_to_page(pfns[i]);
@@ -1357 +1466 @@
 			}
 
 			pagemap = page_pgmap(page);
-			dpagemap = zdd->devmem_allocation->dpagemap;
+			dpagemap = drm_pagemap_page_to_dpagemap(page);
 			if (drm_WARN_ON(gpusvm->drm, !dpagemap)) {
 				/*
 				 * Raced. This is not supposed to happen
@@ -1381 +1490 @@
 		} else {
 			dma_addr_t addr;
 
-			if (is_zone_device_page(page) || zdd) {
+			if (is_zone_device_page(page) || pagemap) {
 				err = -EOPNOTSUPP;
 				goto err_unmap;
 			}
@@ -1409 +1518 @@
 		flags.has_dma_mapping = true;
 	}
 
-	if (zdd) {
+	if (pagemap) {
 		flags.has_devmem_pages = true;
 		range->dpagemap = dpagemap;
 	}
@@ -1437 +1546 @@
 
 /**
  * drm_gpusvm_range_unmap_pages() - Unmap pages associated with a GPU SVM range
+ * drm_gpusvm_range_evict() - Evict GPU SVM range
  * @gpusvm: Pointer to the GPU SVM structure
  * @range: Pointer to the GPU SVM range structure
  * @ctx: GPU SVM context
@@ -1468 +1576 @@
 EXPORT_SYMBOL_GPL(drm_gpusvm_range_unmap_pages);
 
 /**
- * drm_gpusvm_migration_unlock_put_page() - Put a migration page
- * @page: Pointer to the page to put
- *
- * This function unlocks and puts a page.
- */
-static void drm_gpusvm_migration_unlock_put_page(struct page *page)
-{
-	unlock_page(page);
-	put_page(page);
-}
-
-/**
- * drm_gpusvm_migration_unlock_put_pages() - Put migration pages
- * @npages: Number of pages
- * @migrate_pfn: Array of migrate page frame numbers
- *
- * This function unlocks and puts an array of pages.
- */
-static void drm_gpusvm_migration_unlock_put_pages(unsigned long npages,
-						  unsigned long *migrate_pfn)
-{
-	unsigned long i;
-
-	for (i = 0; i < npages; ++i) {
-		struct page *page;
-
-		if (!migrate_pfn[i])
-			continue;
-
-		page = migrate_pfn_to_page(migrate_pfn[i]);
-		drm_gpusvm_migration_unlock_put_page(page);
-		migrate_pfn[i] = 0;
-	}
-}
-
-/**
- * drm_gpusvm_get_devmem_page() - Get a reference to a device memory page
- * @page: Pointer to the page
- * @zdd: Pointer to the GPU SVM zone device data
- *
- * This function associates the given page with the specified GPU SVM zone
- * device data and initializes it for zone device usage.
- */
-static void drm_gpusvm_get_devmem_page(struct page *page,
-				       struct drm_gpusvm_zdd *zdd)
-{
-	page->zone_device_data = drm_gpusvm_zdd_get(zdd);
-	zone_device_page_init(page);
-}
-
-/**
- * drm_gpusvm_migrate_map_pages() - Map migration pages for GPU SVM migration
- * @dev: The device for which the pages are being mapped
- * @dma_addr: Array to store DMA addresses corresponding to mapped pages
- * @migrate_pfn: Array of migrate page frame numbers to map
- * @npages: Number of pages to map
- * @dir: Direction of data transfer (e.g., DMA_BIDIRECTIONAL)
- *
- * This function maps pages of memory for migration usage in GPU SVM. It
- * iterates over each page frame number provided in @migrate_pfn, maps the
- * corresponding page, and stores the DMA address in the provided @dma_addr
- * array.
- *
- * Return: 0 on success, -EFAULT if an error occurs during mapping.
- */
-static int drm_gpusvm_migrate_map_pages(struct device *dev,
-					dma_addr_t *dma_addr,
-					unsigned long *migrate_pfn,
-					unsigned long npages,
-					enum dma_data_direction dir)
-{
-	unsigned long i;
-
-	for (i = 0; i < npages; ++i) {
-		struct page *page = migrate_pfn_to_page(migrate_pfn[i]);
-
-		if (!page)
-			continue;
-
-		if (WARN_ON_ONCE(is_zone_device_page(page)))
-			return -EFAULT;
-
-		dma_addr[i] = dma_map_page(dev, page, 0, PAGE_SIZE, dir);
-		if (dma_mapping_error(dev, dma_addr[i]))
-			return -EFAULT;
-	}
-
-	return 0;
-}
-
-/**
- * drm_gpusvm_migrate_unmap_pages() - Unmap pages previously mapped for GPU SVM migration
- * @dev: The device for which the pages were mapped
- * @dma_addr: Array of DMA addresses corresponding to mapped pages
- * @npages: Number of pages to unmap
- * @dir: Direction of data transfer (e.g., DMA_BIDIRECTIONAL)
- *
- * This function unmaps previously mapped pages of memory for GPU Shared Virtual
- * Memory (SVM). It iterates over each DMA address provided in @dma_addr, checks
- * if it's valid and not already unmapped, and unmaps the corresponding page.
- */
-static void drm_gpusvm_migrate_unmap_pages(struct device *dev,
-					   dma_addr_t *dma_addr,
-					   unsigned long npages,
-					   enum dma_data_direction dir)
-{
-	unsigned long i;
-
-	for (i = 0; i < npages; ++i) {
-		if (!dma_addr[i] || dma_mapping_error(dev, dma_addr[i]))
-			continue;
-
-		dma_unmap_page(dev, dma_addr[i], PAGE_SIZE, dir);
-	}
-}
-
-/**
- * drm_gpusvm_migrate_to_devmem() - Migrate GPU SVM range to device memory
+ * drm_gpusvm_range_evict() - Evict GPU SVM range
  * @gpusvm: Pointer to the GPU SVM structure
- * @range: Pointer to the GPU SVM range structure
- * @devmem_allocation: Pointer to the device memory allocation. The caller
- *		       should hold a reference to the device memory allocation,
- *		       which should be dropped via ops->devmem_release or upon
- *		       the failure of this function.
- * @ctx: GPU SVM context
- *
- * This function migrates the specified GPU SVM range to device memory. It
- * performs the necessary setup and invokes the driver-specific operations for
- * migration to device memory. Upon successful return, @devmem_allocation can
- * safely reference @range until ops->devmem_release is called which only upon
- * successful return. Expected to be called while holding the mmap lock in read
- * mode.
- *
- * Return: 0 on success, negative error code on failure.
- */
-int drm_gpusvm_migrate_to_devmem(struct drm_gpusvm *gpusvm,
-				 struct drm_gpusvm_range *range,
-				 struct drm_gpusvm_devmem *devmem_allocation,
-				 const struct drm_gpusvm_ctx *ctx)
-{
-	const struct drm_gpusvm_devmem_ops *ops = devmem_allocation->ops;
-	unsigned long start = drm_gpusvm_range_start(range),
-		      end = drm_gpusvm_range_end(range);
-	struct migrate_vma migrate = {
-		.start = start,
-		.end = end,
-		.pgmap_owner = gpusvm->device_private_page_owner,
-		.flags = MIGRATE_VMA_SELECT_SYSTEM,
-	};
-	struct mm_struct *mm = gpusvm->mm;
-	unsigned long i, npages = npages_in_range(start, end);
-	struct vm_area_struct *vas;
-	struct drm_gpusvm_zdd *zdd = NULL;
-	struct page **pages;
-	dma_addr_t *dma_addr;
-	void *buf;
-	int err;
-
-	mmap_assert_locked(gpusvm->mm);
-
-	if (!range->flags.migrate_devmem)
-		return -EINVAL;
-
-	if (!ops->populate_devmem_pfn || !ops->copy_to_devmem ||
-	    !ops->copy_to_ram)
-		return -EOPNOTSUPP;
-
-	vas = vma_lookup(mm, start);
-	if (!vas) {
-		err = -ENOENT;
-		goto err_out;
-	}
-
-	if (end > vas->vm_end || start < vas->vm_start) {
-		err = -EINVAL;
-		goto err_out;
-	}
-
-	if (!vma_is_anonymous(vas)) {
-		err = -EBUSY;
-		goto err_out;
-	}
-
-	buf = kvcalloc(npages, 2 * sizeof(*migrate.src) + sizeof(*dma_addr) +
-		       sizeof(*pages), GFP_KERNEL);
-	if (!buf) {
-		err = -ENOMEM;
-		goto err_out;
-	}
-	dma_addr = buf + (2 * sizeof(*migrate.src) * npages);
-	pages = buf + (2 * sizeof(*migrate.src) + sizeof(*dma_addr)) * npages;
-
-	zdd = drm_gpusvm_zdd_alloc(gpusvm->device_private_page_owner);
-	if (!zdd) {
-		err = -ENOMEM;
-		goto err_free;
-	}
-
-	migrate.vma = vas;
-	migrate.src = buf;
-	migrate.dst = migrate.src + npages;
-
-	err = migrate_vma_setup(&migrate);
-	if (err)
-		goto err_free;
-
-	if (!migrate.cpages) {
-		err = -EFAULT;
-		goto err_free;
-	}
-
-	if (migrate.cpages != npages) {
-		err = -EBUSY;
-		goto err_finalize;
-	}
-
-	err = ops->populate_devmem_pfn(devmem_allocation, npages, migrate.dst);
-	if (err)
-		goto err_finalize;
-
-	err = drm_gpusvm_migrate_map_pages(devmem_allocation->dev, dma_addr,
-					   migrate.src, npages, DMA_TO_DEVICE);
-	if (err)
-		goto err_finalize;
-
-	for (i = 0; i < npages; ++i) {
-		struct page *page = pfn_to_page(migrate.dst[i]);
-
-		pages[i] = page;
-		migrate.dst[i] = migrate_pfn(migrate.dst[i]);
-		drm_gpusvm_get_devmem_page(page, zdd);
-	}
-
-	err = ops->copy_to_devmem(pages, dma_addr, npages);
-	if (err)
-		goto err_finalize;
-
-	/* Upon success bind devmem allocation to range and zdd */
-	devmem_allocation->timeslice_expiration = get_jiffies_64() +
-		msecs_to_jiffies(ctx->timeslice_ms);
-	zdd->devmem_allocation = devmem_allocation;	/* Owns ref */
-
-err_finalize:
-	if (err)
-		drm_gpusvm_migration_unlock_put_pages(npages, migrate.dst);
-	migrate_vma_pages(&migrate);
-	migrate_vma_finalize(&migrate);
-	drm_gpusvm_migrate_unmap_pages(devmem_allocation->dev, dma_addr, npages,
-				       DMA_TO_DEVICE);
-err_free:
-	if (zdd)
-		drm_gpusvm_zdd_put(zdd);
-	kvfree(buf);
-err_out:
-	return err;
-}
-EXPORT_SYMBOL_GPL(drm_gpusvm_migrate_to_devmem);
-
-/**
- * drm_gpusvm_migrate_populate_ram_pfn() - Populate RAM PFNs for a VM area
- * @vas: Pointer to the VM area structure, can be NULL
- * @fault_page: Fault page
- * @npages: Number of pages to populate
- * @mpages: Number of pages to migrate
- * @src_mpfn: Source array of migrate PFNs
- * @mpfn: Array of migrate PFNs to populate
- * @addr: Start address for PFN allocation
- *
- * This function populates the RAM migrate page frame numbers (PFNs) for the
- * specified VM area structure. It allocates and locks pages in the VM area for
- * RAM usage. If vas is non-NULL use alloc_page_vma for allocation, if NULL use
- * alloc_page for allocation.
- *
- * Return: 0 on success, negative error code on failure.
- */
-static int drm_gpusvm_migrate_populate_ram_pfn(struct vm_area_struct *vas,
-					       struct page *fault_page,
-					       unsigned long npages,
-					       unsigned long *mpages,
-					       unsigned long *src_mpfn,
-					       unsigned long *mpfn,
-					       unsigned long addr)
-{
-	unsigned long i;
-
-	for (i = 0; i < npages; ++i, addr += PAGE_SIZE) {
-		struct page *page, *src_page;
-
-		if (!(src_mpfn[i] & MIGRATE_PFN_MIGRATE))
-			continue;
-
-		src_page = migrate_pfn_to_page(src_mpfn[i]);
-		if (!src_page)
-			continue;
-
-		if (fault_page) {
-			if (src_page->zone_device_data !=
-			    fault_page->zone_device_data)
-				continue;
-		}
-
-		if (vas)
-			page = alloc_page_vma(GFP_HIGHUSER, vas, addr);
-		else
-			page = alloc_page(GFP_HIGHUSER);
-
-		if (!page)
-			goto free_pages;
-
-		mpfn[i] = migrate_pfn(page_to_pfn(page));
-	}
-
-	for (i = 0; i < npages; ++i) {
-		struct page *page = migrate_pfn_to_page(mpfn[i]);
-
-		if (!page)
-			continue;
-
-		WARN_ON_ONCE(!trylock_page(page));
-		++*mpages;
-	}
-
-	return 0;
-
-free_pages:
-	for (i = 0; i < npages; ++i) {
-		struct page *page = migrate_pfn_to_page(mpfn[i]);
-
-		if (!page)
-			continue;
-
-		put_page(page);
-		mpfn[i] = 0;
-	}
-	return -ENOMEM;
-}
-
-/**
- * drm_gpusvm_evict_to_ram() - Evict GPU SVM range to RAM
- * @devmem_allocation: Pointer to the device memory allocation
- *
- * Similar to __drm_gpusvm_migrate_to_ram but does not require mmap lock and
- * migration done via migrate_device_* functions.
- *
- * Return: 0 on success, negative error code on failure.
- */
-int drm_gpusvm_evict_to_ram(struct drm_gpusvm_devmem *devmem_allocation)
-{
-	const struct drm_gpusvm_devmem_ops *ops = devmem_allocation->ops;
-	unsigned long npages, mpages = 0;
-	struct page **pages;
-	unsigned long *src, *dst;
-	dma_addr_t *dma_addr;
-	void *buf;
-	int i, err = 0;
-	unsigned int retry_count = 2;
-
-	npages = devmem_allocation->size >> PAGE_SHIFT;
-
-retry:
-	if (!mmget_not_zero(devmem_allocation->mm))
-		return -EFAULT;
-
-	buf = kvcalloc(npages, 2 * sizeof(*src) + sizeof(*dma_addr) +
-		       sizeof(*pages), GFP_KERNEL);
-	if (!buf) {
-		err = -ENOMEM;
-		goto err_out;
-	}
-	src = buf;
-	dst = buf + (sizeof(*src) * npages);
-	dma_addr = buf + (2 * sizeof(*src) * npages);
-	pages = buf + (2 * sizeof(*src) + sizeof(*dma_addr)) * npages;
-
-	err = ops->populate_devmem_pfn(devmem_allocation, npages, src);
-	if (err)
-		goto err_free;
-
-	err = migrate_device_pfns(src, npages);
-	if (err)
-		goto err_free;
-
-	err = drm_gpusvm_migrate_populate_ram_pfn(NULL, NULL, npages, &mpages,
-						  src, dst, 0);
-	if (err || !mpages)
-		goto err_finalize;
-
-	err = drm_gpusvm_migrate_map_pages(devmem_allocation->dev, dma_addr,
-					   dst, npages, DMA_FROM_DEVICE);
-	if (err)
-		goto err_finalize;
-
-	for (i = 0; i < npages; ++i)
-		pages[i] = migrate_pfn_to_page(src[i]);
-
-	err = ops->copy_to_ram(pages, dma_addr, npages);
-	if (err)
-		goto err_finalize;
-
-err_finalize:
-	if (err)
-		drm_gpusvm_migration_unlock_put_pages(npages, dst);
-	migrate_device_pages(src, dst, npages);
-	migrate_device_finalize(src, dst, npages);
-	drm_gpusvm_migrate_unmap_pages(devmem_allocation->dev, dma_addr, npages,
-				       DMA_FROM_DEVICE);
-err_free:
-	kvfree(buf);
-err_out:
-	mmput_async(devmem_allocation->mm);
-
-	if (completion_done(&devmem_allocation->detached))
-		return 0;
-
-	if (retry_count--) {
-		cond_resched();
-		goto retry;
-	}
-
-	return err ?: -EBUSY;
-}
-EXPORT_SYMBOL_GPL(drm_gpusvm_evict_to_ram);
-
-/**
- * __drm_gpusvm_migrate_to_ram() - Migrate GPU SVM range to RAM (internal)
- * @vas: Pointer to the VM area structure
- * @device_private_page_owner: Device private pages owner
- * @page: Pointer to the page for fault handling (can be NULL)
- * @fault_addr: Fault address
- * @size: Size of migration
- *
- * This internal function performs the migration of the specified GPU SVM range
- * to RAM. It sets up the migration, populates + dma maps RAM PFNs, and
- * invokes the driver-specific operations for migration to RAM.
- *
- * Return: 0 on success, negative error code on failure.
- */
-static int __drm_gpusvm_migrate_to_ram(struct vm_area_struct *vas,
-				       void *device_private_page_owner,
-				       struct page *page,
-				       unsigned long fault_addr,
-				       unsigned long size)
-{
-	struct migrate_vma migrate = {
-		.vma = vas,
-		.pgmap_owner = device_private_page_owner,
-		.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE |
-			MIGRATE_VMA_SELECT_DEVICE_COHERENT,
-		.fault_page = page,
-	};
-	struct drm_gpusvm_zdd *zdd;
-	const struct drm_gpusvm_devmem_ops *ops;
-	struct device *dev = NULL;
-	unsigned long npages, mpages = 0;
-	struct page **pages;
-	dma_addr_t *dma_addr;
-	unsigned long start, end;
-	void *buf;
-	int i, err = 0;
-
-	if (page) {
-		zdd = page->zone_device_data;
-		if (time_before64(get_jiffies_64(),
-				  zdd->devmem_allocation->timeslice_expiration))
-			return 0;
-	}
-
-	start = ALIGN_DOWN(fault_addr, size);
-	end = ALIGN(fault_addr + 1, size);
-
-	/* Corner where VMA area struct has been partially unmapped */
-	if (start < vas->vm_start)
-		start = vas->vm_start;
-	if (end > vas->vm_end)
-		end = vas->vm_end;
-
-	migrate.start = start;
-	migrate.end = end;
-	npages = npages_in_range(start, end);
-
-	buf = kvcalloc(npages, 2 * sizeof(*migrate.src) + sizeof(*dma_addr) +
-		       sizeof(*pages), GFP_KERNEL);
-	if (!buf) {
-		err = -ENOMEM;
-		goto err_out;
-	}
-	dma_addr = buf + (2 * sizeof(*migrate.src) * npages);
-	pages = buf + (2 * sizeof(*migrate.src) + sizeof(*dma_addr)) * npages;
-
-	migrate.vma = vas;
-	migrate.src = buf;
-	migrate.dst = migrate.src + npages;
-
-	err = migrate_vma_setup(&migrate);
-	if (err)
-		goto err_free;
-
-	/* Raced with another CPU fault, nothing to do */
-	if (!migrate.cpages)
-		goto err_free;
-
-	if (!page) {
-		for (i = 0; i < npages; ++i) {
-			if (!(migrate.src[i] & MIGRATE_PFN_MIGRATE))
-				continue;
-
-			page = migrate_pfn_to_page(migrate.src[i]);
-			break;
-		}
-
-		if (!page)
-			goto err_finalize;
-	}
-	zdd = page->zone_device_data;
-	ops = zdd->devmem_allocation->ops;
-	dev = zdd->devmem_allocation->dev;
-
-	err = drm_gpusvm_migrate_populate_ram_pfn(vas, page, npages, &mpages,
-						  migrate.src, migrate.dst,
-						  start);
-	if (err)
-		goto err_finalize;
-
-	err = drm_gpusvm_migrate_map_pages(dev, dma_addr, migrate.dst, npages,
-					   DMA_FROM_DEVICE);
-	if (err)
-		goto err_finalize;
-
-	for (i = 0; i < npages; ++i)
-		pages[i] = migrate_pfn_to_page(migrate.src[i]);
-
-	err = ops->copy_to_ram(pages, dma_addr, npages);
-	if (err)
-		goto err_finalize;
-
-err_finalize:
-	if (err)
-		drm_gpusvm_migration_unlock_put_pages(npages, migrate.dst);
-	migrate_vma_pages(&migrate);
-	migrate_vma_finalize(&migrate);
-	if (dev)
-		drm_gpusvm_migrate_unmap_pages(dev, dma_addr, npages,
-					       DMA_FROM_DEVICE);
-err_free:
-	kvfree(buf);
-err_out:
-
-	return err;
-}
-
-/**
- * drm_gpusvm_range_evict - Evict GPU SVM range
  * @range: Pointer to the GPU SVM range to be removed
  *
- * This function evicts the specified GPU SVM range. This function will not
- * evict coherent pages.
+ * This function evicts the specified GPU SVM range.
  *
  * Return: 0 on success, a negative error code on failure.
  */
@@ -1525 +2184 @@
 EXPORT_SYMBOL_GPL(drm_gpusvm_range_evict);
 
 /**
- * drm_gpusvm_page_free() - Put GPU SVM zone device data associated with a page
- * @page: Pointer to the page
- *
- * This function is a callback used to put the GPU SVM zone device data
- * associated with a page when it is being released.
- */
-static void drm_gpusvm_page_free(struct page *page)
-{
-	drm_gpusvm_zdd_put(page->zone_device_data);
-}
-
-/**
- * drm_gpusvm_migrate_to_ram() - Migrate GPU SVM range to RAM (page fault handler)
- * @vmf: Pointer to the fault information structure
- *
- * This function is a page fault handler used to migrate a GPU SVM range to RAM.
- * It retrieves the GPU SVM range information from the faulting page and invokes
- * the internal migration function to migrate the range back to RAM.
- *
- * Return: VM_FAULT_SIGBUS on failure, 0 on success.
- */
-static vm_fault_t drm_gpusvm_migrate_to_ram(struct vm_fault *vmf)
-{
-	struct drm_gpusvm_zdd *zdd = vmf->page->zone_device_data;
-	int err;
-
-	err = __drm_gpusvm_migrate_to_ram(vmf->vma,
-					  zdd->device_private_page_owner,
-					  vmf->page, vmf->address,
-					  zdd->devmem_allocation->size);
-
-	return err ? VM_FAULT_SIGBUS : 0;
-}
-
-/*
- * drm_gpusvm_pagemap_ops - Device page map operations for GPU SVM
- */
-static const struct dev_pagemap_ops drm_gpusvm_pagemap_ops = {
-	.page_free = drm_gpusvm_page_free,
-	.migrate_to_ram = drm_gpusvm_migrate_to_ram,
-};
-
-/**
- * drm_gpusvm_pagemap_ops_get() - Retrieve GPU SVM device page map operations
- *
- * Return: Pointer to the GPU SVM device page map operations structure.
- */
-const struct dev_pagemap_ops *drm_gpusvm_pagemap_ops_get(void)
-{
-	return &drm_gpusvm_pagemap_ops;
-}
-EXPORT_SYMBOL_GPL(drm_gpusvm_pagemap_ops_get);
-
-/**
  * drm_gpusvm_has_mapping() - Check if GPU SVM has mapping for the given address range
  * @gpusvm: Pointer to the GPU SVM structure.
  * @start: Start address
@@ -1567 +2280 @@
 	range->flags.partial_unmap = true;
 }
 EXPORT_SYMBOL_GPL(drm_gpusvm_range_set_unmapped);
-
-/**
- * drm_gpusvm_devmem_init() - Initialize a GPU SVM device memory allocation
- *
- * @dev: Pointer to the device structure which device memory allocation belongs to
- * @mm: Pointer to the mm_struct for the address space
- * @ops: Pointer to the operations structure for GPU SVM device memory
- * @dpagemap: The struct drm_pagemap we're allocating from.
- * @size: Size of device memory allocation
- */
-void drm_gpusvm_devmem_init(struct drm_gpusvm_devmem *devmem_allocation,
-			    struct device *dev, struct mm_struct *mm,
-			    const struct drm_gpusvm_devmem_ops *ops,
-			    struct drm_pagemap *dpagemap, size_t size)
-{
-	init_completion(&devmem_allocation->detached);
-	devmem_allocation->dev = dev;
-	devmem_allocation->mm = mm;
-	devmem_allocation->ops = ops;
-	devmem_allocation->dpagemap = dpagemap;
-	devmem_allocation->size = size;
-}
-EXPORT_SYMBOL_GPL(drm_gpusvm_devmem_init);
 
 MODULE_DESCRIPTION("DRM GPUSVM");
 MODULE_LICENSE("GPL");
+838
drivers/gpu/drm/drm_pagemap.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only OR MIT 2 + /* 3 + * Copyright © 2024-2025 Intel Corporation 4 + */ 5 + 6 + #include <linux/dma-mapping.h> 7 + #include <linux/migrate.h> 8 + #include <linux/pagemap.h> 9 + #include <drm/drm_drv.h> 10 + #include <drm/drm_pagemap.h> 11 + 12 + /** 13 + * DOC: Overview 14 + * 15 + * The DRM pagemap layer is intended to augment the dev_pagemap functionality by 16 + * providing a way to populate a struct mm_struct virtual range with device 17 + * private pages and to provide helpers to abstract device memory allocations, 18 + * to migrate memory back and forth between device memory and system RAM and 19 + * to handle access (and in the future migration) between devices implementing 20 + * a fast interconnect that is not necessarily visible to the rest of the 21 + * system. 22 + * 23 + * Typically the DRM pagemap receives requests from one or more DRM GPU SVM 24 + * instances to populate struct mm_struct virtual ranges with memory, and the 25 + * migration is best effort only and may thus fail. The implementation should 26 + * also handle device unbinding by blocking (returning an -ENODEV error) new 27 + * population requests and after that migrating all device pages to system RAM. 28 + */ 29 + 30 + /** 31 + * DOC: Migration 32 + * 33 + * Migration granularity typically follows the GPU SVM range requests, but 34 + * if there are clashes (due to races, or because multiple GPU SVM instances 35 + * have different views of the ranges used) and parts of a requested range 36 + * are therefore already present in the requested device memory, the 37 + * implementation has a variety of options. It can fail, it can populate only 38 + * the part of the range that isn't already in device memory, or it can evict 39 + * the range to system memory before trying to migrate. Ideally an 40 + * implementation would just try to migrate the missing part of the range and 41 + * allocate just enough memory to do so. 
42 + * 43 + * When migrating to system memory in response to a CPU fault or a device 44 + * memory eviction request, currently a full device memory allocation is 45 + * migrated back to system memory. Moving forward this might need improvement 46 + * for situations where a single page needs bouncing between system memory and 47 + * device memory due to, for example, atomic operations. 48 + * 49 + * Key DRM pagemap components: 50 + * 51 + * - Device Memory Allocations: 52 + * Embedded structure containing enough information for the drm_pagemap to 53 + * migrate to / from device memory. 54 + * 55 + * - Device Memory Operations: 56 + * Define the interface for driver-specific device memory operations: 57 + * release memory, populate pfns, and copy to / from device memory. 58 + */ 59 + 60 + /** 61 + * struct drm_pagemap_zdd - GPU SVM zone device data 62 + * 63 + * @refcount: Reference count for the zdd 64 + * @devmem_allocation: device memory allocation 65 + * @device_private_page_owner: Device private pages owner 66 + * 67 + * This structure serves as a generic wrapper installed in 68 + * page->zone_device_data. It provides infrastructure for looking up a device 69 + * memory allocation upon CPU page fault and asynchronously releasing device 70 + * memory once the CPU has no page references. Asynchronous release is useful 71 + * because CPU page references can be dropped in IRQ contexts, while releasing 72 + * device memory likely requires sleeping locks. 73 + */ 74 + struct drm_pagemap_zdd { 75 + struct kref refcount; 76 + struct drm_pagemap_devmem *devmem_allocation; 77 + void *device_private_page_owner; 78 + }; 79 + 80 + /** 81 + * drm_pagemap_zdd_alloc() - Allocate a zdd structure. 82 + * @device_private_page_owner: Device private pages owner 83 + * 84 + * This function allocates and initializes a new zdd structure. It sets up the 85 + * reference count and initializes the zdd fields. 86 + * 87 + * Return: Pointer to the allocated zdd on success, NULL on failure. 
88 + */ 89 + static struct drm_pagemap_zdd * 90 + drm_pagemap_zdd_alloc(void *device_private_page_owner) 91 + { 92 + struct drm_pagemap_zdd *zdd; 93 + 94 + zdd = kmalloc(sizeof(*zdd), GFP_KERNEL); 95 + if (!zdd) 96 + return NULL; 97 + 98 + kref_init(&zdd->refcount); 99 + zdd->devmem_allocation = NULL; 100 + zdd->device_private_page_owner = device_private_page_owner; 101 + 102 + return zdd; 103 + } 104 + 105 + /** 106 + * drm_pagemap_zdd_get() - Get a reference to a zdd structure. 107 + * @zdd: Pointer to the zdd structure. 108 + * 109 + * This function increments the reference count of the provided zdd structure. 110 + * 111 + * Return: Pointer to the zdd structure. 112 + */ 113 + static struct drm_pagemap_zdd *drm_pagemap_zdd_get(struct drm_pagemap_zdd *zdd) 114 + { 115 + kref_get(&zdd->refcount); 116 + return zdd; 117 + } 118 + 119 + /** 120 + * drm_pagemap_zdd_destroy() - Destroy a zdd structure. 121 + * @ref: Pointer to the reference count structure. 122 + * 123 + * This function releases the device memory allocation associated with the 124 + * zdd, if any, and frees the zdd. 125 + */ 125 + static void drm_pagemap_zdd_destroy(struct kref *ref) 126 + { 127 + struct drm_pagemap_zdd *zdd = 128 + container_of(ref, struct drm_pagemap_zdd, refcount); 129 + struct drm_pagemap_devmem *devmem = zdd->devmem_allocation; 130 + 131 + if (devmem) { 132 + complete_all(&devmem->detached); 133 + if (devmem->ops->devmem_release) 134 + devmem->ops->devmem_release(devmem); 135 + } 136 + kfree(zdd); 137 + } 138 + 139 + /** 140 + * drm_pagemap_zdd_put() - Put a zdd reference. 141 + * @zdd: Pointer to the zdd structure. 142 + * 143 + * This function decrements the reference count of the provided zdd structure 144 + * and destroys it if the count drops to zero. 
145 + */ 146 + static void drm_pagemap_zdd_put(struct drm_pagemap_zdd *zdd) 147 + { 148 + kref_put(&zdd->refcount, drm_pagemap_zdd_destroy); 149 + } 150 + 151 + /** 152 + * drm_pagemap_migration_unlock_put_page() - Put a migration page 153 + * @page: Pointer to the page to put 154 + * 155 + * This function unlocks and puts a page. 156 + */ 157 + static void drm_pagemap_migration_unlock_put_page(struct page *page) 158 + { 159 + unlock_page(page); 160 + put_page(page); 161 + } 162 + 163 + /** 164 + * drm_pagemap_migration_unlock_put_pages() - Put migration pages 165 + * @npages: Number of pages 166 + * @migrate_pfn: Array of migrate page frame numbers 167 + * 168 + * This function unlocks and puts an array of pages. 169 + */ 170 + static void drm_pagemap_migration_unlock_put_pages(unsigned long npages, 171 + unsigned long *migrate_pfn) 172 + { 173 + unsigned long i; 174 + 175 + for (i = 0; i < npages; ++i) { 176 + struct page *page; 177 + 178 + if (!migrate_pfn[i]) 179 + continue; 180 + 181 + page = migrate_pfn_to_page(migrate_pfn[i]); 182 + drm_pagemap_migration_unlock_put_page(page); 183 + migrate_pfn[i] = 0; 184 + } 185 + } 186 + 187 + /** 188 + * drm_pagemap_get_devmem_page() - Get a reference to a device memory page 189 + * @page: Pointer to the page 190 + * @zdd: Pointer to the GPU SVM zone device data 191 + * 192 + * This function associates the given page with the specified GPU SVM zone 193 + * device data and initializes it for zone device usage. 
194 + */ 195 + static void drm_pagemap_get_devmem_page(struct page *page, 196 + struct drm_pagemap_zdd *zdd) 197 + { 198 + page->zone_device_data = drm_pagemap_zdd_get(zdd); 199 + zone_device_page_init(page); 200 + } 201 + 202 + /** 203 + * drm_pagemap_migrate_map_pages() - Map migration pages for GPU SVM migration 204 + * @dev: The device for which the pages are being mapped 205 + * @dma_addr: Array to store DMA addresses corresponding to mapped pages 206 + * @migrate_pfn: Array of migrate page frame numbers to map 207 + * @npages: Number of pages to map 208 + * @dir: Direction of data transfer (e.g., DMA_BIDIRECTIONAL) 209 + * 210 + * This function maps pages of memory for migration usage in GPU SVM. It 211 + * iterates over each page frame number provided in @migrate_pfn, maps the 212 + * corresponding page, and stores the DMA address in the provided @dma_addr 213 + * array. 214 + * 215 + * Returns: 0 on success, -EFAULT if an error occurs during mapping. 216 + */ 217 + static int drm_pagemap_migrate_map_pages(struct device *dev, 218 + dma_addr_t *dma_addr, 219 + unsigned long *migrate_pfn, 220 + unsigned long npages, 221 + enum dma_data_direction dir) 222 + { 223 + unsigned long i; 224 + 225 + for (i = 0; i < npages; ++i) { 226 + struct page *page = migrate_pfn_to_page(migrate_pfn[i]); 227 + 228 + if (!page) 229 + continue; 230 + 231 + if (WARN_ON_ONCE(is_zone_device_page(page))) 232 + return -EFAULT; 233 + 234 + dma_addr[i] = dma_map_page(dev, page, 0, PAGE_SIZE, dir); 235 + if (dma_mapping_error(dev, dma_addr[i])) 236 + return -EFAULT; 237 + } 238 + 239 + return 0; 240 + } 241 + 242 + /** 243 + * drm_pagemap_migrate_unmap_pages() - Unmap pages previously mapped for GPU SVM migration 244 + * @dev: The device for which the pages were mapped 245 + * @dma_addr: Array of DMA addresses corresponding to mapped pages 246 + * @npages: Number of pages to unmap 247 + * @dir: Direction of data transfer (e.g., DMA_BIDIRECTIONAL) 248 + * 249 + * This function unmaps 
previously mapped pages of memory for GPU Shared Virtual 250 + * Memory (SVM). It iterates over each DMA address provided in @dma_addr, checks 251 + * if it's valid and not already unmapped, and unmaps the corresponding page. 252 + */ 253 + static void drm_pagemap_migrate_unmap_pages(struct device *dev, 254 + dma_addr_t *dma_addr, 255 + unsigned long npages, 256 + enum dma_data_direction dir) 257 + { 258 + unsigned long i; 259 + 260 + for (i = 0; i < npages; ++i) { 261 + if (!dma_addr[i] || dma_mapping_error(dev, dma_addr[i])) 262 + continue; 263 + 264 + dma_unmap_page(dev, dma_addr[i], PAGE_SIZE, dir); 265 + } 266 + } 267 + 268 + static unsigned long 269 + npages_in_range(unsigned long start, unsigned long end) 270 + { 271 + return (end - start) >> PAGE_SHIFT; 272 + } 273 + 274 + /** 275 + * drm_pagemap_migrate_to_devmem() - Migrate a struct mm_struct range to device memory 276 + * @devmem_allocation: The device memory allocation to migrate to. 277 + * The caller should hold a reference to the device memory allocation, 278 + * and the reference is consumed by this function unless it returns with 279 + * an error. 280 + * @mm: Pointer to the struct mm_struct. 281 + * @start: Start of the virtual address range to migrate. 282 + * @end: End of the virtual address range to migrate. 283 + * @timeslice_ms: The time requested for the migrated pagemap pages to 284 + * be present in @mm before being allowed to be migrated back. 285 + * @pgmap_owner: Not used currently, since only system memory is considered. 286 + * 287 + * This function migrates the specified virtual address range to device memory. 288 + * It performs the necessary setup and invokes the driver-specific operations for 289 + * migration to device memory. Expected to be called while holding the mmap lock in 290 + * at least read mode. 
291 + * 292 + * Note: The @timeslice_ms parameter can typically be used to force data to 293 + * remain in pagemap pages long enough for a GPU to perform a task and to prevent 294 + * a migration livelock. One alternative would be for the GPU driver to block 295 + * in a mmu_notifier for the specified amount of time, but adding the 296 + * functionality to the pagemap is likely nicer to the system as a whole. 297 + * 298 + * Return: %0 on success, negative error code on failure. 299 + */ 300 + int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation, 301 + struct mm_struct *mm, 302 + unsigned long start, unsigned long end, 303 + unsigned long timeslice_ms, 304 + void *pgmap_owner) 305 + { 306 + const struct drm_pagemap_devmem_ops *ops = devmem_allocation->ops; 307 + struct migrate_vma migrate = { 308 + .start = start, 309 + .end = end, 310 + .pgmap_owner = pgmap_owner, 311 + .flags = MIGRATE_VMA_SELECT_SYSTEM, 312 + }; 313 + unsigned long i, npages = npages_in_range(start, end); 314 + struct vm_area_struct *vas; 315 + struct drm_pagemap_zdd *zdd = NULL; 316 + struct page **pages; 317 + dma_addr_t *dma_addr; 318 + void *buf; 319 + int err; 320 + 321 + mmap_assert_locked(mm); 322 + 323 + if (!ops->populate_devmem_pfn || !ops->copy_to_devmem || 324 + !ops->copy_to_ram) 325 + return -EOPNOTSUPP; 326 + 327 + vas = vma_lookup(mm, start); 328 + if (!vas) { 329 + err = -ENOENT; 330 + goto err_out; 331 + } 332 + 333 + if (end > vas->vm_end || start < vas->vm_start) { 334 + err = -EINVAL; 335 + goto err_out; 336 + } 337 + 338 + if (!vma_is_anonymous(vas)) { 339 + err = -EBUSY; 340 + goto err_out; 341 + } 342 + 343 + buf = kvcalloc(npages, 2 * sizeof(*migrate.src) + sizeof(*dma_addr) + 344 + sizeof(*pages), GFP_KERNEL); 345 + if (!buf) { 346 + err = -ENOMEM; 347 + goto err_out; 348 + } 349 + dma_addr = buf + (2 * sizeof(*migrate.src) * npages); 350 + pages = buf + (2 * sizeof(*migrate.src) + sizeof(*dma_addr)) * npages; 351 + 352 + zdd = 
drm_pagemap_zdd_alloc(pgmap_owner); 353 + if (!zdd) { 354 + err = -ENOMEM; 355 + goto err_free; 356 + } 357 + 358 + migrate.vma = vas; 359 + migrate.src = buf; 360 + migrate.dst = migrate.src + npages; 361 + 362 + err = migrate_vma_setup(&migrate); 363 + if (err) 364 + goto err_free; 365 + 366 + if (!migrate.cpages) { 367 + err = -EFAULT; 368 + goto err_free; 369 + } 370 + 371 + if (migrate.cpages != npages) { 372 + err = -EBUSY; 373 + goto err_finalize; 374 + } 375 + 376 + err = ops->populate_devmem_pfn(devmem_allocation, npages, migrate.dst); 377 + if (err) 378 + goto err_finalize; 379 + 380 + err = drm_pagemap_migrate_map_pages(devmem_allocation->dev, dma_addr, 381 + migrate.src, npages, DMA_TO_DEVICE); 382 + if (err) 383 + goto err_finalize; 384 + 385 + for (i = 0; i < npages; ++i) { 386 + struct page *page = pfn_to_page(migrate.dst[i]); 387 + 388 + pages[i] = page; 389 + migrate.dst[i] = migrate_pfn(migrate.dst[i]); 390 + drm_pagemap_get_devmem_page(page, zdd); 391 + } 392 + 393 + err = ops->copy_to_devmem(pages, dma_addr, npages); 394 + if (err) 395 + goto err_finalize; 396 + 397 + /* Upon success bind devmem allocation to range and zdd */ 398 + devmem_allocation->timeslice_expiration = get_jiffies_64() + 399 + msecs_to_jiffies(timeslice_ms); 400 + zdd->devmem_allocation = devmem_allocation; /* Owns ref */ 401 + 402 + err_finalize: 403 + if (err) 404 + drm_pagemap_migration_unlock_put_pages(npages, migrate.dst); 405 + migrate_vma_pages(&migrate); 406 + migrate_vma_finalize(&migrate); 407 + drm_pagemap_migrate_unmap_pages(devmem_allocation->dev, dma_addr, npages, 408 + DMA_TO_DEVICE); 409 + err_free: 410 + if (zdd) 411 + drm_pagemap_zdd_put(zdd); 412 + kvfree(buf); 413 + err_out: 414 + return err; 415 + } 416 + EXPORT_SYMBOL_GPL(drm_pagemap_migrate_to_devmem); 417 + 418 + /** 419 + * drm_pagemap_migrate_populate_ram_pfn() - Populate RAM PFNs for a VM area 420 + * @vas: Pointer to the VM area structure, can be NULL 421 + * @fault_page: Fault page 422 + * 
@npages: Number of pages to populate 423 + * @mpages: Number of pages to migrate 424 + * @src_mpfn: Source array of migrate PFNs 425 + * @mpfn: Array of migrate PFNs to populate 426 + * @addr: Start address for PFN allocation 427 + * 428 + * This function populates the RAM migrate page frame numbers (PFNs) for the 429 + * specified VM area structure. It allocates and locks pages in the VM area for 430 + * RAM usage. If vas is non-NULL use alloc_page_vma for allocation, if NULL use 431 + * alloc_page for allocation. 432 + * 433 + * Return: 0 on success, negative error code on failure. 434 + */ 435 + static int drm_pagemap_migrate_populate_ram_pfn(struct vm_area_struct *vas, 436 + struct page *fault_page, 437 + unsigned long npages, 438 + unsigned long *mpages, 439 + unsigned long *src_mpfn, 440 + unsigned long *mpfn, 441 + unsigned long addr) 442 + { 443 + unsigned long i; 444 + 445 + for (i = 0; i < npages; ++i, addr += PAGE_SIZE) { 446 + struct page *page, *src_page; 447 + 448 + if (!(src_mpfn[i] & MIGRATE_PFN_MIGRATE)) 449 + continue; 450 + 451 + src_page = migrate_pfn_to_page(src_mpfn[i]); 452 + if (!src_page) 453 + continue; 454 + 455 + if (fault_page) { 456 + if (src_page->zone_device_data != 457 + fault_page->zone_device_data) 458 + continue; 459 + } 460 + 461 + if (vas) 462 + page = alloc_page_vma(GFP_HIGHUSER, vas, addr); 463 + else 464 + page = alloc_page(GFP_HIGHUSER); 465 + 466 + if (!page) 467 + goto free_pages; 468 + 469 + mpfn[i] = migrate_pfn(page_to_pfn(page)); 470 + } 471 + 472 + for (i = 0; i < npages; ++i) { 473 + struct page *page = migrate_pfn_to_page(mpfn[i]); 474 + 475 + if (!page) 476 + continue; 477 + 478 + WARN_ON_ONCE(!trylock_page(page)); 479 + ++*mpages; 480 + } 481 + 482 + return 0; 483 + 484 + free_pages: 485 + for (i = 0; i < npages; ++i) { 486 + struct page *page = migrate_pfn_to_page(mpfn[i]); 487 + 488 + if (!page) 489 + continue; 490 + 491 + put_page(page); 492 + mpfn[i] = 0; 493 + } 494 + return -ENOMEM; 495 + } 496 + 497 + /** 
498 + * drm_pagemap_evict_to_ram() - Evict GPU SVM range to RAM 499 + * @devmem_allocation: Pointer to the device memory allocation 500 + * 501 + * Similar to __drm_pagemap_migrate_to_ram but does not require mmap lock and 502 + * migration done via migrate_device_* functions. 503 + * 504 + * Return: 0 on success, negative error code on failure. 505 + */ 506 + int drm_pagemap_evict_to_ram(struct drm_pagemap_devmem *devmem_allocation) 507 + { 508 + const struct drm_pagemap_devmem_ops *ops = devmem_allocation->ops; 509 + unsigned long npages, mpages = 0; 510 + struct page **pages; 511 + unsigned long *src, *dst; 512 + dma_addr_t *dma_addr; 513 + void *buf; 514 + int i, err = 0; 515 + unsigned int retry_count = 2; 516 + 517 + npages = devmem_allocation->size >> PAGE_SHIFT; 518 + 519 + retry: 520 + if (!mmget_not_zero(devmem_allocation->mm)) 521 + return -EFAULT; 522 + 523 + buf = kvcalloc(npages, 2 * sizeof(*src) + sizeof(*dma_addr) + 524 + sizeof(*pages), GFP_KERNEL); 525 + if (!buf) { 526 + err = -ENOMEM; 527 + goto err_out; 528 + } 529 + src = buf; 530 + dst = buf + (sizeof(*src) * npages); 531 + dma_addr = buf + (2 * sizeof(*src) * npages); 532 + pages = buf + (2 * sizeof(*src) + sizeof(*dma_addr)) * npages; 533 + 534 + err = ops->populate_devmem_pfn(devmem_allocation, npages, src); 535 + if (err) 536 + goto err_free; 537 + 538 + err = migrate_device_pfns(src, npages); 539 + if (err) 540 + goto err_free; 541 + 542 + err = drm_pagemap_migrate_populate_ram_pfn(NULL, NULL, npages, &mpages, 543 + src, dst, 0); 544 + if (err || !mpages) 545 + goto err_finalize; 546 + 547 + err = drm_pagemap_migrate_map_pages(devmem_allocation->dev, dma_addr, 548 + dst, npages, DMA_FROM_DEVICE); 549 + if (err) 550 + goto err_finalize; 551 + 552 + for (i = 0; i < npages; ++i) 553 + pages[i] = migrate_pfn_to_page(src[i]); 554 + 555 + err = ops->copy_to_ram(pages, dma_addr, npages); 556 + if (err) 557 + goto err_finalize; 558 + 559 + err_finalize: 560 + if (err) 561 + 
drm_pagemap_migration_unlock_put_pages(npages, dst); 562 + migrate_device_pages(src, dst, npages); 563 + migrate_device_finalize(src, dst, npages); 564 + drm_pagemap_migrate_unmap_pages(devmem_allocation->dev, dma_addr, npages, 565 + DMA_FROM_DEVICE); 566 + err_free: 567 + kvfree(buf); 568 + err_out: 569 + mmput_async(devmem_allocation->mm); 570 + 571 + if (completion_done(&devmem_allocation->detached)) 572 + return 0; 573 + 574 + if (retry_count--) { 575 + cond_resched(); 576 + goto retry; 577 + } 578 + 579 + return err ?: -EBUSY; 580 + } 581 + EXPORT_SYMBOL_GPL(drm_pagemap_evict_to_ram); 582 + 583 + /** 584 + * __drm_pagemap_migrate_to_ram() - Migrate GPU SVM range to RAM (internal) 585 + * @vas: Pointer to the VM area structure 586 + * @device_private_page_owner: Device private pages owner 587 + * @page: Pointer to the page for fault handling (can be NULL) 588 + * @fault_addr: Fault address 589 + * @size: Size of migration 590 + * 591 + * This internal function performs the migration of the specified GPU SVM range 592 + * to RAM. It sets up the migration, populates + dma maps RAM PFNs, and 593 + * invokes the driver-specific operations for migration to RAM. 594 + * 595 + * Return: 0 on success, negative error code on failure. 
596 + */ 597 + static int __drm_pagemap_migrate_to_ram(struct vm_area_struct *vas, 598 + void *device_private_page_owner, 599 + struct page *page, 600 + unsigned long fault_addr, 601 + unsigned long size) 602 + { 603 + struct migrate_vma migrate = { 604 + .vma = vas, 605 + .pgmap_owner = device_private_page_owner, 606 + .flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE | 607 + MIGRATE_VMA_SELECT_DEVICE_COHERENT, 608 + .fault_page = page, 609 + }; 610 + struct drm_pagemap_zdd *zdd; 611 + const struct drm_pagemap_devmem_ops *ops; 612 + struct device *dev = NULL; 613 + unsigned long npages, mpages = 0; 614 + struct page **pages; 615 + dma_addr_t *dma_addr; 616 + unsigned long start, end; 617 + void *buf; 618 + int i, err = 0; 619 + 620 + if (page) { 621 + zdd = page->zone_device_data; 622 + if (time_before64(get_jiffies_64(), 623 + zdd->devmem_allocation->timeslice_expiration)) 624 + return 0; 625 + } 626 + 627 + start = ALIGN_DOWN(fault_addr, size); 628 + end = ALIGN(fault_addr + 1, size); 629 + 630 + /* Corner where VMA area struct has been partially unmapped */ 631 + if (start < vas->vm_start) 632 + start = vas->vm_start; 633 + if (end > vas->vm_end) 634 + end = vas->vm_end; 635 + 636 + migrate.start = start; 637 + migrate.end = end; 638 + npages = npages_in_range(start, end); 639 + 640 + buf = kvcalloc(npages, 2 * sizeof(*migrate.src) + sizeof(*dma_addr) + 641 + sizeof(*pages), GFP_KERNEL); 642 + if (!buf) { 643 + err = -ENOMEM; 644 + goto err_out; 645 + } 646 + dma_addr = buf + (2 * sizeof(*migrate.src) * npages); 647 + pages = buf + (2 * sizeof(*migrate.src) + sizeof(*dma_addr)) * npages; 648 + 649 + migrate.vma = vas; 650 + migrate.src = buf; 651 + migrate.dst = migrate.src + npages; 652 + 653 + err = migrate_vma_setup(&migrate); 654 + if (err) 655 + goto err_free; 656 + 657 + /* Raced with another CPU fault, nothing to do */ 658 + if (!migrate.cpages) 659 + goto err_free; 660 + 661 + if (!page) { 662 + for (i = 0; i < npages; ++i) { 663 + if (!(migrate.src[i] & 
MIGRATE_PFN_MIGRATE)) 664 + continue; 665 + 666 + page = migrate_pfn_to_page(migrate.src[i]); 667 + break; 668 + } 669 + 670 + if (!page) 671 + goto err_finalize; 672 + } 673 + zdd = page->zone_device_data; 674 + ops = zdd->devmem_allocation->ops; 675 + dev = zdd->devmem_allocation->dev; 676 + 677 + err = drm_pagemap_migrate_populate_ram_pfn(vas, page, npages, &mpages, 678 + migrate.src, migrate.dst, 679 + start); 680 + if (err) 681 + goto err_finalize; 682 + 683 + err = drm_pagemap_migrate_map_pages(dev, dma_addr, migrate.dst, npages, 684 + DMA_FROM_DEVICE); 685 + if (err) 686 + goto err_finalize; 687 + 688 + for (i = 0; i < npages; ++i) 689 + pages[i] = migrate_pfn_to_page(migrate.src[i]); 690 + 691 + err = ops->copy_to_ram(pages, dma_addr, npages); 692 + if (err) 693 + goto err_finalize; 694 + 695 + err_finalize: 696 + if (err) 697 + drm_pagemap_migration_unlock_put_pages(npages, migrate.dst); 698 + migrate_vma_pages(&migrate); 699 + migrate_vma_finalize(&migrate); 700 + if (dev) 701 + drm_pagemap_migrate_unmap_pages(dev, dma_addr, npages, 702 + DMA_FROM_DEVICE); 703 + err_free: 704 + kvfree(buf); 705 + err_out: 706 + 707 + return err; 708 + } 709 + 710 + /** 711 + * drm_pagemap_page_free() - Put GPU SVM zone device data associated with a page 712 + * @page: Pointer to the page 713 + * 714 + * This function is a callback used to put the GPU SVM zone device data 715 + * associated with a page when it is being released. 716 + */ 717 + static void drm_pagemap_page_free(struct page *page) 718 + { 719 + drm_pagemap_zdd_put(page->zone_device_data); 720 + } 721 + 722 + /** 723 + * drm_pagemap_migrate_to_ram() - Migrate a virtual range to RAM (page fault handler) 724 + * @vmf: Pointer to the fault information structure 725 + * 726 + * This function is a page fault handler used to migrate a virtual range 727 + * to ram. The device memory allocation in which the device page is found is 728 + * migrated in its entirety. 
729 + * 730 + * Returns: 731 + * VM_FAULT_SIGBUS on failure, 0 on success. 732 + */ 733 + static vm_fault_t drm_pagemap_migrate_to_ram(struct vm_fault *vmf) 734 + { 735 + struct drm_pagemap_zdd *zdd = vmf->page->zone_device_data; 736 + int err; 737 + 738 + err = __drm_pagemap_migrate_to_ram(vmf->vma, 739 + zdd->device_private_page_owner, 740 + vmf->page, vmf->address, 741 + zdd->devmem_allocation->size); 742 + 743 + return err ? VM_FAULT_SIGBUS : 0; 744 + } 745 + 746 + static const struct dev_pagemap_ops drm_pagemap_pagemap_ops = { 747 + .page_free = drm_pagemap_page_free, 748 + .migrate_to_ram = drm_pagemap_migrate_to_ram, 749 + }; 750 + 751 + /** 752 + * drm_pagemap_pagemap_ops_get() - Retrieve GPU SVM device page map operations 753 + * 754 + * Returns: 755 + * Pointer to the GPU SVM device page map operations structure. 756 + */ 757 + const struct dev_pagemap_ops *drm_pagemap_pagemap_ops_get(void) 758 + { 759 + return &drm_pagemap_pagemap_ops; 760 + } 761 + EXPORT_SYMBOL_GPL(drm_pagemap_pagemap_ops_get); 762 + 763 + /** 764 + * drm_pagemap_devmem_init() - Initialize a drm_pagemap device memory allocation 765 + * 766 + * @devmem_allocation: The struct drm_pagemap_devmem to initialize. 767 + * @dev: Pointer to the device structure which device memory allocation belongs to 768 + * @mm: Pointer to the mm_struct for the address space 769 + * @ops: Pointer to the operations structure for GPU SVM device memory 770 + * @dpagemap: The struct drm_pagemap we're allocating from. 
771 + * @size: Size of device memory allocation 772 + */ 773 + void drm_pagemap_devmem_init(struct drm_pagemap_devmem *devmem_allocation, 774 + struct device *dev, struct mm_struct *mm, 775 + const struct drm_pagemap_devmem_ops *ops, 776 + struct drm_pagemap *dpagemap, size_t size) 777 + { 778 + init_completion(&devmem_allocation->detached); 779 + devmem_allocation->dev = dev; 780 + devmem_allocation->mm = mm; 781 + devmem_allocation->ops = ops; 782 + devmem_allocation->dpagemap = dpagemap; 783 + devmem_allocation->size = size; 784 + } 785 + EXPORT_SYMBOL_GPL(drm_pagemap_devmem_init); 786 + 787 + /** 788 + * drm_pagemap_page_to_dpagemap() - Return a pointer to the drm_pagemap of a page 789 + * @page: The struct page. 790 + * 791 + * Return: A pointer to the struct drm_pagemap of a device private page that 792 + * was populated from the struct drm_pagemap. If the page was *not* populated 793 + * from a struct drm_pagemap, the result is undefined and the function call 794 + * may result in dereferencing an invalid address. 795 + */ 796 + struct drm_pagemap *drm_pagemap_page_to_dpagemap(struct page *page) 797 + { 798 + struct drm_pagemap_zdd *zdd = page->zone_device_data; 799 + 800 + return zdd->devmem_allocation->dpagemap; 801 + } 802 + EXPORT_SYMBOL_GPL(drm_pagemap_page_to_dpagemap); 803 + 804 + /** 805 + * drm_pagemap_populate_mm() - Populate a virtual range with device memory pages 806 + * @dpagemap: Pointer to the drm_pagemap managing the device memory 807 + * @start: Start of the virtual range to populate. 808 + * @end: End of the virtual range to populate. 809 + * @mm: Pointer to the virtual address space. 810 + * @timeslice_ms: The time requested for the migrated pagemap pages to 811 + * be present in @mm before being allowed to be migrated back. 812 + * 813 + * Attempt to populate a virtual range with device memory pages, 814 + * clearing them or migrating data from the existing pages if necessary. 
815 + * The function is best effort only, and implementations may vary 816 + * in how hard they try to satisfy the request. 817 + * 818 + * Return: %0 on success, negative error code on error. If the hardware 819 + * device was removed / unbound the function will return %-ENODEV. 820 + */ 821 + int drm_pagemap_populate_mm(struct drm_pagemap *dpagemap, 822 + unsigned long start, unsigned long end, 823 + struct mm_struct *mm, 824 + unsigned long timeslice_ms) 825 + { 826 + int err; 827 + 828 + if (!mmget_not_zero(mm)) 829 + return -EFAULT; 830 + mmap_read_lock(mm); 831 + err = dpagemap->ops->populate_mm(dpagemap, start, end, mm, 832 + timeslice_ms); 833 + mmap_read_unlock(mm); 834 + mmput(mm); 835 + 836 + return err; 837 + } 838 + EXPORT_SYMBOL(drm_pagemap_populate_mm);
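The migration paths in drm_pagemap.c (drm_pagemap_migrate_to_devmem, drm_pagemap_evict_to_ram and __drm_pagemap_migrate_to_ram) all carve their per-page scratch arrays — the src/dst migrate-PFN arrays, the DMA address array and the page-pointer array — out of a single kvcalloc() allocation. A minimal standalone sketch of that layout is below; `dma_addr_t` and `struct page` are stand-in userspace types here, and `scratch_alloc` is our name, not a driver function:

```c
#include <stdint.h>
#include <stdlib.h>

typedef uint64_t dma_addr_t;	/* stand-in for the kernel type */
struct page;			/* opaque; only pointers are stored */

struct scratch {
	void *buf;
	unsigned long *src, *dst;	/* migrate PFN arrays */
	dma_addr_t *dma_addr;		/* DMA addresses of system pages */
	struct page **pages;		/* page pointers for the copy ops */
};

/* Mirror of: kvcalloc(npages, 2 * sizeof(*src) + sizeof(*dma_addr) +
 *                     sizeof(*pages), GFP_KERNEL), then carving offsets. */
static int scratch_alloc(struct scratch *s, unsigned long npages)
{
	s->buf = calloc(npages, 2 * sizeof(*s->src) + sizeof(*s->dma_addr) +
			sizeof(*s->pages));
	if (!s->buf)
		return -1;
	s->src = s->buf;
	s->dst = (void *)((char *)s->buf + sizeof(*s->src) * npages);
	s->dma_addr = (void *)((char *)s->buf + 2 * sizeof(*s->src) * npages);
	s->pages = (void *)((char *)s->buf +
			    (2 * sizeof(*s->src) + sizeof(*s->dma_addr)) * npages);
	return 0;
}
```

One allocation instead of four keeps the error paths simple: a single kvfree(buf) in err_free tears everything down.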
+10 -6
drivers/gpu/drm/xe/Kconfig
··· 1 1 # SPDX-License-Identifier: GPL-2.0-only 2 2 config DRM_XE 3 3 tristate "Intel Xe2 Graphics" 4 - depends on DRM && PCI && (m || (y && KUNIT=y)) 4 + depends on DRM && PCI 5 + depends on KUNIT || !KUNIT 5 6 depends on INTEL_VSEC || !INTEL_VSEC 6 7 depends on X86_PLATFORM_DEVICES || !(X86 && ACPI) 7 8 select INTERVAL_TREE ··· 45 44 select WANT_DEV_COREDUMP 46 45 select AUXILIARY_BUS 47 46 select HMM_MIRROR 47 + select REGMAP if I2C 48 48 help 49 49 Driver for Intel Xe2 series GPUs and later. Experimental support 50 50 for Xe series is also available. ··· 87 85 Enable this option if you want support for CPU to GPU address 88 86 mirroring. 89 87 90 - If in doubut say "Y". 88 + If in doubt say "Y". 91 89 92 - config DRM_XE_DEVMEM_MIRROR 93 - bool "Enable device memory mirror" 90 + config DRM_XE_PAGEMAP 91 + bool "Enable device memory pool for SVM" 94 92 depends on DRM_XE_GPUSVM 95 93 select GET_FREE_REGION 96 94 default y 97 95 help 98 - Disable this option only if you want to compile out without device 99 - memory mirror. Will reduce KMD memory footprint when disabled. 96 + Disable this option only if you don't want to expose local device 97 + memory for SVM. Will reduce KMD memory footprint when disabled. 98 + 99 + If in doubt say "Y". 100 100 101 101 config DRM_XE_FORCE_PROBE 102 102 string "Force probe xe for selected Intel hardware IDs"
+2
drivers/gpu/drm/xe/Makefile
··· 80 80 xe_mmio.o \ 81 81 xe_mocs.o \ 82 82 xe_module.o \ 83 + xe_nvm.o \ 83 84 xe_oa.o \ 84 85 xe_observation.o \ 85 86 xe_pat.o \ ··· 125 124 xe_wait_user_fence.o \ 126 125 xe_wopcm.o 127 126 127 + xe-$(CONFIG_I2C) += xe_i2c.o 128 128 xe-$(CONFIG_HMM_MIRROR) += xe_hmm.o 129 129 xe-$(CONFIG_DRM_XE_GPUSVM) += xe_svm.o 130 130
+4
drivers/gpu/drm/xe/abi/guc_actions_abi.h
··· 142 142 XE_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A, 143 143 XE_GUC_ACTION_SET_DEVICE_ENGINE_ACTIVITY_BUFFER = 0x550C, 144 144 XE_GUC_ACTION_SET_FUNCTION_ENGINE_ACTIVITY_BUFFER = 0x550D, 145 + XE_GUC_ACTION_OPT_IN_FEATURE_KLV = 0x550E, 145 146 XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR = 0x6000, 146 147 XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC = 0x6002, 147 148 XE_GUC_ACTION_PAGE_FAULT_RES_DESC = 0x6003, ··· 271 270 #define XE_G2G_DEREGISTER_DEVICE REG_GENMASK(16, 16) 272 271 #define XE_G2G_DEREGISTER_TILE REG_GENMASK(15, 12) 273 272 #define XE_G2G_DEREGISTER_TYPE REG_GENMASK(11, 8) 273 + 274 + /* invalid type for XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR */ 275 + #define XE_GUC_CAT_ERR_TYPE_INVALID 0xdeadbeef 274 276 275 277 #endif
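The new XE_GUC_CAT_ERR_TYPE_INVALID definition gives consumers of the XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR G2H a sentinel for hardware that cannot report the CAT error type. A consumer-side check might look like the sketch below; the helper is illustrative, not a function from the driver:

```c
#include <stdbool.h>
#include <stdint.h>

/* Sentinel value from guc_actions_abi.h: the extra G2H dword is set to
 * this on HW that does not support reporting the CAT error type. */
#define XE_GUC_CAT_ERR_TYPE_INVALID 0xdeadbeef

/* Hypothetical helper: true if the reported type carries information. */
static bool cat_err_type_is_valid(uint32_t type)
{
	return type != XE_GUC_CAT_ERR_TYPE_INVALID;
}
```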
+28
drivers/gpu/drm/xe/abi/guc_klvs_abi.h
··· 16 16 * +===+=======+==============================================================+ 17 17 * | 0 | 31:16 | **KEY** - KLV key identifier | 18 18 * | | | - `GuC Self Config KLVs`_ | 19 + * | | | - `GuC Opt In Feature KLVs`_ | 19 20 * | | | - `GuC VGT Policy KLVs`_ | 20 21 * | | | - `GuC VF Configuration KLVs`_ | 21 22 * | | | | ··· 124 123 125 124 GUC_CONTEXT_POLICIES_KLV_NUM_IDS = 5, 126 125 }; 126 + 127 + /** 128 + * DOC: GuC Opt In Feature KLVs 129 + * 130 + * `GuC KLV`_ keys available for use with OPT_IN_FEATURE_KLV 131 + * 132 + * _`GUC_KLV_OPT_IN_FEATURE_EXT_CAT_ERR_TYPE` : 0x4001 133 + * Adds an extra dword to the XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR G2H 134 + * containing the type of the CAT error. On HW that does not support 135 + * reporting the CAT error type, the extra dword is set to 0xdeadbeef. 136 + * 137 + * _`GUC_KLV_OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH` : 0x4003 138 + * This KLV enables the Dynamic Inhibit Context Switch optimization, which 139 + * consists in the GuC setting the CTX_CTRL_INHIBIT_SYN_CTX_SWITCH bit to 140 + * zero in the CTX_CONTEXT_CONTROL register of LRCs that are submitted 141 + * to an oversubscribed engine. This will cause those contexts to be 142 + * switched out immediately if they hit an unsatisfied semaphore wait 143 + * (instead of waiting the full timeslice duration). The bit is instead set 144 + * to one if a single context is queued on the engine, to avoid it being 145 + * switched out if there isn't another context that can run in its place. 146 + */ 147 + 148 + #define GUC_KLV_OPT_IN_FEATURE_EXT_CAT_ERR_TYPE_KEY 0x4001 149 + #define GUC_KLV_OPT_IN_FEATURE_EXT_CAT_ERR_TYPE_LEN 0u 150 + 151 + #define GUC_KLV_OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH_KEY 0x4003 152 + #define GUC_KLV_OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH_LEN 0u 127 153 128 154 /** 129 155 * DOC: GuC VGT Policy KLVs
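Per the KLV table extended above, the key occupies bits 31:16 of the first dword. Assuming the length sits in the low 16 bits, as in the standard GuC KLV format, building the header dword for the zero-length opt-in KLVs could be sketched like this (the helper name is ours, not the driver's):

```c
#include <stdint.h>

#define GUC_KLV_OPT_IN_FEATURE_EXT_CAT_ERR_TYPE_KEY 0x4001
#define GUC_KLV_OPT_IN_FEATURE_EXT_CAT_ERR_TYPE_LEN 0u

/* Pack a KLV header: KEY in bits 31:16, LEN (in dwords) in bits 15:0.
 * The LEN placement is an assumption based on the common GuC KLV layout. */
static uint32_t guc_klv_header(uint32_t key, uint32_t len)
{
	return (key << 16) | (len & 0xffff);
}
```

The opt-in feature KLVs here have LEN 0, so the header dword is the entire entry.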
+4 -7
drivers/gpu/drm/xe/display/xe_dsb_buffer.c
···

  void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val)
  {
- struct xe_device *xe = dsb_buf->vma->bo->tile->xe;
-
  iosys_map_wr(&dsb_buf->vma->bo->vmap, idx * 4, u32, val);
- xe_device_l2_flush(xe);
  }

  u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx)
···

  void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val, size_t size)
  {
- struct xe_device *xe = dsb_buf->vma->bo->tile->xe;
-
  WARN_ON(idx > (dsb_buf->buf_size - size) / sizeof(*dsb_buf->cmd_buf));

  iosys_map_memset(&dsb_buf->vma->bo->vmap, idx * 4, val, size);
- xe_device_l2_flush(xe);
  }

  bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *dsb_buf, size_t size)
···

  void intel_dsb_buffer_flush_map(struct intel_dsb_buffer *dsb_buf)
  {
+ struct xe_device *xe = dsb_buf->vma->bo->tile->xe;
+
  /*
  * The memory barrier here is to ensure coherency of DSB vs MMIO,
  * both for weak ordering archs and discrete cards.
  */
- xe_device_wmb(dsb_buf->vma->bo->tile->xe);
+ xe_device_wmb(xe);
+ xe_device_l2_flush(xe);
  }
+4 -3
drivers/gpu/drm/xe/display/xe_fb_pin.c
···

  vma->dpt = dpt;
  vma->node = dpt->ggtt_node[tile0->id];
+
+ /* Ensure DPT writes are flushed */
+ xe_device_l2_flush(xe);
  return 0;
  }

···
  goto out_unlock;
  }

- ret = xe_ggtt_node_insert_locked(vma->node, bo->size, align, 0);
+ ret = xe_ggtt_node_insert_locked(vma->node, xe_bo_size(bo), align, 0);
  if (ret) {
  xe_ggtt_node_fini(vma->node);
  goto out_unlock;
···
  if (ret)
  goto err_unpin;

- /* Ensure DPT writes are flushed */
- xe_device_l2_flush(xe);
  return vma;

  err_unpin:
+1 -1
drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
···

  cmd_in = xe_bo_ggtt_addr(bo);
  cmd_out = cmd_in + PAGE_SIZE;
- xe_map_memset(xe, &bo->vmap, 0, 0, bo->size);
+ xe_map_memset(xe, &bo->vmap, 0, 0, xe_bo_size(bo));

  gsc_context->hdcp_bo = bo;
  gsc_context->hdcp_cmd_in = cmd_in;
+4
drivers/gpu/drm/xe/regs/xe_gsc_regs.h
···
  #define MTL_GSC_HECI1_BASE 0x00116000
  #define MTL_GSC_HECI2_BASE 0x00117000

+ #define DG1_GSC_HECI2_BASE 0x00259000
+ #define PVC_GSC_HECI2_BASE 0x00285000
+ #define DG2_GSC_HECI2_BASE 0x00374000
+
  #define HECI_H_CSR(base) XE_REG((base) + 0x4)
  #define HECI_H_CSR_IE REG_BIT(0)
  #define HECI_H_CSR_IS REG_BIT(1)
+20
drivers/gpu/drm/xe/regs/xe_i2c_regs.h
···
+ /* SPDX-License-Identifier: MIT */
+ #ifndef _XE_I2C_REGS_H_
+ #define _XE_I2C_REGS_H_
+
+ #include <linux/pci_regs.h>
+
+ #include "xe_reg_defs.h"
+ #include "xe_regs.h"
+
+ #define I2C_BRIDGE_OFFSET (SOC_BASE + 0xd9000)
+ #define I2C_CONFIG_SPACE_OFFSET (SOC_BASE + 0xf6000)
+ #define I2C_MEM_SPACE_OFFSET (SOC_BASE + 0xf7400)
+
+ #define REG_SG_REMAP_ADDR_PREFIX XE_REG(SOC_BASE + 0x0164)
+ #define REG_SG_REMAP_ADDR_POSTFIX XE_REG(SOC_BASE + 0x0168)
+
+ #define I2C_CONFIG_CMD XE_REG(I2C_CONFIG_SPACE_OFFSET + PCI_COMMAND)
+ #define I2C_CONFIG_PMCSR XE_REG(I2C_CONFIG_SPACE_OFFSET + 0x84)
+
+ #endif /* _XE_I2C_REGS_H_ */
+1
drivers/gpu/drm/xe/regs/xe_irq_regs.h
···
  #define MASTER_IRQ REG_BIT(31)
  #define GU_MISC_IRQ REG_BIT(29)
  #define DISPLAY_IRQ REG_BIT(16)
+ #define I2C_IRQ REG_BIT(12)
  #define GT_DW_IRQ(x) REG_BIT(x)

  /*
+1 -1
drivers/gpu/drm/xe/regs/xe_pmt.h
···
  #ifndef _XE_PMT_H_
  #define _XE_PMT_H_

- #define SOC_BASE 0x280000
+ #include "xe_regs.h"

  #define BMG_PMT_BASE_OFFSET 0xDB000
  #define BMG_DISCOVERY_OFFSET (SOC_BASE + BMG_PMT_BASE_OFFSET)
+2
drivers/gpu/drm/xe/regs/xe_regs.h
···

  #include "regs/xe_reg_defs.h"

+ #define SOC_BASE 0x280000
+
  #define GU_CNTL_PROTECTED XE_REG(0x10100C)
  #define DRIVERINT_FLR_DIS REG_BIT(31)

+1 -1
drivers/gpu/drm/xe/tests/xe_bo.c
···
  }

  /* Check last CCS value, or at least last value in page. */
- offset = xe_device_ccs_bytes(tile_to_xe(tile), bo->size);
+ offset = xe_device_ccs_bytes(tile_to_xe(tile), xe_bo_size(bo));
  offset = min_t(u32, offset, PAGE_SIZE) / sizeof(u64) - 1;
  if (cpu_map[offset] != get_val) {
  KUNIT_FAIL(test,
+2 -2
drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c
···

  bo->tile = tile;
  bo->ttm.bdev = &xe->ttm;
- bo->size = size;
+ bo->ttm.base.size = size;
  iosys_map_set_vaddr(&bo->vmap, buf);

  if (flags & XE_BO_FLAG_GGTT) {
···

  KUNIT_ASSERT_EQ(test, 0,
  xe_ggtt_node_insert(bo->ggtt_node[tile->id],
- bo->size, SZ_4K));
+ xe_bo_size(bo), SZ_4K));
  }

  return bo;
+26 -26
drivers/gpu/drm/xe/tests/xe_migrate.c
···
  {
  struct xe_device *xe = tile_to_xe(m->tile);
  u64 retval, expected = 0;
- bool big = bo->size >= SZ_2M;
+ bool big = xe_bo_size(bo) >= SZ_2M;
  struct dma_fence *fence;
  const char *str = big ? "Copying big bo" : "Copying small bo";
  int err;

  struct xe_bo *remote = xe_bo_create_locked(xe, m->tile, NULL,
- bo->size,
+ xe_bo_size(bo),
  ttm_bo_type_kernel,
  region |
  XE_BO_FLAG_NEEDS_CPU_ACCESS |
···
  goto out_unlock;
  }

- xe_map_memset(xe, &remote->vmap, 0, 0xd0, remote->size);
+ xe_map_memset(xe, &remote->vmap, 0, 0xd0, xe_bo_size(remote));
  fence = xe_migrate_clear(m, remote, remote->ttm.resource,
  XE_MIGRATE_CLEAR_FLAG_FULL);
  if (!sanity_fence_failed(xe, fence, big ? "Clearing remote big bo" :
···
  retval = xe_map_rd(xe, &remote->vmap, 0, u64);
  check(retval, expected, "remote first offset should be cleared",
  test);
- retval = xe_map_rd(xe, &remote->vmap, remote->size - 8, u64);
+ retval = xe_map_rd(xe, &remote->vmap, xe_bo_size(remote) - 8, u64);
  check(retval, expected, "remote last offset should be cleared",
  test);
  }
  dma_fence_put(fence);

  /* Try to copy 0xc0 from remote to vram with 2MB or 64KiB/4KiB pages */
- xe_map_memset(xe, &remote->vmap, 0, 0xc0, remote->size);
- xe_map_memset(xe, &bo->vmap, 0, 0xd0, bo->size);
+ xe_map_memset(xe, &remote->vmap, 0, 0xc0, xe_bo_size(remote));
+ xe_map_memset(xe, &bo->vmap, 0, 0xd0, xe_bo_size(bo));

  expected = 0xc0c0c0c0c0c0c0c0;
  fence = xe_migrate_copy(m, remote, bo, remote->ttm.resource,
···
  retval = xe_map_rd(xe, &bo->vmap, 0, u64);
  check(retval, expected,
  "remote -> vram bo first offset should be copied", test);
- retval = xe_map_rd(xe, &bo->vmap, bo->size - 8, u64);
+ retval = xe_map_rd(xe, &bo->vmap, xe_bo_size(bo) - 8, u64);
  check(retval, expected,
  "remote -> vram bo offset should be copied", test);
  }
  dma_fence_put(fence);

  /* And other way around.. slightly hacky.. */
- xe_map_memset(xe, &remote->vmap, 0, 0xd0, remote->size);
- xe_map_memset(xe, &bo->vmap, 0, 0xc0, bo->size);
+ xe_map_memset(xe, &remote->vmap, 0, 0xd0, xe_bo_size(remote));
+ xe_map_memset(xe, &bo->vmap, 0, 0xc0, xe_bo_size(bo));

  fence = xe_migrate_copy(m, bo, remote, bo->ttm.resource,
  remote->ttm.resource, false);
···
  retval = xe_map_rd(xe, &remote->vmap, 0, u64);
  check(retval, expected,
  "vram -> remote bo first offset should be copied", test);
- retval = xe_map_rd(xe, &remote->vmap, bo->size - 8, u64);
+ retval = xe_map_rd(xe, &remote->vmap, xe_bo_size(bo) - 8, u64);
  check(retval, expected,
  "vram -> remote bo last offset should be copied", test);
  }
···
  if (m->q->vm->flags & XE_VM_FLAG_64K)
  expected |= XE_PTE_PS64;
  if (xe_bo_is_vram(pt))
- xe_res_first(pt->ttm.resource, 0, pt->size, &src_it);
+ xe_res_first(pt->ttm.resource, 0, xe_bo_size(pt), &src_it);
  else
- xe_res_first_sg(xe_bo_sg(pt), 0, pt->size, &src_it);
+ xe_res_first_sg(xe_bo_sg(pt), 0, xe_bo_size(pt), &src_it);

  emit_pte(m, bb, NUM_KERNEL_PDE - 1, xe_bo_is_vram(pt), false,
  &src_it, XE_PAGE_SIZE, pt->ttm.resource);
···

  /* Clear a small bo */
  kunit_info(test, "Clearing small buffer object\n");
- xe_map_memset(xe, &tiny->vmap, 0, 0x22, tiny->size);
+ xe_map_memset(xe, &tiny->vmap, 0, 0x22, xe_bo_size(tiny));
  expected = 0;
  fence = xe_migrate_clear(m, tiny, tiny->ttm.resource,
  XE_MIGRATE_CLEAR_FLAG_FULL);
···
  dma_fence_put(fence);
  retval = xe_map_rd(xe, &tiny->vmap, 0, u32);
  check(retval, expected, "Command clear small first value", test);
- retval = xe_map_rd(xe, &tiny->vmap, tiny->size - 4, u32);
+ retval = xe_map_rd(xe, &tiny->vmap, xe_bo_size(tiny) - 4, u32);
  check(retval, expected, "Command clear small last value", test);

  kunit_info(test, "Copying small buffer object to system\n");
···

  /* Clear a big bo */
  kunit_info(test, "Clearing big buffer object\n");
- xe_map_memset(xe, &big->vmap, 0, 0x11, big->size);
+ xe_map_memset(xe, &big->vmap, 0, 0x11, xe_bo_size(big));
  expected = 0;
  fence = xe_migrate_clear(m, big, big->ttm.resource,
  XE_MIGRATE_CLEAR_FLAG_FULL);
···
  dma_fence_put(fence);
  retval = xe_map_rd(xe, &big->vmap, 0, u32);
  check(retval, expected, "Command clear big first value", test);
- retval = xe_map_rd(xe, &big->vmap, big->size - 4, u32);
+ retval = xe_map_rd(xe, &big->vmap, xe_bo_size(big) - 4, u32);
  check(retval, expected, "Command clear big last value", test);

  kunit_info(test, "Copying big buffer object to system\n");
···
  struct xe_migrate *m = tile->migrate;
  struct xe_device *xe = gt_to_xe(gt);
  struct dma_fence *fence = NULL;
- u64 size = src_bo->size;
+ u64 size = xe_bo_size(src_bo);
  struct xe_res_cursor src_it, dst_it;
  struct ttm_resource *src = src_bo->ttm.resource, *dst = dst_bo->ttm.resource;
  u64 src_L0_ofs, dst_L0_ofs;
···
  long ret;

  expected = 0xd0d0d0d0d0d0d0d0;
- xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, sys_bo->size);
+ xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, xe_bo_size(sys_bo));

  fence = blt_copy(tile, sys_bo, vram_bo, false, "Blit copy from sysmem to vram", test);
  if (!sanity_fence_failed(xe, fence, "Blit copy from sysmem to vram", test)) {
···

  retval = xe_map_rd(xe, &vram_bo->vmap, 0, u64);
  check(retval, expected, "Clear evicted vram data first value", test);
- retval = xe_map_rd(xe, &vram_bo->vmap, vram_bo->size - 8, u64);
+ retval = xe_map_rd(xe, &vram_bo->vmap, xe_bo_size(vram_bo) - 8, u64);
  check(retval, expected, "Clear evicted vram data last value", test);

  fence = blt_copy(tile, vram_bo, ccs_bo,
···
  retval = xe_map_rd(xe, &ccs_bo->vmap, 0, u64);
  check(retval, 0, "Clear ccs data first value", test);

- retval = xe_map_rd(xe, &ccs_bo->vmap, ccs_bo->size - 8, u64);
+ retval = xe_map_rd(xe, &ccs_bo->vmap, xe_bo_size(ccs_bo) - 8, u64);
  check(retval, 0, "Clear ccs data last value", test);
  }
  dma_fence_put(fence);
···

  retval = xe_map_rd(xe, &vram_bo->vmap, 0, u64);
  check(retval, expected, "Restored value must be equal to initial value", test);
- retval = xe_map_rd(xe, &vram_bo->vmap, vram_bo->size - 8, u64);
+ retval = xe_map_rd(xe, &vram_bo->vmap, xe_bo_size(vram_bo) - 8, u64);
  check(retval, expected, "Restored value must be equal to initial value", test);

  fence = blt_copy(tile, vram_bo, ccs_bo,
···
  if (!sanity_fence_failed(xe, fence, "Clear ccs buffer data", test)) {
  retval = xe_map_rd(xe, &ccs_bo->vmap, 0, u64);
  check(retval, 0, "Clear ccs data first value", test);
- retval = xe_map_rd(xe, &ccs_bo->vmap, ccs_bo->size - 8, u64);
+ retval = xe_map_rd(xe, &ccs_bo->vmap, xe_bo_size(ccs_bo) - 8, u64);
  check(retval, 0, "Clear ccs data last value", test);
  }
  dma_fence_put(fence);
···
  u64 expected, retval;

  expected = 0xd0d0d0d0d0d0d0d0;
- xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, sys_bo->size);
+ xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, xe_bo_size(sys_bo));

  fence = blt_copy(tile, sys_bo, vram_bo, false, "Blit copy from sysmem to vram", test);
  if (!sanity_fence_failed(xe, fence, "Blit copy from sysmem to vram", test)) {
···
  if (!sanity_fence_failed(xe, fence, "Blit copy from vram to sysmem", test)) {
  retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64);
  check(retval, expected, "Decompressed value must be equal to initial value", test);
- retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64);
+ retval = xe_map_rd(xe, &sys_bo->vmap, xe_bo_size(sys_bo) - 8, u64);
  check(retval, expected, "Decompressed value must be equal to initial value", test);
  }
  dma_fence_put(fence);
···
  if (!sanity_fence_failed(xe, fence, "Clear main buffer data", test)) {
  retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64);
  check(retval, expected, "Clear main buffer first value", test);
- retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64);
+ retval = xe_map_rd(xe, &sys_bo->vmap, xe_bo_size(sys_bo) - 8, u64);
  check(retval, expected, "Clear main buffer last value", test);
  }
  dma_fence_put(fence);
···
  if (!sanity_fence_failed(xe, fence, "Clear ccs buffer data", test)) {
  retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64);
  check(retval, expected, "Clear ccs data first value", test);
- retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64);
+ retval = xe_map_rd(xe, &sys_bo->vmap, xe_bo_size(sys_bo) - 8, u64);
  check(retval, expected, "Clear ccs data last value", test);
  }
  dma_fence_put(fence);
+31
drivers/gpu/drm/xe/tests/xe_pci.c
···
  KUNIT_ARRAY_PARAM(graphics_ip, graphics_ips, xe_ip_kunit_desc);
  KUNIT_ARRAY_PARAM(media_ip, media_ips, xe_ip_kunit_desc);

+ static void xe_pci_id_kunit_desc(const struct pci_device_id *param, char *desc)
+ {
+ const struct xe_device_desc *dev_desc =
+ (const struct xe_device_desc *)param->driver_data;
+
+ if (dev_desc)
+ snprintf(desc, KUNIT_PARAM_DESC_SIZE, "0x%X (%s)",
+ param->device, dev_desc->platform_name);
+ }
+
+ KUNIT_ARRAY_PARAM(pci_id, pciidlist, xe_pci_id_kunit_desc);
+
  /**
  * xe_pci_graphics_ip_gen_param - Generate graphics struct xe_ip parameters
  * @prev: the pointer to the previous parameter to iterate from or NULL
···
  return media_ip_gen_params(prev, desc);
  }
  EXPORT_SYMBOL_IF_KUNIT(xe_pci_media_ip_gen_param);
+
+ /**
+ * xe_pci_id_gen_param - Generate struct pci_device_id parameters
+ * @prev: the pointer to the previous parameter to iterate from or NULL
+ * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE
+ *
+ * This function prepares struct pci_device_id parameter.
+ *
+ * To be used only as a parameter generator function in &KUNIT_CASE_PARAM.
+ *
+ * Return: pointer to the next parameter or NULL if no more parameters
+ */
+ const void *xe_pci_id_gen_param(const void *prev, char *desc)
+ {
+ const struct pci_device_id *pci = pci_id_gen_params(prev, desc);
+
+ return pci->driver_data ? pci : NULL;
+ }
+ EXPORT_SYMBOL_IF_KUNIT(xe_pci_id_gen_param);

  static void fake_read_gmdid(struct xe_device *xe, enum xe_gmdid_type type,
  u32 *ver, u32 *revid)
+12
drivers/gpu/drm/xe/tests/xe_pci_test.c
···
  KUNIT_ASSERT_EQ(test, mask, 0);
  }

+ static void check_platform_gt_count(struct kunit *test)
+ {
+ const struct pci_device_id *pci = test->param_value;
+ const struct xe_device_desc *desc =
+ (const struct xe_device_desc *)pci->driver_data;
+ int max_gt = desc->max_gt_per_tile;
+
+ KUNIT_ASSERT_GT(test, max_gt, 0);
+ KUNIT_ASSERT_LE(test, max_gt, XE_MAX_GT_PER_TILE);
+ }
+
  static struct kunit_case xe_pci_tests[] = {
  KUNIT_CASE_PARAM(check_graphics_ip, xe_pci_graphics_ip_gen_param),
  KUNIT_CASE_PARAM(check_media_ip, xe_pci_media_ip_gen_param),
+ KUNIT_CASE_PARAM(check_platform_gt_count, xe_pci_id_gen_param),
  {}
  };

+1
drivers/gpu/drm/xe/tests/xe_pci_test.h
···

  const void *xe_pci_graphics_ip_gen_param(const void *prev, char *desc);
  const void *xe_pci_media_ip_gen_param(const void *prev, char *desc);
+ const void *xe_pci_id_gen_param(const void *prev, char *desc);
  const void *xe_pci_live_device_gen_param(const void *prev, char *desc);

  #endif
+27 -10
drivers/gpu/drm/xe/xe_bo.c
···

  #include <kunit/static_stub.h>

+ #include <trace/events/gpu_mem.h>
+
  #include "xe_device.h"
  #include "xe_dma_buf.h"
  #include "xe_drm_client.h"
···
  xe_shrinker_mod_pages(xe->mem.shrinker, -(long)tt->num_pages, 0);
  }

+ static void update_global_total_pages(struct ttm_device *ttm_dev,
+ long num_pages)
+ {
+ #if IS_ENABLED(CONFIG_TRACE_GPU_MEM)
+ struct xe_device *xe = ttm_to_xe_device(ttm_dev);
+ u64 global_total_pages =
+ atomic64_add_return(num_pages, &xe->global_total_pages);
+
+ trace_gpu_mem_total(xe->drm.primary->index, 0,
+ global_total_pages << PAGE_SHIFT);
+ #endif
+ }
+
  static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
  u32 page_flags)
  {
···

  extra_pages = 0;
  if (xe_bo_needs_ccs_pages(bo))
- extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size),
+ extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, xe_bo_size(bo)),
  PAGE_SIZE);

  /*
···

  xe_tt->purgeable = false;
  xe_ttm_tt_account_add(ttm_to_xe_device(ttm_dev), tt);
+ update_global_total_pages(ttm_dev, tt->num_pages);

  return 0;
  }
···

  ttm_pool_free(&ttm_dev->pool, tt);
  xe_ttm_tt_account_subtract(xe, tt);
+ update_global_total_pages(ttm_dev, -(long)tt->num_pages);
  }

  static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt)
···
  if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE)
  goto out_unlock_bo;

- backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, bo->size,
+ backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, xe_bo_size(bo),
  DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel,
  XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
  XE_BO_FLAG_PINNED);
···
  goto out_unlock_bo;

  if (!backup) {
- backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, bo->size,
+ backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv,
+ NULL, xe_bo_size(bo),
  DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel,
  XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
  XE_BO_FLAG_PINNED);
···
  }

  xe_map_memcpy_from(xe, backup->vmap.vaddr, &bo->vmap, 0,
- bo->size);
+ xe_bo_size(bo));
  }

  if (!bo->backup_obj)
···
  }

  xe_map_memcpy_to(xe, &bo->vmap, 0, backup->vmap.vaddr,
- bo->size);
+ xe_bo_size(bo));
  }

  bo->backup_obj = NULL;
···

  vram = res_to_mem_region(ttm_bo->resource);
  xe_res_first(ttm_bo->resource, offset & PAGE_MASK,
- bo->size - (offset & PAGE_MASK), &cursor);
+ xe_bo_size(bo) - (offset & PAGE_MASK), &cursor);

  do {
  unsigned long page_offset = (offset & ~PAGE_MASK);
···

  bo->ccs_cleared = false;
  bo->tile = tile;
- bo->size = size;
  bo->flags = flags;
  bo->cpu_caching = cpu_caching;
  bo->ttm.base.funcs = &xe_gem_object_funcs;
···

  if (flags & XE_BO_FLAG_FIXED_PLACEMENT) {
  err = xe_ggtt_insert_bo_at(t->mem.ggtt, bo,
- start + bo->size, U64_MAX);
+ start + xe_bo_size(bo), U64_MAX);
  } else {
  err = xe_ggtt_insert_bo(t->mem.ggtt, bo);
  }
···
  xe_assert(xe, !(*src)->vmap.is_iomem);

  bo = xe_managed_bo_create_from_data(xe, tile, (*src)->vmap.vaddr,
- (*src)->size, dst_flags);
+ xe_bo_size(*src), dst_flags);
  if (IS_ERR(bo))
  return PTR_ERR(bo);

···
  * TODO: Fix up ttm_bo_vmap to do that, or fix up ttm_bo_kmap
  * to use struct iosys_map.
  */
- ret = ttm_bo_kmap(&bo->ttm, 0, bo->size >> PAGE_SHIFT, &bo->kmap);
+ ret = ttm_bo_kmap(&bo->ttm, 0, xe_bo_size(bo) >> PAGE_SHIFT, &bo->kmap);
  if (ret)
  return ret;

+15 -2
drivers/gpu/drm/xe/xe_bo.h
···
  return xe_bo_addr(bo, 0, page_size);
  }

+ /**
+ * xe_bo_size() - Xe BO size
+ * @bo: The bo object.
+ *
+ * Simple helper to return Xe BO's size.
+ *
+ * Return: Xe BO's size
+ */
+ static inline size_t xe_bo_size(struct xe_bo *bo)
+ {
+ return bo->ttm.base.size;
+ }
+
  static inline u32
  __xe_bo_ggtt_addr(struct xe_bo *bo, u8 tile_id)
  {
···
  if (XE_WARN_ON(!ggtt_node))
  return 0;

- XE_WARN_ON(ggtt_node->base.size > bo->size);
+ XE_WARN_ON(ggtt_node->base.size > xe_bo_size(bo));
  XE_WARN_ON(ggtt_node->base.start + ggtt_node->base.size > (1ull << 32));
  return ggtt_node->base.start;
  }
···

  static inline size_t xe_bo_ccs_pages_start(struct xe_bo *bo)
  {
- return PAGE_ALIGN(bo->ttm.base.size);
+ return PAGE_ALIGN(xe_bo_size(bo));
  }

  static inline bool xe_bo_has_pages(struct xe_bo *bo)
+1 -3
drivers/gpu/drm/xe/xe_bo_types.h
···
  struct xe_bo *backup_obj;
  /** @parent_obj: Ref to parent bo if this a backup_obj */
  struct xe_bo *parent_obj;
- /** @size: Size of this buffer object */
- size_t size;
  /** @flags: flags for this buffer object */
  u32 flags;
  /** @vm: VM this BO is attached to, for extobj this will be NULL */
···
  u16 cpu_caching;

  /** @devmem_allocation: SVM device memory allocation */
- struct drm_gpusvm_devmem devmem_allocation;
+ struct drm_pagemap_devmem devmem_allocation;

  /** @vram_userfault_link: Link into @mem_access.vram_userfault.list */
  struct list_head vram_userfault_link;
+28 -22
drivers/gpu/drm/xe/xe_devcoredump.c
···

  #define XE_DEVCOREDUMP_CHUNK_MAX (SZ_512M + SZ_1G)

+ /**
+ * xe_devcoredump_read() - Read data from the Xe device coredump snapshot
+ * @buffer: Destination buffer to copy the coredump data into
+ * @offset: Offset in the coredump data to start reading from
+ * @count: Number of bytes to read
+ * @data: Pointer to the xe_devcoredump structure
+ * @datalen: Length of the data (unused)
+ *
+ * Reads a chunk of the coredump snapshot data into the provided buffer.
+ * If the devcoredump is smaller than 1.5 GB (XE_DEVCOREDUMP_CHUNK_MAX),
+ * it is read directly from a pre-written buffer. For larger devcoredumps,
+ * the pre-written buffer must be periodically repopulated from the snapshot
+ * state due to kmalloc size limitations.
+ *
+ * Return: Number of bytes copied on success, or a negative error code on failure.
+ */
  static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
  size_t count, void *data, size_t datalen)
  {
  struct xe_devcoredump *coredump = data;
  struct xe_devcoredump_snapshot *ss;
- ssize_t byte_copied;
+ ssize_t byte_copied = 0;
  u32 chunk_offset;
  ssize_t new_chunk_position;
+ bool pm_needed = false;
+ int ret = 0;

  if (!coredump)
  return -ENODEV;
···
  /* Ensure delayed work is captured before continuing */
  flush_work(&ss->work);

- if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX)
+ pm_needed = ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX;
+ if (pm_needed)
  xe_pm_runtime_get(gt_to_xe(ss->gt));

  mutex_lock(&coredump->lock);

  if (!ss->read.buffer) {
- mutex_unlock(&coredump->lock);
- return -ENODEV;
+ ret = -ENODEV;
+ goto unlock;
  }

- if (offset >= ss->read.size) {
- mutex_unlock(&coredump->lock);
- return 0;
- }
+ if (offset >= ss->read.size)
+ goto unlock;

  new_chunk_position = div_u64_rem(offset,
  XE_DEVCOREDUMP_CHUNK_MAX,
···
  ss->read.size - offset;
  memcpy(buffer, ss->read.buffer + chunk_offset, byte_copied);

+ unlock:
  mutex_unlock(&coredump->lock);

- if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX)
+ if (pm_needed)
  xe_pm_runtime_put(gt_to_xe(ss->gt));

- return byte_copied;
+ return byte_copied ? byte_copied : ret;
  }
···
  {
  struct xe_devcoredump_snapshot *ss = &coredump->snapshot;
  struct xe_guc *guc = exec_queue_to_guc(q);
- u32 adj_logical_mask = q->logical_mask;
- u32 width_mask = (0x1 << q->width) - 1;
  const char *process_name = "no process";
-
  unsigned int fw_ref;
  bool cookie;
- int i;

  ss->snapshot_time = ktime_get_real();
  ss->boot_time = ktime_get_boottime();
···
  INIT_WORK(&ss->work, xe_devcoredump_deferred_snap_work);

  cookie = dma_fence_begin_signalling();
- for (i = 0; q->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
- if (adj_logical_mask & BIT(i)) {
- adj_logical_mask |= width_mask << i;
- i += q->width;
- } else {
- ++i;
- }
- }

  /* keep going if fw fails as we still want to save the memory and SW data */
  fw_ref = xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
+62 -59
drivers/gpu/drm/xe/xe_device.c
···
  #include "xe_gt_printk.h"
  #include "xe_gt_sriov_vf.h"
  #include "xe_guc.h"
+ #include "xe_guc_pc.h"
  #include "xe_hw_engine_group.h"
  #include "xe_hwmon.h"
+ #include "xe_i2c.h"
  #include "xe_irq.h"
- #include "xe_memirq.h"
  #include "xe_mmio.h"
  #include "xe_module.h"
+ #include "xe_nvm.h"
  #include "xe_oa.h"
  #include "xe_observation.h"
  #include "xe_pat.h"
···
  if (err)
  return err;

- err = xe_ttm_sys_mgr_init(xe);
- if (err)
- return err;
-
  for_each_gt(gt, xe, id) {
  err = xe_gt_init_early(gt);
  if (err)
  return err;
-
- /*
- * Only after this point can GT-specific MMIO operations
- * (including things like communication with the GuC)
- * be performed.
- */
- xe_gt_mmio_init(gt);
-
- if (IS_SRIOV_VF(xe)) {
- xe_guc_comm_init_early(&gt->uc.guc);
- err = xe_gt_sriov_vf_bootstrap(gt);
- if (err)
- return err;
- err = xe_gt_sriov_vf_query_config(gt);
- if (err)
- return err;
- }
  }

  for_each_tile(tile, xe, id) {
  err = xe_ggtt_init_early(tile->mem.ggtt);
- if (err)
- return err;
- err = xe_memirq_init(&tile->memirq);
- if (err)
- return err;
- }
-
- for_each_gt(gt, xe, id) {
- err = xe_gt_init_hwconfig(gt);
  if (err)
  return err;
  }
···
  if (err)
  return err;
  }
+
+ /*
+ * Allow allocations only now to ensure xe_display_init_early()
+ * is the first to allocate, always.
+ */
+ err = xe_ttm_sys_mgr_init(xe);
+ if (err)
+ return err;

  /* Allocate and map stolen after potential VRAM resize */
  err = xe_ttm_stolen_mgr_init(xe);
···
  if (err)
  return err;
  }
+
+ xe_nvm_init(xe);

  err = xe_heci_gsc_init(xe);
  if (err)
···
  if (err)
  goto err_unregister_display;

+ err = xe_i2c_probe(xe);
+ if (err)
+ goto err_unregister_display;
+
  for_each_gt(gt, xe, id)
  xe_gt_sanitize_freq(gt);

···
  void xe_device_remove(struct xe_device *xe)
  {
  xe_display_unregister(xe);
+
+ xe_nvm_fini(xe);

  drm_dev_unplug(&xe->drm);

···
  xe_mmio_write32(xe_root_tile_mmio(xe), VF_CAP_REG, 0);
  }

- /**
- * xe_device_td_flush() - Flush transient L3 cache entries
- * @xe: The device
- *
- * Display engine has direct access to memory and is never coherent with L3/L4
- * caches (or CPU caches), however KMD is responsible for specifically flushing
- * transient L3 GPU cache entries prior to the flip sequence to ensure scanout
- * can happen from such a surface without seeing corruption.
- *
- * Display surfaces can be tagged as transient by mapping it using one of the
- * various L3:XD PAT index modes on Xe2.
- *
- * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed
- * at the end of each submission via PIPE_CONTROL for compute/render, since SA
- * Media is not coherent with L3 and we want to support render-vs-media
- * usescases. For other engines like copy/blt the HW internally forces uncached
- * behaviour, hence why we can skip the TDF on such platforms.
+ /*
+ * Issue a TRANSIENT_FLUSH_REQUEST and wait for completion on each gt.
  */
- void xe_device_td_flush(struct xe_device *xe)
+ static void tdf_request_sync(struct xe_device *xe)
  {
- struct xe_gt *gt;
  unsigned int fw_ref;
+ struct xe_gt *gt;
  u8 id;
-
- if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
- return;
-
- if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) {
- xe_device_l2_flush(xe);
- return;
- }

  for_each_gt(gt, xe, id) {
  if (xe_gt_is_media_type(gt))
···
  return;

  xe_mmio_write32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST);
+
  /*
  * FIXME: We can likely do better here with our choice of
  * timeout. Currently we just assume the worst case, i.e. 150us,
···
  return;

  spin_lock(&gt->global_invl_lock);
- xe_mmio_write32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1);

+ xe_mmio_write32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1);
  if (xe_mmio_wait32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true))
  xe_gt_err_once(gt, "Global invalidation timeout\n");
+
  spin_unlock(&gt->global_invl_lock);

  xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ }
+
+ /**
+ * xe_device_td_flush() - Flush transient L3 cache entries
+ * @xe: The device
+ *
+ * Display engine has direct access to memory and is never coherent with L3/L4
+ * caches (or CPU caches), however KMD is responsible for specifically flushing
+ * transient L3 GPU cache entries prior to the flip sequence to ensure scanout
+ * can happen from such a surface without seeing corruption.
+ *
+ * Display surfaces can be tagged as transient by mapping it using one of the
+ * various L3:XD PAT index modes on Xe2.
+ *
+ * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed
+ * at the end of each submission via PIPE_CONTROL for compute/render, since SA
+ * Media is not coherent with L3 and we want to support render-vs-media
+ * usescases. For other engines like copy/blt the HW internally forces uncached
+ * behaviour, hence why we can skip the TDF on such platforms.
+ */
+ void xe_device_td_flush(struct xe_device *xe)
+ {
+ struct xe_gt *root_gt;
+
+ if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
+ return;
+
+ root_gt = xe_root_mmio_gt(xe);
+ if (XE_WA(root_gt, 16023588340)) {
+ /* A transient flush is not sufficient: flush the L2 */
+ xe_device_l2_flush(xe);
+ } else {
+ xe_guc_pc_apply_flush_freq_limit(&root_gt->uc.guc.pc);
+ tdf_request_sync(xe);
+ xe_guc_pc_remove_flush_freq_limit(&root_gt->uc.guc.pc);
+ }
  }

  u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
+20 -27
drivers/gpu/drm/xe/xe_device.h
··· 60 60 return &xe->tiles[0]; 61 61 } 62 62 63 + /* 64 + * Highest GT/tile count for any platform. Used only for memory allocation 65 + * sizing. Any logic looping over GTs or mapping userspace GT IDs into GT 66 + * structures should use the per-platform xe->info.max_gt_per_tile instead. 67 + */ 63 68 #define XE_MAX_GT_PER_TILE 2 64 - 65 - static inline struct xe_gt *xe_tile_get_gt(struct xe_tile *tile, u8 gt_id) 66 - { 67 - if (drm_WARN_ON(&tile_to_xe(tile)->drm, gt_id >= XE_MAX_GT_PER_TILE)) 68 - gt_id = 0; 69 - 70 - return gt_id ? tile->media_gt : tile->primary_gt; 71 - } 72 69 73 70 static inline struct xe_gt *xe_device_get_gt(struct xe_device *xe, u8 gt_id) 74 71 { 75 - struct xe_tile *root_tile = xe_device_get_root_tile(xe); 72 + struct xe_tile *tile; 76 73 struct xe_gt *gt; 77 74 78 - /* 79 - * FIXME: This only works for now because multi-tile and standalone 80 - * media are mutually exclusive on the platforms we have today. 81 - * 82 - * id => GT mapping may change once we settle on how we want to handle 83 - * our UAPI. 84 - */ 85 - if (MEDIA_VER(xe) >= 13) { 86 - gt = xe_tile_get_gt(root_tile, gt_id); 87 - } else { 88 - if (drm_WARN_ON(&xe->drm, gt_id >= XE_MAX_TILES_PER_DEVICE)) 89 - gt_id = 0; 75 + if (gt_id >= xe->info.tile_count * xe->info.max_gt_per_tile) 76 + return NULL; 90 77 91 - gt = xe->tiles[gt_id].primary_gt; 78 + tile = &xe->tiles[gt_id / xe->info.max_gt_per_tile]; 79 + switch (gt_id % xe->info.max_gt_per_tile) { 80 + default: 81 + xe_assert(xe, false); 82 + fallthrough; 83 + case 0: 84 + gt = tile->primary_gt; 85 + break; 86 + case 1: 87 + gt = tile->media_gt; 88 + break; 92 89 } 93 90 94 91 if (!gt) ··· 127 130 for ((id__) = 1; (id__) < (xe__)->info.tile_count; (id__)++) \ 128 131 for_each_if((tile__) = &(xe__)->tiles[(id__)]) 129 132 130 - /* 131 - * FIXME: This only works for now since multi-tile and standalone media 132 - * happen to be mutually exclusive. Future platforms may change this... 
133 - */ 134 133 #define for_each_gt(gt__, xe__, id__) \ 135 - for ((id__) = 0; (id__) < (xe__)->info.gt_count; (id__)++) \ 134 + for ((id__) = 0; (id__) < (xe__)->info.tile_count * (xe__)->info.max_gt_per_tile; (id__)++) \ 136 135 for_each_if((gt__) = xe_device_get_gt((xe__), (id__))) 137 136 138 137 static inline struct xe_force_wake *gt_to_fw(struct xe_gt *gt)
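The reworked xe_device_get_gt() above replaces the media-version special case with pure arithmetic: each tile owns `max_gt_per_tile` consecutive GT IDs, slot 0 being the primary GT and slot 1 the media GT. A standalone sketch of that mapping (names and struct are invented for illustration; in the driver `max_gt_per_tile` comes from `xe->info`):

```c
#include <assert.h>

/* Decompose a flat GT ID into (tile index, per-tile slot), and back.
 * Slot 0 = primary GT, slot 1 = media GT in the scheme above. */
struct tile_slot {
	unsigned int tile;
	unsigned int slot;
};

static struct tile_slot gt_id_decode(unsigned int gt_id,
				     unsigned int max_gt_per_tile)
{
	struct tile_slot out = {
		.tile = gt_id / max_gt_per_tile,
		.slot = gt_id % max_gt_per_tile,
	};
	return out;
}

static unsigned int gt_id_encode(unsigned int tile, unsigned int slot,
				 unsigned int max_gt_per_tile)
{
	return tile * max_gt_per_tile + slot;
}
```

This is also why for_each_gt() now iterates `tile_count * max_gt_per_tile` IDs and relies on the NULL return for slots a given tile does not populate.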
+142 -1
drivers/gpu/drm/xe/xe_device_sysfs.c
··· 24 24 * 25 25 * vram_d3cold_threshold - Report/change vram used threshold(in MB) below 26 26 * which vram save/restore is permissible during runtime D3cold entry/exit. 27 + * 28 + * lb_fan_control_version - Fan control version provisioned by late binding. 29 + * Exposed only if supported by the device. 30 + * 31 + * lb_voltage_regulator_version - Voltage regulator version provisioned by late 32 + * binding. Exposed only if supported by the device. 27 33 */ 28 34 29 35 static ssize_t ··· 70 64 } 71 65 72 66 static DEVICE_ATTR_RW(vram_d3cold_threshold); 67 + 68 + static ssize_t 69 + lb_fan_control_version_show(struct device *dev, struct device_attribute *attr, char *buf) 70 + { 71 + struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev)); 72 + struct xe_tile *root = xe_device_get_root_tile(xe); 73 + u32 cap, ver_low = FAN_TABLE, ver_high = FAN_TABLE; 74 + u16 major = 0, minor = 0, hotfix = 0, build = 0; 75 + int ret; 76 + 77 + xe_pm_runtime_get(xe); 78 + 79 + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0), 80 + &cap, NULL); 81 + if (ret) 82 + goto out; 83 + 84 + if (REG_FIELD_GET(V1_FAN_PROVISIONED, cap)) { 85 + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_VERSION_LOW, 0), 86 + &ver_low, NULL); 87 + if (ret) 88 + goto out; 89 + 90 + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_VERSION_HIGH, 0), 91 + &ver_high, NULL); 92 + if (ret) 93 + goto out; 94 + 95 + major = REG_FIELD_GET(MAJOR_VERSION_MASK, ver_low); 96 + minor = REG_FIELD_GET(MINOR_VERSION_MASK, ver_low); 97 + hotfix = REG_FIELD_GET(HOTFIX_VERSION_MASK, ver_high); 98 + build = REG_FIELD_GET(BUILD_VERSION_MASK, ver_high); 99 + } 100 + out: 101 + xe_pm_runtime_put(xe); 102 + 103 + return ret ?: sysfs_emit(buf, "%u.%u.%u.%u\n", major, minor, hotfix, build); 104 + } 105 + static DEVICE_ATTR_ADMIN_RO(lb_fan_control_version); 106 + 107 + static ssize_t 108 + lb_voltage_regulator_version_show(struct device *dev, struct device_attribute *attr, 
char *buf) 109 + { 110 + struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev)); 111 + struct xe_tile *root = xe_device_get_root_tile(xe); 112 + u32 cap, ver_low = VR_CONFIG, ver_high = VR_CONFIG; 113 + u16 major = 0, minor = 0, hotfix = 0, build = 0; 114 + int ret; 115 + 116 + xe_pm_runtime_get(xe); 117 + 118 + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0), 119 + &cap, NULL); 120 + if (ret) 121 + goto out; 122 + 123 + if (REG_FIELD_GET(VR_PARAMS_PROVISIONED, cap)) { 124 + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_VERSION_LOW, 0), 125 + &ver_low, NULL); 126 + if (ret) 127 + goto out; 128 + 129 + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_VERSION_HIGH, 0), 130 + &ver_high, NULL); 131 + if (ret) 132 + goto out; 133 + 134 + major = REG_FIELD_GET(MAJOR_VERSION_MASK, ver_low); 135 + minor = REG_FIELD_GET(MINOR_VERSION_MASK, ver_low); 136 + hotfix = REG_FIELD_GET(HOTFIX_VERSION_MASK, ver_high); 137 + build = REG_FIELD_GET(BUILD_VERSION_MASK, ver_high); 138 + } 139 + out: 140 + xe_pm_runtime_put(xe); 141 + 142 + return ret ?: sysfs_emit(buf, "%u.%u.%u.%u\n", major, minor, hotfix, build); 143 + } 144 + static DEVICE_ATTR_ADMIN_RO(lb_voltage_regulator_version); 145 + 146 + static int late_bind_create_files(struct device *dev) 147 + { 148 + struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev)); 149 + struct xe_tile *root = xe_device_get_root_tile(xe); 150 + u32 cap; 151 + int ret; 152 + 153 + xe_pm_runtime_get(xe); 154 + 155 + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0), 156 + &cap, NULL); 157 + if (ret) 158 + goto out; 159 + 160 + if (REG_FIELD_GET(V1_FAN_SUPPORTED, cap)) { 161 + ret = sysfs_create_file(&dev->kobj, &dev_attr_lb_fan_control_version.attr); 162 + if (ret) 163 + goto out; 164 + } 165 + 166 + if (REG_FIELD_GET(VR_PARAMS_SUPPORTED, cap)) 167 + ret = sysfs_create_file(&dev->kobj, &dev_attr_lb_voltage_regulator_version.attr); 168 + out: 169 + 
xe_pm_runtime_put(xe); 170 + 171 + return ret; 172 + } 173 + 174 + static void late_bind_remove_files(struct device *dev) 175 + { 176 + struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev)); 177 + struct xe_tile *root = xe_device_get_root_tile(xe); 178 + u32 cap; 179 + int ret; 180 + 181 + xe_pm_runtime_get(xe); 182 + 183 + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0), 184 + &cap, NULL); 185 + if (ret) 186 + goto out; 187 + 188 + if (REG_FIELD_GET(V1_FAN_SUPPORTED, cap)) 189 + sysfs_remove_file(&dev->kobj, &dev_attr_lb_fan_control_version.attr); 190 + 191 + if (REG_FIELD_GET(VR_PARAMS_SUPPORTED, cap)) 192 + sysfs_remove_file(&dev->kobj, &dev_attr_lb_voltage_regulator_version.attr); 193 + out: 194 + xe_pm_runtime_put(xe); 195 + } 73 196 74 197 /** 75 198 * DOC: PCIe Gen5 Limitations ··· 286 151 if (xe->d3cold.capable) 287 152 sysfs_remove_file(&xe->drm.dev->kobj, &dev_attr_vram_d3cold_threshold.attr); 288 153 289 - if (xe->info.platform == XE_BATTLEMAGE) 154 + if (xe->info.platform == XE_BATTLEMAGE) { 290 155 sysfs_remove_files(&xe->drm.dev->kobj, auto_link_downgrade_attrs); 156 + late_bind_remove_files(xe->drm.dev); 157 + } 291 158 } 292 159 293 160 int xe_device_sysfs_init(struct xe_device *xe) ··· 305 168 306 169 if (xe->info.platform == XE_BATTLEMAGE) { 307 170 ret = sysfs_create_files(&dev->kobj, auto_link_downgrade_attrs); 171 + if (ret) 172 + return ret; 173 + 174 + ret = late_bind_create_files(dev); 308 175 if (ret) 309 176 return ret; 310 177 }
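Both late-binding attributes above decode a `major.minor.hotfix.build` string from two 32-bit pcode version registers using REG_FIELD_GET. A minimal userspace sketch of that unpacking, assuming 16-bit fields in each register (the actual bit positions live in the driver's MAJOR/MINOR/HOTFIX/BUILD mask macros and may differ):

```c
#include <stdint.h>
#include <assert.h>

/* Unpack a 4-part version from the low/high version registers.
 * Field widths here are an assumption for illustration only. */
struct lb_version {
	uint16_t major, minor, hotfix, build;
};

static struct lb_version lb_version_unpack(uint32_t ver_low, uint32_t ver_high)
{
	struct lb_version v = {
		.major  = (uint16_t)(ver_low >> 16),
		.minor  = (uint16_t)(ver_low & 0xffff),
		.hotfix = (uint16_t)(ver_high >> 16),
		.build  = (uint16_t)(ver_high & 0xffff),
	};
	return v;
}
```

The sysfs show functions then emit this as `"%u.%u.%u.%u\n"`, falling back to `0.0.0.0` when the capability bit reports the table as not provisioned.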
+21 -1
drivers/gpu/drm/xe/xe_device_types.h
··· 32 32 33 33 struct dram_info; 34 34 struct intel_display; 35 + struct intel_dg_nvm_dev; 35 36 struct xe_ggtt; 37 + struct xe_i2c; 36 38 struct xe_pat_ops; 37 39 struct xe_pxp; 38 40 ··· 107 105 void __iomem *mapping; 108 106 /** @ttm: VRAM TTM manager */ 109 107 struct xe_ttm_vram_mgr ttm; 110 - #if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) 108 + #if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) 111 109 /** @pagemap: Used to remap device memory as ZONE_DEVICE */ 112 110 struct dev_pagemap pagemap; 113 111 /** ··· 295 293 u8 vram_flags; 296 294 /** @info.tile_count: Number of tiles */ 297 295 u8 tile_count; 296 + /** @info.max_gt_per_tile: Number of GT IDs allocated to each tile */ 297 + u8 max_gt_per_tile; 298 298 /** @info.gt_count: Total number of GTs for entire device */ 299 299 u8 gt_count; 300 300 /** @info.vm_max_level: Max VM level */ ··· 320 316 u8 has_fan_control:1; 321 317 /** @info.has_flat_ccs: Whether flat CCS metadata is used */ 322 318 u8 has_flat_ccs:1; 319 + /** @info.has_gsc_nvm: Device has gsc non-volatile memory */ 320 + u8 has_gsc_nvm:1; 323 321 /** @info.has_heci_cscfi: device has heci cscfi */ 324 322 u8 has_heci_cscfi:1; 325 323 /** @info.has_heci_gscfi: device has heci gscfi */ ··· 555 549 /** @heci_gsc: graphics security controller */ 556 550 struct xe_heci_gsc heci_gsc; 557 551 552 + /** @nvm: discrete graphics non-volatile memory */ 553 + struct intel_dg_nvm_dev *nvm; 554 + 558 555 /** @oa: oa observation subsystem */ 559 556 struct xe_oa oa; 560 557 ··· 586 577 /** @pmu: performance monitoring unit */ 587 578 struct xe_pmu pmu; 588 579 580 + /** @i2c: I2C host controller */ 581 + struct xe_i2c *i2c; 582 + 589 583 /** @atomic_svm_timeslice_ms: Atomic SVM fault timeslice MS */ 590 584 u32 atomic_svm_timeslice_ms; 591 585 ··· 598 586 * bind IOCTL based on this value 599 587 */ 600 588 u8 vm_inject_error_position; 589 + #endif 590 + 591 + #if IS_ENABLED(CONFIG_TRACE_GPU_MEM) 592 + /** 593 + * @global_total_pages: global GPU page usage tracked for 
gpu_mem 594 + * tracepoints 595 + */ 596 + atomic64_t global_total_pages; 601 597 #endif 602 598 603 599 /* private: */
+1 -1
drivers/gpu/drm/xe/xe_drm_client.c
··· 167 167 static void bo_meminfo(struct xe_bo *bo, 168 168 struct drm_memory_stats stats[TTM_NUM_MEM_TYPES]) 169 169 { 170 - u64 sz = bo->size; 170 + u64 sz = xe_bo_size(bo); 171 171 u32 mem_type = bo->ttm.resource->mem_type; 172 172 173 173 xe_bo_assert_held(bo);
+4 -2
drivers/gpu/drm/xe/xe_eu_stall.c
··· 258 258 static int set_prop_eu_stall_gt_id(struct xe_device *xe, u64 value, 259 259 struct eu_stall_open_properties *props) 260 260 { 261 - if (value >= xe->info.gt_count) { 261 + struct xe_gt *gt = xe_device_get_gt(xe, value); 262 + 263 + if (!gt) { 262 264 drm_dbg(&xe->drm, "Invalid GT ID %llu for EU stall sampling\n", value); 263 265 return -EINVAL; 264 266 } 265 - props->gt = xe_device_get_gt(xe, value); 267 + props->gt = gt; 266 268 return 0; 267 269 } 268 270
+1 -1
drivers/gpu/drm/xe/xe_exec_queue.c
··· 610 610 if (XE_IOCTL_DBG(xe, err)) 611 611 return -EFAULT; 612 612 613 - if (XE_IOCTL_DBG(xe, eci[0].gt_id >= xe->info.gt_count)) 613 + if (XE_IOCTL_DBG(xe, !xe_device_get_gt(xe, eci[0].gt_id))) 614 614 return -EINVAL; 615 615 616 616 if (args->flags & DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT)
+18 -7
drivers/gpu/drm/xe/xe_ggtt.c
··· 238 238 } 239 239 EXPORT_SYMBOL_IF_KUNIT(xe_ggtt_init_kunit); 240 240 241 + static void dev_fini_ggtt(void *arg) 242 + { 243 + struct xe_ggtt *ggtt = arg; 244 + 245 + drain_workqueue(ggtt->wq); 246 + } 247 + 241 248 /** 242 249 * xe_ggtt_init_early - Early GGTT initialization 243 250 * @ggtt: the &xe_ggtt to be initialized ··· 294 287 __xe_ggtt_init_early(ggtt, xe_wopcm_size(xe)); 295 288 296 289 err = drmm_add_action_or_reset(&xe->drm, ggtt_fini_early, ggtt); 290 + if (err) 291 + return err; 292 + 293 + err = devm_add_action_or_reset(xe->drm.dev, dev_fini_ggtt, ggtt); 297 294 if (err) 298 295 return err; 299 296 ··· 421 410 goto err; 422 411 } 423 412 424 - xe_map_memset(xe, &ggtt->scratch->vmap, 0, 0, ggtt->scratch->size); 413 + xe_map_memset(xe, &ggtt->scratch->vmap, 0, 0, xe_bo_size(ggtt->scratch)); 425 414 426 415 xe_ggtt_initial_clear(ggtt); 427 416 ··· 693 682 return; 694 683 695 684 start = node->base.start; 696 - end = start + bo->size; 685 + end = start + xe_bo_size(bo); 697 686 698 687 pte = ggtt->pt_ops->pte_encode_flags(bo, pat_index); 699 688 if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) { 700 689 xe_assert(xe_bo_device(bo), bo->ttm.ttm); 701 690 702 - for (xe_res_first_sg(xe_bo_sg(bo), 0, bo->size, &cur); 691 + for (xe_res_first_sg(xe_bo_sg(bo), 0, xe_bo_size(bo), &cur); 703 692 cur.remaining; xe_res_next(&cur, XE_PAGE_SIZE)) 704 693 ggtt->pt_ops->ggtt_set_pte(ggtt, end - cur.remaining, 705 694 pte | xe_res_dma(&cur)); ··· 707 696 /* Prepend GPU offset */ 708 697 pte |= vram_region_gpu_offset(bo->ttm.resource); 709 698 710 - for (xe_res_first(bo->ttm.resource, 0, bo->size, &cur); 699 + for (xe_res_first(bo->ttm.resource, 0, xe_bo_size(bo), &cur); 711 700 cur.remaining; xe_res_next(&cur, XE_PAGE_SIZE)) 712 701 ggtt->pt_ops->ggtt_set_pte(ggtt, end - cur.remaining, 713 702 pte + cur.start); ··· 743 732 744 733 if (XE_WARN_ON(bo->ggtt_node[tile_id])) { 745 734 /* Someone's already inserted this BO in the GGTT */ 746 - xe_tile_assert(ggtt->tile, 
bo->ggtt_node[tile_id]->base.size == bo->size); 735 + xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == xe_bo_size(bo)); 747 736 return 0; 748 737 } 749 738 ··· 762 751 763 752 mutex_lock(&ggtt->lock); 764 753 err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node[tile_id]->base, 765 - bo->size, alignment, 0, start, end, 0); 754 + xe_bo_size(bo), alignment, 0, start, end, 0); 766 755 if (err) { 767 756 xe_ggtt_node_fini(bo->ggtt_node[tile_id]); 768 757 bo->ggtt_node[tile_id] = NULL; ··· 823 812 return; 824 813 825 814 /* This BO is not currently in the GGTT */ 826 - xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == bo->size); 815 + xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == xe_bo_size(bo)); 827 816 828 817 xe_ggtt_node_remove(bo->ggtt_node[tile_id], 829 818 bo->flags & XE_BO_FLAG_GGTT_INVALIDATE);
+4 -2
drivers/gpu/drm/xe/xe_gsc.c
··· 59 59 60 60 xe_map_memcpy_from(xe, storage, &gsc->fw.bo->vmap, 0, fw_size); 61 61 xe_map_memcpy_to(xe, &gsc->private->vmap, 0, storage, fw_size); 62 - xe_map_memset(xe, &gsc->private->vmap, fw_size, 0, gsc->private->size - fw_size); 62 + xe_map_memset(xe, &gsc->private->vmap, fw_size, 0, 63 + xe_bo_size(gsc->private) - fw_size); 63 64 64 65 kfree(storage); 65 66 ··· 83 82 bb->cs[bb->len++] = GSC_FW_LOAD; 84 83 bb->cs[bb->len++] = lower_32_bits(offset); 85 84 bb->cs[bb->len++] = upper_32_bits(offset); 86 - bb->cs[bb->len++] = (gsc->private->size / SZ_4K) | GSC_FW_LOAD_LIMIT_VALID; 85 + bb->cs[bb->len++] = (xe_bo_size(gsc->private) / SZ_4K) | 86 + GSC_FW_LOAD_LIMIT_VALID; 87 87 88 88 job = xe_bb_create_job(gsc->q, bb); 89 89 if (IS_ERR(job)) {
+38 -42
drivers/gpu/drm/xe/xe_gt.c
··· 390 390 391 391 int xe_gt_init_early(struct xe_gt *gt) 392 392 { 393 + unsigned int fw_ref; 393 394 int err; 394 395 395 396 if (IS_SRIOV_PF(gt_to_xe(gt))) { ··· 420 419 421 420 xe_mocs_init_early(gt); 422 421 422 + /* 423 + * Only after this point can GT-specific MMIO operations 424 + * (including things like communication with the GuC) 425 + * be performed. 426 + */ 427 + xe_gt_mmio_init(gt); 428 + 429 + err = xe_uc_init_noalloc(&gt->uc); 430 + if (err) 431 + return err; 432 + 433 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 434 + if (!fw_ref) 435 + return -ETIMEDOUT; 436 + 437 + xe_gt_mcr_init_early(gt); 438 + xe_pat_init(gt); 439 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 440 + 423 441 return 0; 424 442 } 425 443 ··· 453 433 xe_pat_dump(gt, &p); 454 434 } 455 435 456 - static int gt_fw_domain_init(struct xe_gt *gt) 436 + static int gt_init_with_gt_forcewake(struct xe_gt *gt) 457 437 { 458 438 unsigned int fw_ref; 459 439 int err; ··· 461 441 fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 462 442 if (!fw_ref) 463 443 return -ETIMEDOUT; 444 + 445 + err = xe_uc_init(&gt->uc); 446 + if (err) 447 + goto err_force_wake; 448 + 449 + xe_gt_topology_init(gt); 450 + xe_gt_mcr_init(gt); 451 + xe_gt_enable_host_l2_vram(gt); 464 452 465 453 if (!xe_gt_is_media_type(gt)) { 466 454 err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt); ··· 485 457 xe_gt_mcr_init(gt); 486 458 487 459 err = xe_hw_engines_init_early(gt); 488 - if (err) 460 + if (err) { 461 + dump_pat_on_error(gt); 489 462 goto err_force_wake; 463 + } 490 464 491 465 err = xe_hw_engine_class_sysfs_init(gt); 492 466 if (err) ··· 509 479 return 0; 510 480 511 481 err_force_wake: 512 - dump_pat_on_error(gt); 513 482 xe_force_wake_put(gt_to_fw(gt), fw_ref); 514 483 515 484 return err; 516 485 } 517 486 518 - static int all_fw_domain_init(struct xe_gt *gt) 487 + static int gt_init_with_all_forcewake(struct xe_gt *gt) 519 488 { 520 489 unsigned int fw_ref; 521 490 int err; ··· 573 544 } 574 545 } 575 546 576 - 
err = xe_uc_init_hw(&gt->uc); 547 + err = xe_uc_load_hw(&gt->uc); 577 548 if (err) 578 549 goto err_force_wake; 579 550 ··· 598 569 err_force_wake: 599 570 xe_force_wake_put(gt_to_fw(gt), fw_ref); 600 571 601 - return err; 602 - } 603 - 604 - /* 605 - * Initialize enough GT to be able to load GuC in order to obtain hwconfig and 606 - * enable CTB communication. 607 - */ 608 - int xe_gt_init_hwconfig(struct xe_gt *gt) 609 - { 610 - unsigned int fw_ref; 611 - int err; 612 - 613 - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 614 - if (!fw_ref) 615 - return -ETIMEDOUT; 616 - 617 - xe_gt_mcr_init_early(gt); 618 - xe_pat_init(gt); 619 - 620 - err = xe_uc_init(&gt->uc); 621 - if (err) 622 - goto out_fw; 623 - 624 - err = xe_uc_init_hwconfig(&gt->uc); 625 - if (err) 626 - goto out_fw; 627 - 628 - xe_gt_topology_init(gt); 629 - xe_gt_mcr_init(gt); 630 - xe_gt_enable_host_l2_vram(gt); 631 - 632 - out_fw: 633 - xe_force_wake_put(gt_to_fw(gt), fw_ref); 634 572 return err; 635 573 } 636 574 ··· 636 640 if (err) 637 641 return err; 638 642 639 - err = gt_fw_domain_init(gt); 643 + err = gt_init_with_gt_forcewake(gt); 640 644 if (err) 641 645 return err; 642 646 ··· 650 654 651 655 xe_force_wake_init_engines(gt, gt_to_fw(gt)); 652 656 653 - err = all_fw_domain_init(gt); 657 + err = gt_init_with_all_forcewake(gt); 654 658 if (err) 655 659 return err; 656 660 ··· 738 742 if (err) 739 743 return err; 740 744 741 - err = xe_uc_init_hw(&gt->uc); 745 + err = xe_uc_load_hw(&gt->uc); 742 746 if (err) 743 747 return err; 744 748 ··· 776 780 if (err) 777 781 return err; 778 782 779 - err = xe_uc_init_hw(&gt->uc); 783 + err = xe_uc_load_hw(&gt->uc); 780 784 if (err) 781 785 return err; 782 786
-1
drivers/gpu/drm/xe/xe_gt.h
··· 28 28 } 29 29 30 30 struct xe_gt *xe_gt_alloc(struct xe_tile *tile); 31 - int xe_gt_init_hwconfig(struct xe_gt *gt); 32 31 int xe_gt_init_early(struct xe_gt *gt); 33 32 int xe_gt_init(struct xe_gt *gt); 34 33 void xe_gt_mmio_init(struct xe_gt *gt);
+27 -20
drivers/gpu/drm/xe/xe_gt_mcr.c
··· 420 420 gt->steering[SQIDI_PSMI].instance_target = select & 0x1; 421 421 } 422 422 423 - static void init_steering_inst0(struct xe_gt *gt) 424 - { 425 - gt->steering[INSTANCE0].group_target = 0; /* unused */ 426 - gt->steering[INSTANCE0].instance_target = 0; /* unused */ 427 - } 428 - 429 423 static const struct { 430 424 const char *name; 431 425 void (*init)(struct xe_gt *gt); ··· 430 436 [DSS] = { "DSS", init_steering_dss }, 431 437 [OADDRM] = { "OADDRM / GPMXMT", init_steering_oaddrm }, 432 438 [SQIDI_PSMI] = { "SQIDI_PSMI", init_steering_sqidi_psmi }, 433 - [INSTANCE0] = { "INSTANCE 0", init_steering_inst0 }, 439 + [INSTANCE0] = { "INSTANCE 0", NULL }, 434 440 [IMPLICIT_STEERING] = { "IMPLICIT", NULL }, 435 441 }; 436 442 ··· 440 446 * 441 447 * Perform early software only initialization of the MCR lock to allow 442 448 * the synchronization on accessing the STEER_SEMAPHORE register and 443 - * use the xe_gt_mcr_multicast_write() function. 449 + * use the xe_gt_mcr_multicast_write() function, plus the minimum 450 + * safe MCR registers required for VRAM/CCS probing. 444 451 */ 445 452 void xe_gt_mcr_init_early(struct xe_gt *gt) 446 453 { 454 + struct xe_device *xe = gt_to_xe(gt); 455 + 447 456 BUILD_BUG_ON(IMPLICIT_STEERING + 1 != NUM_STEERING_TYPES); 448 457 BUILD_BUG_ON(ARRAY_SIZE(xe_steering_types) != NUM_STEERING_TYPES); 449 458 450 459 spin_lock_init(&gt->mcr_lock); 451 - } 452 - 453 - /** 454 - * xe_gt_mcr_init - Normal initialization of the MCR support 455 - * @gt: GT structure 456 - * 457 - * Perform normal initialization of the MCR for all usages. 458 - */ 459 - void xe_gt_mcr_init(struct xe_gt *gt) 460 - { 461 - struct xe_device *xe = gt_to_xe(gt); 462 460 463 461 if (IS_SRIOV_VF(xe)) 464 462 return; ··· 491 505 } 492 506 } 493 507 508 + /* Mark instance 0 as initialized, we need this early for VRAM and CCS probe. 
*/ 509 + gt->steering[INSTANCE0].initialized = true; 510 + } 511 + 512 + /** 513 + * xe_gt_mcr_init - Normal initialization of the MCR support 514 + * @gt: GT structure 515 + * 516 + * Perform normal initialization of the MCR for all usages. 517 + */ 518 + void xe_gt_mcr_init(struct xe_gt *gt) 519 + { 520 + if (IS_SRIOV_VF(gt_to_xe(gt))) 521 + return; 522 + 494 523 /* Select non-terminated steering target for each type */ 495 - for (int i = 0; i < NUM_STEERING_TYPES; i++) 524 + for (int i = 0; i < NUM_STEERING_TYPES; i++) { 525 + gt->steering[i].initialized = true; 496 526 if (gt->steering[i].ranges && xe_steering_types[i].init) 497 527 xe_steering_types[i].init(gt); 528 + } 498 529 } 499 530 500 531 /** ··· 573 570 574 571 for (int i = 0; gt->steering[type].ranges[i].end > 0; i++) { 575 572 if (xe_mmio_in_range(&gt->mmio, &gt->steering[type].ranges[i], reg)) { 573 + drm_WARN(&gt_to_xe(gt)->drm, !gt->steering[type].initialized, 574 + "Uninitialized usage of MCR register %s/%#x\n", 575 + xe_steering_types[type].name, reg.addr); 576 + 576 577 *group = gt->steering[type].group_target; 577 578 *instance = gt->steering[type].instance_target; 578 579 return true;
+1
drivers/gpu/drm/xe/xe_gt_pagefault.c
··· 419 419 #define PF_MULTIPLIER 8 420 420 pf_queue->num_dw = 421 421 (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW * PF_MULTIPLIER; 422 + pf_queue->num_dw = roundup_pow_of_two(pf_queue->num_dw); 422 423 #undef PF_MULTIPLIER 423 424 424 425 pf_queue->gt = gt;
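The added roundup_pow_of_two() call ("Allocate PF queue size on pow2 boundary" in the summary) sizes the page-fault queue so that head/tail indices can wrap with a cheap bitmask instead of a modulo. A userspace stand-in for the kernel helper (like the kernel macro, it assumes a nonzero input):

```c
#include <stdint.h>
#include <assert.h>

/* Smallest power of two >= n, for 32-bit n >= 1: smear the top set bit
 * down into all lower positions, then add one. */
static uint32_t roundup_pow_of_two_u32(uint32_t n)
{
	n--;
	n |= n >> 1;
	n |= n >> 2;
	n |= n >> 4;
	n |= n >> 8;
	n |= n >> 16;
	return n + 1;
}
```

With a power-of-two `num_dw`, an index advance becomes `idx = (idx + len) & (num_dw - 1)`.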
+13 -13
drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
··· 104 104 } 105 105 106 106 if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) { 107 - struct drm_printer p = xe_gt_info_printer(gt); 107 + struct drm_printer p = xe_gt_dbg_printer(gt); 108 108 void *klvs = xe_guc_buf_cpu_ptr(buf); 109 109 char name[8]; 110 110 111 - xe_gt_sriov_info(gt, "pushed %s config with %u KLV%s:\n", 112 - xe_sriov_function_name(vfid, name, sizeof(name)), 113 - num_klvs, str_plural(num_klvs)); 111 + xe_gt_sriov_dbg(gt, "pushed %s config with %u KLV%s:\n", 112 + xe_sriov_function_name(vfid, name, sizeof(name)), 113 + num_klvs, str_plural(num_klvs)); 114 114 xe_guc_klv_print(klvs, num_dwords, &p); 115 115 } 116 116 ··· 282 282 283 283 if (config->lmem_obj) { 284 284 cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_LMEM_SIZE); 285 - cfg[n++] = lower_32_bits(config->lmem_obj->size); 286 - cfg[n++] = upper_32_bits(config->lmem_obj->size); 285 + cfg[n++] = lower_32_bits(xe_bo_size(config->lmem_obj)); 286 + cfg[n++] = upper_32_bits(xe_bo_size(config->lmem_obj)); 287 287 } 288 288 289 289 cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_EXEC_QUANTUM); ··· 1299 1299 struct xe_bo *bo; 1300 1300 1301 1301 bo = config->lmem_obj; 1302 - return bo ? bo->size : 0; 1302 + return bo ? 
xe_bo_size(bo) : 0; 1303 1303 } 1304 1304 1305 1305 static int pf_distribute_config_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) ··· 1388 1388 err = xe_lmtt_populate_pages(lmtt, vfid, bo, offset); 1389 1389 if (err) 1390 1390 goto fail; 1391 - offset += bo->size; 1391 + offset += xe_bo_size(bo); 1392 1392 } 1393 1393 } 1394 1394 ··· 1469 1469 goto release; 1470 1470 } 1471 1471 1472 - err = pf_push_vf_cfg_lmem(gt, vfid, bo->size); 1472 + err = pf_push_vf_cfg_lmem(gt, vfid, xe_bo_size(bo)); 1473 1473 if (unlikely(err)) 1474 1474 goto reset_lmtt; 1475 1475 1476 1476 xe_gt_sriov_dbg_verbose(gt, "VF%u LMEM %zu (%zuM)\n", 1477 - vfid, bo->size, bo->size / SZ_1M); 1477 + vfid, xe_bo_size(bo), xe_bo_size(bo) / SZ_1M); 1478 1478 return 0; 1479 1479 1480 1480 reset_lmtt: ··· 2349 2349 return -EINVAL; 2350 2350 2351 2351 if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) { 2352 - struct drm_printer p = xe_gt_info_printer(gt); 2352 + struct drm_printer p = xe_gt_dbg_printer(gt); 2353 2353 2354 2354 drm_printf(&p, "restoring VF%u config:\n", vfid); 2355 2355 xe_guc_klv_print(buf, size / sizeof(u32), &p); ··· 2552 2552 if (!config->lmem_obj) 2553 2553 continue; 2554 2554 2555 - string_get_size(config->lmem_obj->size, 1, STRING_UNITS_2, 2555 + string_get_size(xe_bo_size(config->lmem_obj), 1, STRING_UNITS_2, 2556 2556 buf, sizeof(buf)); 2557 2557 drm_printf(p, "VF%u:\t%zu\t(%s)\n", 2558 - n, config->lmem_obj->size, buf); 2558 + n, xe_bo_size(config->lmem_obj), buf); 2559 2559 } 2560 2560 2561 2561 mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+1 -1
drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c
··· 266 266 read_many(gt, size, regs, values); 267 267 268 268 if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) { 269 - struct drm_printer p = xe_gt_info_printer(gt); 269 + struct drm_printer p = xe_gt_dbg_printer(gt); 270 270 271 271 xe_gt_sriov_pf_service_print_runtime(gt, &p); 272 272 }
-1
drivers/gpu/drm/xe/xe_gt_sriov_vf.c
··· 966 966 struct vf_runtime_reg *rr; 967 967 968 968 xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); 969 - xe_gt_assert(gt, gt->sriov.vf.pf_version.major); 970 969 xe_gt_assert(gt, !reg.vf); 971 970 972 971 if (reg.addr == GMD_ID.addr) {
+2
drivers/gpu/drm/xe/xe_gt_types.h
··· 377 377 u16 group_target; 378 378 /** @steering.instance_target: instance to steer accesses to */ 379 379 u16 instance_target; 380 + /** @steering.initialized: Whether this steering range is initialized */ 381 + bool initialized; 380 382 } steering[NUM_STEERING_TYPES]; 381 383 382 384 /**
+129 -18
drivers/gpu/drm/xe/xe_guc.c
··· 29 29 #include "xe_guc_db_mgr.h" 30 30 #include "xe_guc_engine_activity.h" 31 31 #include "xe_guc_hwconfig.h" 32 + #include "xe_guc_klv_helpers.h" 32 33 #include "xe_guc_log.h" 33 34 #include "xe_guc_pc.h" 34 35 #include "xe_guc_relay.h" ··· 60 59 /* GuC addresses above GUC_GGTT_TOP don't map through the GTT */ 61 60 xe_assert(xe, addr >= xe_wopcm_size(guc_to_xe(guc))); 62 61 xe_assert(xe, addr < GUC_GGTT_TOP); 63 - xe_assert(xe, bo->size <= GUC_GGTT_TOP - addr); 62 + xe_assert(xe, xe_bo_size(bo) <= GUC_GGTT_TOP - addr); 64 63 65 64 return addr; 66 65 } ··· 421 420 buf = base + G2G_DESC_AREA_SIZE + slot * G2G_BUFFER_SIZE; 422 421 423 422 xe_assert(xe, (desc - base + G2G_DESC_SIZE) <= G2G_DESC_AREA_SIZE); 424 - xe_assert(xe, (buf - base + G2G_BUFFER_SIZE) <= g2g_bo->size); 423 + xe_assert(xe, (buf - base + G2G_BUFFER_SIZE) <= xe_bo_size(g2g_bo)); 425 424 426 425 return guc_action_register_g2g_buffer(near_guc, type, far_tile, far_dev, 427 426 desc, buf, G2G_BUFFER_SIZE); ··· 571 570 return err; 572 571 } 573 572 573 + static int __guc_opt_in_features_enable(struct xe_guc *guc, u64 addr, u32 num_dwords) 574 + { 575 + u32 action[] = { 576 + XE_GUC_ACTION_OPT_IN_FEATURE_KLV, 577 + lower_32_bits(addr), 578 + upper_32_bits(addr), 579 + num_dwords 580 + }; 581 + 582 + return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action)); 583 + } 584 + 585 + static bool supports_dynamic_ics(struct xe_guc *guc) 586 + { 587 + struct xe_device *xe = guc_to_xe(guc); 588 + struct xe_gt *gt = guc_to_gt(guc); 589 + 590 + /* Dynamic ICS is available for PVC and Xe2 and newer platforms. */ 591 + if (xe->info.platform != XE_PVC && GRAPHICS_VER(xe) < 20) 592 + return false; 593 + 594 + /* 595 + * The feature is currently not compatible with multi-lrc, so the GuC 596 + * does not support it at all on the media engines (which are the main 597 + * users of mlrc). 
On the primary GT side, to avoid it being used in 598 + * conjunction with mlrc, we only enable it if we are in single CCS 599 + * mode. 600 + */ 601 + if (xe_gt_is_media_type(gt) || gt->ccs_mode > 1) 602 + return false; 603 + 604 + /* 605 + * Dynamic ICS requires GuC v70.40.1, which maps to compatibility 606 + * version v1.18.4. 607 + */ 608 + return GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 18, 4); 609 + } 610 + 611 + #define OPT_IN_MAX_DWORDS 16 612 + int xe_guc_opt_in_features_enable(struct xe_guc *guc) 613 + { 614 + struct xe_device *xe = guc_to_xe(guc); 615 + CLASS(xe_guc_buf, buf)(&guc->buf, OPT_IN_MAX_DWORDS); 616 + u32 count = 0; 617 + u32 *klvs; 618 + int ret; 619 + 620 + if (!xe_guc_buf_is_valid(buf)) 621 + return -ENOBUFS; 622 + 623 + klvs = xe_guc_buf_cpu_ptr(buf); 624 + 625 + /* 626 + * The extra CAT error type opt-in was added in GuC v70.17.0, which maps 627 + * to compatibility version v1.7.0. 628 + * Note that the GuC allows enabling this KLV even on platforms that do 629 + * not support the extra type; in such case the returned type variable 630 + * will be set to a known invalid value which we can check against. 
631 + */ 632 + if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 7, 0)) 633 + klvs[count++] = PREP_GUC_KLV_TAG(OPT_IN_FEATURE_EXT_CAT_ERR_TYPE); 634 + 635 + if (supports_dynamic_ics(guc)) 636 + klvs[count++] = PREP_GUC_KLV_TAG(OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH); 637 + 638 + if (count) { 639 + xe_assert(xe, count <= OPT_IN_MAX_DWORDS); 640 + 641 + ret = __guc_opt_in_features_enable(guc, xe_guc_buf_flush(buf), count); 642 + if (ret < 0) { 643 + xe_gt_err(guc_to_gt(guc), 644 + "failed to enable GuC opt-in features: %pe\n", 645 + ERR_PTR(ret)); 646 + return ret; 647 + } 648 + } 649 + 650 + return 0; 651 + } 652 + 574 653 static void guc_fini_hw(void *arg) 575 654 { 576 655 struct xe_guc *guc = arg; ··· 658 577 unsigned int fw_ref; 659 578 660 579 fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); 661 - xe_uc_fini_hw(&guc_to_gt(guc)->uc); 580 + xe_uc_sanitize_reset(&guc_to_gt(guc)->uc); 662 581 xe_force_wake_put(gt_to_fw(gt), fw_ref); 663 582 664 583 guc_g2g_fini(guc); ··· 708 627 return 0; 709 628 } 710 629 711 - static int vf_guc_init(struct xe_guc *guc) 630 + static int vf_guc_init_noalloc(struct xe_guc *guc) 712 631 { 632 + struct xe_gt *gt = guc_to_gt(guc); 713 633 int err; 714 634 715 - xe_guc_comm_init_early(guc); 716 - 717 - err = xe_guc_ct_init(&guc->ct); 635 + err = xe_gt_sriov_vf_bootstrap(gt); 718 636 if (err) 719 637 return err; 720 638 721 - err = xe_guc_relay_init(&guc->relay); 639 + err = xe_gt_sriov_vf_query_config(gt); 722 640 if (err) 723 641 return err; 724 642 725 643 return 0; 644 + } 645 + 646 + int xe_guc_init_noalloc(struct xe_guc *guc) 647 + { 648 + struct xe_device *xe = guc_to_xe(guc); 649 + struct xe_gt *gt = guc_to_gt(guc); 650 + int ret; 651 + 652 + xe_guc_comm_init_early(guc); 653 + 654 + ret = xe_guc_ct_init_noalloc(&guc->ct); 655 + if (ret) 656 + goto out; 657 + 658 + ret = xe_guc_relay_init(&guc->relay); 659 + if (ret) 660 + goto out; 661 + 662 + if (IS_SRIOV_VF(xe)) { 663 + ret = vf_guc_init_noalloc(guc); 664 + if (ret) 
665 + goto out; 666 + } 667 + 668 + return 0; 669 + 670 + out: 671 + xe_gt_err(gt, "GuC init failed with %pe\n", ERR_PTR(ret)); 672 + return ret; 726 673 } 727 674 728 675 int xe_guc_init(struct xe_guc *guc) ··· 762 653 guc->fw.type = XE_UC_FW_TYPE_GUC; 763 654 ret = xe_uc_fw_init(&guc->fw); 764 655 if (ret) 765 - goto out; 656 + return ret; 766 657 767 658 if (!xe_uc_fw_is_enabled(&guc->fw)) 768 659 return 0; 769 660 770 661 if (IS_SRIOV_VF(xe)) { 771 - ret = vf_guc_init(guc); 662 + ret = xe_guc_ct_init(&guc->ct); 772 663 if (ret) 773 664 goto out; 774 665 return 0; ··· 790 681 if (ret) 791 682 goto out; 792 683 793 - ret = xe_guc_relay_init(&guc->relay); 794 - if (ret) 795 - goto out; 796 - 797 684 xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE); 798 685 799 686 ret = devm_add_action_or_reset(xe->drm.dev, guc_fini_hw, guc); ··· 797 692 goto out; 798 693 799 694 guc_init_params(guc); 800 - 801 - xe_guc_comm_init_early(guc); 802 695 803 696 return 0; 804 697 ··· 869 766 int ret; 870 767 871 768 xe_guc_ads_populate_post_load(&guc->ads); 769 + 770 + ret = xe_guc_opt_in_features_enable(guc); 771 + if (ret) 772 + return ret; 872 773 873 774 if (xe_guc_g2g_wanted(guc_to_xe(guc))) { 874 775 ret = guc_g2g_start(guc); ··· 1219 1112 1220 1113 ret = xe_gt_sriov_vf_connect(gt); 1221 1114 if (ret) 1222 - return ret; 1115 + goto err_out; 1223 1116 1224 1117 ret = xe_gt_sriov_vf_query_runtime(gt); 1225 1118 if (ret) 1226 - return ret; 1119 + goto err_out; 1227 1120 1228 1121 return 0; 1122 + 1123 + err_out: 1124 + xe_guc_sanitize(guc); 1125 + return ret; 1229 1126 } 1230 1127 1231 1128 /**
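The opt-in logic above gates each feature on checks like `GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 18, 4)`, i.e. the whole major/minor/patch triple is compared as a single integer. A hedged sketch of that packing idea (the 8-bits-per-field layout here is an assumption for illustration, not the driver's actual macro definition):

```c
#include <stdint.h>
#include <assert.h>

/* Fold major.minor.patch into one integer so a single >= compares the
 * triple lexicographically: higher fields occupy higher bit positions. */
static uint32_t make_ver(uint8_t major, uint8_t minor, uint8_t patch)
{
	return ((uint32_t)major << 16) | ((uint32_t)minor << 8) | patch;
}
```

This works because each field is confined to its own bit range, so a larger major always dominates any minor/patch difference.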
+2
drivers/gpu/drm/xe/xe_guc.h
··· 26 26 struct drm_printer; 27 27 28 28 void xe_guc_comm_init_early(struct xe_guc *guc); 29 + int xe_guc_init_noalloc(struct xe_guc *guc); 29 30 int xe_guc_init(struct xe_guc *guc); 30 31 int xe_guc_init_post_hwconfig(struct xe_guc *guc); 31 32 int xe_guc_post_load_init(struct xe_guc *guc); ··· 34 33 int xe_guc_upload(struct xe_guc *guc); 35 34 int xe_guc_min_load_for_hwconfig(struct xe_guc *guc); 36 35 int xe_guc_enable_communication(struct xe_guc *guc); 36 + int xe_guc_opt_in_features_enable(struct xe_guc *guc); 37 37 int xe_guc_suspend(struct xe_guc *guc); 38 38 void xe_guc_notify(struct xe_guc *guc); 39 39 int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr);
+11 -20
drivers/gpu/drm/xe/xe_guc_ads.c
··· 890 890 891 891 xe_gt_assert(gt, ads->bo); 892 892 893 - xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size); 893 + xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, xe_bo_size(ads->bo)); 894 894 guc_policies_init(ads); 895 895 guc_golden_lrc_init(ads); 896 896 guc_mapping_table_init_invalid(gt, &info_map); ··· 914 914 915 915 xe_gt_assert(gt, ads->bo); 916 916 917 - xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size); 917 + xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, xe_bo_size(ads->bo)); 918 918 guc_policies_init(ads); 919 919 fill_engine_enable_masks(gt, &info_map); 920 920 guc_mmio_reg_state_init(ads); ··· 995 995 return xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); 996 996 } 997 997 998 - static int guc_ads_update_policies(struct xe_guc_ads *ads, const struct guc_policies *policies) 999 - { 1000 - CLASS(xe_guc_buf_from_data, buf)(&ads_to_guc(ads)->buf, policies, sizeof(*policies)); 1001 - 1002 - if (!xe_guc_buf_is_valid(buf)) 1003 - return -ENOBUFS; 1004 - 1005 - return guc_ads_action_update_policies(ads, xe_guc_buf_flush(buf)); 1006 - } 1007 - 1008 998 /** 1009 999 * xe_guc_ads_scheduler_policy_toggle_reset - Toggle reset policy 1010 1000 * @ads: Additional data structures object ··· 1005 1015 */ 1006 1016 int xe_guc_ads_scheduler_policy_toggle_reset(struct xe_guc_ads *ads) 1007 1017 { 1008 - struct xe_device *xe = ads_to_xe(ads); 1009 1018 struct guc_policies *policies; 1010 - int ret; 1019 + struct xe_guc *guc = ads_to_guc(ads); 1020 + struct xe_device *xe = ads_to_xe(ads); 1021 + CLASS(xe_guc_buf, buf)(&guc->buf, sizeof(*policies)); 1011 1022 1012 - policies = kmalloc(sizeof(*policies), GFP_KERNEL); 1013 - if (!policies) 1014 - return -ENOMEM; 1023 + if (!xe_guc_buf_is_valid(buf)) 1024 + return -ENOBUFS; 1025 + 1026 + policies = xe_guc_buf_cpu_ptr(buf); 1027 + memset(policies, 0, sizeof(*policies)); 1015 1028 1016 1029 policies->dpc_promote_time = ads_blob_read(ads, policies.dpc_promote_time); 1017 
1030 policies->max_num_work_items = ads_blob_read(ads, policies.max_num_work_items); ··· 1024 1031 else 1025 1032 policies->global_flags &= ~GLOBAL_POLICY_DISABLE_ENGINE_RESET; 1026 1033 1027 - ret = guc_ads_update_policies(ads, policies); 1028 - kfree(policies); 1029 - return ret; 1034 + return guc_ads_action_update_policies(ads, xe_guc_buf_flush(buf)); 1030 1035 }
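The xe_guc_ads.c hunk above replaces a kmalloc()/kfree() pair with a `CLASS(xe_guc_buf, buf)` declaration whose release runs automatically at scope exit, which is why the explicit kfree() and the `ret` plumbing disappear. As a rough userspace sketch of the underlying mechanism (the compiler's `cleanup` variable attribute, which linux/cleanup.h builds CLASS()/guard() on; the names here are illustrative, not the driver's):

```c
#include <stdlib.h>

static int freed;

/* Runs automatically when the annotated variable leaves scope. */
static void free_intp(int **p)
{
	free(*p);
	freed = 1;
}

/* Allocate, use, and never call free() explicitly: the cleanup attribute
 * releases the buffer on every return path, which is the property the
 * CLASS()-based rewrite relies on. */
static int demo_scoped_alloc(void)
{
	int *buf __attribute__((cleanup(free_intp))) = malloc(sizeof(*buf));

	if (!buf)
		return -1;

	*buf = 42;
	return *buf;	/* free_intp() fires after this value is computed */
}
```

This is a GCC/Clang extension, not standard C, but it is the same primitive the kernel's scope-based cleanup helpers use.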
+60 -21
drivers/gpu/drm/xe/xe_guc_ct.c
··· 35 35 #include "xe_pm.h" 36 36 #include "xe_trace_guc.h" 37 37 38 + static void receive_g2h(struct xe_guc_ct *ct); 39 + static void g2h_worker_func(struct work_struct *w); 40 + static void safe_mode_worker_func(struct work_struct *w); 41 + static void ct_exit_safe_mode(struct xe_guc_ct *ct); 42 + 38 43 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) 39 44 enum { 40 45 /* Internal states, not error conditions */ ··· 85 80 u16 error; 86 81 u16 hint; 87 82 u16 reason; 83 + bool cancel; 88 84 bool retry; 89 85 bool fail; 90 86 bool done; ··· 102 96 g2h_fence->retry = false; 103 97 g2h_fence->done = false; 104 98 g2h_fence->seqno = ~0x0; 99 + } 100 + 101 + static void g2h_fence_cancel(struct g2h_fence *g2h_fence) 102 + { 103 + g2h_fence->cancel = true; 104 + g2h_fence->fail = true; 105 + g2h_fence->done = true; 105 106 } 106 107 107 108 static bool g2h_fence_needs_alloc(struct g2h_fence *g2h_fence) ··· 202 189 { 203 190 struct xe_guc_ct *ct = arg; 204 191 192 + ct_exit_safe_mode(ct); 205 193 destroy_workqueue(ct->g2h_wq); 206 194 xa_destroy(&ct->fence_lookup); 207 195 } 208 - 209 - static void receive_g2h(struct xe_guc_ct *ct); 210 - static void g2h_worker_func(struct work_struct *w); 211 - static void safe_mode_worker_func(struct work_struct *w); 212 196 213 197 static void primelockdep(struct xe_guc_ct *ct) 214 198 { ··· 217 207 fs_reclaim_release(GFP_KERNEL); 218 208 } 219 209 220 - int xe_guc_ct_init(struct xe_guc_ct *ct) 210 + int xe_guc_ct_init_noalloc(struct xe_guc_ct *ct) 221 211 { 222 212 struct xe_device *xe = ct_to_xe(ct); 223 213 struct xe_gt *gt = ct_to_gt(ct); 224 - struct xe_tile *tile = gt_to_tile(gt); 225 - struct xe_bo *bo; 226 214 int err; 227 215 228 216 xe_gt_assert(gt, !(guc_ct_size() % PAGE_SIZE)); ··· 246 238 247 239 primelockdep(ct); 248 240 241 + err = drmm_add_action_or_reset(&xe->drm, guc_ct_fini, ct); 242 + if (err) 243 + return err; 244 + 245 + xe_gt_assert(gt, ct->state == XE_GUC_CT_STATE_NOT_INITIALIZED); 246 + ct->state = 
XE_GUC_CT_STATE_DISABLED; 247 + return 0; 248 + } 249 + ALLOW_ERROR_INJECTION(xe_guc_ct_init_noalloc, ERRNO); /* See xe_pci_probe() */ 250 + 251 + int xe_guc_ct_init(struct xe_guc_ct *ct) 252 + { 253 + struct xe_device *xe = ct_to_xe(ct); 254 + struct xe_gt *gt = ct_to_gt(ct); 255 + struct xe_tile *tile = gt_to_tile(gt); 256 + struct xe_bo *bo; 257 + 249 258 bo = xe_managed_bo_create_pin_map(xe, tile, guc_ct_size(), 250 259 XE_BO_FLAG_SYSTEM | 251 260 XE_BO_FLAG_GGTT | ··· 272 247 return PTR_ERR(bo); 273 248 274 249 ct->bo = bo; 275 - 276 - err = drmm_add_action_or_reset(&xe->drm, guc_ct_fini, ct); 277 - if (err) 278 - return err; 279 - 280 - xe_gt_assert(gt, ct->state == XE_GUC_CT_STATE_NOT_INITIALIZED); 281 - ct->state = XE_GUC_CT_STATE_DISABLED; 282 250 return 0; 283 251 } 284 252 ALLOW_ERROR_INJECTION(xe_guc_ct_init, ERRNO); /* See xe_pci_probe() */ ··· 392 374 return ret > 0 ? -EPROTO : ret; 393 375 } 394 376 395 - static void xe_guc_ct_set_state(struct xe_guc_ct *ct, 377 + static void guc_ct_change_state(struct xe_guc_ct *ct, 396 378 enum xe_guc_ct_state state) 397 379 { 380 + struct xe_gt *gt = ct_to_gt(ct); 381 + struct g2h_fence *g2h_fence; 382 + unsigned long idx; 383 + 398 384 mutex_lock(&ct->lock); /* Serialise dequeue_one_g2h() */ 399 385 spin_lock_irq(&ct->fast_lock); /* Serialise CT fast-path */ 400 386 ··· 410 388 ct->g2h_outstanding = 0; 411 389 ct->state = state; 412 390 391 + xe_gt_dbg(gt, "GuC CT communication channel %s\n", 392 + state == XE_GUC_CT_STATE_STOPPED ? 
"stopped" : 393 + str_enabled_disabled(state == XE_GUC_CT_STATE_ENABLED)); 394 + 413 395 spin_unlock_irq(&ct->fast_lock); 396 + 397 + /* cancel all in-flight send-recv requests */ 398 + xa_for_each(&ct->fence_lookup, idx, g2h_fence) 399 + g2h_fence_cancel(g2h_fence); 400 + 401 + /* make sure guc_ct_send_recv() will see g2h_fence changes */ 402 + smp_mb(); 403 + wake_up_all(&ct->g2h_fence_wq); 414 404 415 405 /* 416 406 * Lockdep doesn't like this under the fast lock and he destroy only ··· 477 443 478 444 xe_gt_assert(gt, !xe_guc_ct_enabled(ct)); 479 445 480 - xe_map_memset(xe, &ct->bo->vmap, 0, 0, ct->bo->size); 446 + xe_map_memset(xe, &ct->bo->vmap, 0, 0, xe_bo_size(ct->bo)); 481 447 guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap); 482 448 guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap); 483 449 ··· 493 459 if (err) 494 460 goto err_out; 495 461 496 - xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_ENABLED); 462 + guc_ct_change_state(ct, XE_GUC_CT_STATE_ENABLED); 497 463 498 464 smp_mb(); 499 465 wake_up_all(&ct->wq); 500 - xe_gt_dbg(gt, "GuC CT communication channel enabled\n"); 501 466 502 467 if (ct_needs_safe_mode(ct)) 503 468 ct_enter_safe_mode(ct); ··· 537 504 */ 538 505 void xe_guc_ct_disable(struct xe_guc_ct *ct) 539 506 { 540 - xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_DISABLED); 507 + guc_ct_change_state(ct, XE_GUC_CT_STATE_DISABLED); 541 508 ct_exit_safe_mode(ct); 542 509 stop_g2h_handler(ct); 543 510 } ··· 553 520 if (!xe_guc_ct_initialized(ct)) 554 521 return; 555 522 556 - xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_STOPPED); 523 + guc_ct_change_state(ct, XE_GUC_CT_STATE_STOPPED); 557 524 stop_g2h_handler(ct); 558 525 } 559 526 ··· 1116 1083 goto retry; 1117 1084 } 1118 1085 if (g2h_fence.fail) { 1086 + if (g2h_fence.cancel) { 1087 + xe_gt_dbg(gt, "H2G request %#x canceled!\n", action[0]); 1088 + ret = -ECANCELED; 1089 + goto unlock; 1090 + } 1119 1091 xe_gt_err(gt, "H2G request %#x failed: error %#x hint %#x\n", 1120 1092 action[0], g2h_fence.error, 
g2h_fence.hint); 1121 1093 ret = -EIO; ··· 1129 1091 if (ret > 0) 1130 1092 ret = response_buffer ? g2h_fence.response_len : g2h_fence.response_data; 1131 1093 1094 + unlock: 1132 1095 mutex_unlock(&ct->lock); 1133 1096 1134 1097 return ret; ··· 1936 1897 return NULL; 1937 1898 1938 1899 if (ct->bo && want_ctb) { 1939 - snapshot->ctb_size = ct->bo->size; 1900 + snapshot->ctb_size = xe_bo_size(ct->bo); 1940 1901 snapshot->ctb = kmalloc(snapshot->ctb_size, atomic ? GFP_ATOMIC : GFP_KERNEL); 1941 1902 } 1942 1903
+1
drivers/gpu/drm/xe/xe_guc_ct.h
··· 11 11 struct drm_printer; 12 12 struct xe_device; 13 13 14 + int xe_guc_ct_init_noalloc(struct xe_guc_ct *ct); 14 15 int xe_guc_ct_init(struct xe_guc_ct *ct); 15 16 int xe_guc_ct_enable(struct xe_guc_ct *ct); 16 17 void xe_guc_ct_disable(struct xe_guc_ct *ct);
+1 -1
drivers/gpu/drm/xe/xe_guc_log.c
··· 79 79 * Also, can't use vmalloc as might be called from atomic context. So need 80 80 * to break the buffer up into smaller chunks that can be allocated. 81 81 */ 82 - snapshot->size = log->bo->size; 82 + snapshot->size = xe_bo_size(log->bo); 83 83 snapshot->num_chunks = DIV_ROUND_UP(snapshot->size, GUC_LOG_CHUNK_SIZE); 84 84 85 85 snapshot->copy = kcalloc(snapshot->num_chunks, sizeof(*snapshot->copy),
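The snapshot path above sizes its copy as `DIV_ROUND_UP(size, GUC_LOG_CHUNK_SIZE)` pieces because the log buffer may be too large for a single atomic allocation. A small userspace sketch of that chunk arithmetic (the helper functions are made up for illustration; DIV_ROUND_UP matches the kernel macro):

```c
/* DIV_ROUND_UP as in the kernel's math helpers. */
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

static unsigned long chunk_count(unsigned long size, unsigned long chunk)
{
	return DIV_ROUND_UP(size, chunk);
}

/* Length of chunk i: full-sized except possibly the last one. */
static unsigned long chunk_len(unsigned long size, unsigned long chunk,
			       unsigned long i)
{
	unsigned long off = i * chunk;

	return size - off < chunk ? size - off : chunk;
}
```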
+212 -71
drivers/gpu/drm/xe/xe_guc_pc.c
··· 5 5 6 6 #include "xe_guc_pc.h" 7 7 8 + #include <linux/cleanup.h> 8 9 #include <linux/delay.h> 10 + #include <linux/jiffies.h> 9 11 #include <linux/ktime.h> 12 + #include <linux/wait_bit.h> 10 13 11 14 #include <drm/drm_managed.h> 12 15 #include <drm/drm_print.h> ··· 55 52 #define LNL_MERT_FREQ_CAP 800 56 53 #define BMG_MERT_FREQ_CAP 2133 57 54 #define BMG_MIN_FREQ 1200 55 + #define BMG_MERT_FLUSH_FREQ_CAP 2600 58 56 59 57 #define SLPC_RESET_TIMEOUT_MS 5 /* roughly 5ms, but no need for precision */ 60 58 #define SLPC_RESET_EXTENDED_TIMEOUT_MS 1000 /* To be used only at pc_start */ 59 + #define SLPC_ACT_FREQ_TIMEOUT_MS 100 61 60 62 61 /** 63 62 * DOC: GuC Power Conservation (PC) ··· 147 142 return -ETIMEDOUT; 148 143 } 149 144 145 + static int wait_for_flush_complete(struct xe_guc_pc *pc) 146 + { 147 + const unsigned long timeout = msecs_to_jiffies(30); 148 + 149 + if (!wait_var_event_timeout(&pc->flush_freq_limit, 150 + !atomic_read(&pc->flush_freq_limit), 151 + timeout)) 152 + return -ETIMEDOUT; 153 + 154 + return 0; 155 + } 156 + 157 + static int wait_for_act_freq_limit(struct xe_guc_pc *pc, u32 freq) 158 + { 159 + int timeout_us = SLPC_ACT_FREQ_TIMEOUT_MS * USEC_PER_MSEC; 160 + int slept, wait = 10; 161 + 162 + for (slept = 0; slept < timeout_us;) { 163 + if (xe_guc_pc_get_act_freq(pc) <= freq) 164 + return 0; 165 + 166 + usleep_range(wait, wait << 1); 167 + slept += wait; 168 + wait <<= 1; 169 + if (slept + wait > timeout_us) 170 + wait = timeout_us - slept; 171 + } 172 + 173 + return -ETIMEDOUT; 174 + } 150 175 static int pc_action_reset(struct xe_guc_pc *pc) 151 176 { 152 177 struct xe_guc_ct *ct = pc_to_ct(pc); ··· 589 554 return pc->rpn_freq; 590 555 } 591 556 557 + static int xe_guc_pc_get_min_freq_locked(struct xe_guc_pc *pc, u32 *freq) 558 + { 559 + int ret; 560 + 561 + lockdep_assert_held(&pc->freq_lock); 562 + 563 + /* Might be in the middle of a gt reset */ 564 + if (!pc->freq_ready) 565 + return -EAGAIN; 566 + 567 + ret = 
pc_action_query_task_state(pc); 568 + if (ret) 569 + return ret; 570 + 571 + *freq = pc_get_min_freq(pc); 572 + 573 + return 0; 574 + } 575 + 592 576 /** 593 577 * xe_guc_pc_get_min_freq - Get the min operational frequency 594 578 * @pc: The GuC PC ··· 618 564 */ 619 565 int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq) 620 566 { 567 + guard(mutex)(&pc->freq_lock); 568 + 569 + return xe_guc_pc_get_min_freq_locked(pc, freq); 570 + } 571 + 572 + static int xe_guc_pc_set_min_freq_locked(struct xe_guc_pc *pc, u32 freq) 573 + { 621 574 int ret; 622 575 623 - xe_device_assert_mem_access(pc_to_xe(pc)); 576 + lockdep_assert_held(&pc->freq_lock); 624 577 625 - mutex_lock(&pc->freq_lock); 626 - if (!pc->freq_ready) { 627 - /* Might be in the middle of a gt reset */ 628 - ret = -EAGAIN; 629 - goto out; 630 - } 578 + /* Might be in the middle of a gt reset */ 579 + if (!pc->freq_ready) 580 + return -EAGAIN; 631 581 632 - ret = pc_action_query_task_state(pc); 582 + ret = pc_set_min_freq(pc, freq); 633 583 if (ret) 634 - goto out; 584 + return ret; 635 585 636 - *freq = pc_get_min_freq(pc); 586 + pc->user_requested_min = freq; 637 587 638 - out: 639 - mutex_unlock(&pc->freq_lock); 640 - return ret; 588 + return 0; 641 589 } 642 590 643 591 /** ··· 653 597 */ 654 598 int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq) 655 599 { 600 + guard(mutex)(&pc->freq_lock); 601 + 602 + return xe_guc_pc_set_min_freq_locked(pc, freq); 603 + } 604 + 605 + static int xe_guc_pc_get_max_freq_locked(struct xe_guc_pc *pc, u32 *freq) 606 + { 656 607 int ret; 657 608 658 - mutex_lock(&pc->freq_lock); 659 - if (!pc->freq_ready) { 660 - /* Might be in the middle of a gt reset */ 661 - ret = -EAGAIN; 662 - goto out; 663 - } 609 + lockdep_assert_held(&pc->freq_lock); 664 610 665 - ret = pc_set_min_freq(pc, freq); 611 + /* Might be in the middle of a gt reset */ 612 + if (!pc->freq_ready) 613 + return -EAGAIN; 614 + 615 + ret = pc_action_query_task_state(pc); 666 616 if (ret) 667 - goto 
out; 617 + return ret; 668 618 669 - pc->user_requested_min = freq; 619 + *freq = pc_get_max_freq(pc); 670 620 671 - out: 672 - mutex_unlock(&pc->freq_lock); 673 - return ret; 621 + return 0; 674 622 } 675 623 676 624 /** ··· 687 627 */ 688 628 int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq) 689 629 { 630 + guard(mutex)(&pc->freq_lock); 631 + 632 + return xe_guc_pc_get_max_freq_locked(pc, freq); 633 + } 634 + 635 + static int xe_guc_pc_set_max_freq_locked(struct xe_guc_pc *pc, u32 freq) 636 + { 690 637 int ret; 691 638 692 - mutex_lock(&pc->freq_lock); 693 - if (!pc->freq_ready) { 694 - /* Might be in the middle of a gt reset */ 695 - ret = -EAGAIN; 696 - goto out; 697 - } 639 + lockdep_assert_held(&pc->freq_lock); 698 640 699 - ret = pc_action_query_task_state(pc); 641 + /* Might be in the middle of a gt reset */ 642 + if (!pc->freq_ready) 643 + return -EAGAIN; 644 + 645 + ret = pc_set_max_freq(pc, freq); 700 646 if (ret) 701 - goto out; 647 + return ret; 702 648 703 - *freq = pc_get_max_freq(pc); 649 + pc->user_requested_max = freq; 704 650 705 - out: 706 - mutex_unlock(&pc->freq_lock); 707 - return ret; 651 + return 0; 708 652 } 709 653 710 654 /** ··· 722 658 */ 723 659 int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq) 724 660 { 725 - int ret; 726 - 727 - mutex_lock(&pc->freq_lock); 728 - if (!pc->freq_ready) { 729 - /* Might be in the middle of a gt reset */ 730 - ret = -EAGAIN; 731 - goto out; 661 + if (XE_WA(pc_to_gt(pc), 22019338487)) { 662 + if (wait_for_flush_complete(pc) != 0) 663 + return -EAGAIN; 732 664 } 733 665 734 - ret = pc_set_max_freq(pc, freq); 735 - if (ret) 736 - goto out; 666 + guard(mutex)(&pc->freq_lock); 737 667 738 - pc->user_requested_max = freq; 739 - 740 - out: 741 - mutex_unlock(&pc->freq_lock); 742 - return ret; 668 + return xe_guc_pc_set_max_freq_locked(pc, freq); 743 669 } 744 670 745 671 /** ··· 927 873 return ret; 928 874 } 929 875 930 - static int pc_set_mert_freq_cap(struct xe_guc_pc *pc) 876 + static bool 
needs_flush_freq_limit(struct xe_guc_pc *pc) 931 877 { 878 + struct xe_gt *gt = pc_to_gt(pc); 879 + 880 + return XE_WA(gt, 22019338487) && 881 + pc->rp0_freq > BMG_MERT_FLUSH_FREQ_CAP; 882 + } 883 + 884 + /** 885 + * xe_guc_pc_apply_flush_freq_limit() - Limit max GT freq during L2 flush 886 + * @pc: the xe_guc_pc object 887 + * 888 + * As per the WA, reduce max GT frequency during L2 cache flush 889 + */ 890 + void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc) 891 + { 892 + struct xe_gt *gt = pc_to_gt(pc); 893 + u32 max_freq; 894 + int ret; 895 + 896 + if (!needs_flush_freq_limit(pc)) 897 + return; 898 + 899 + guard(mutex)(&pc->freq_lock); 900 + 901 + ret = xe_guc_pc_get_max_freq_locked(pc, &max_freq); 902 + if (!ret && max_freq > BMG_MERT_FLUSH_FREQ_CAP) { 903 + ret = pc_set_max_freq(pc, BMG_MERT_FLUSH_FREQ_CAP); 904 + if (ret) { 905 + xe_gt_err_once(gt, "Failed to cap max freq on flush to %u, %pe\n", 906 + BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret)); 907 + return; 908 + } 909 + 910 + atomic_set(&pc->flush_freq_limit, 1); 911 + 912 + /* 913 + * If user has previously changed max freq, stash that value to 914 + * restore later, otherwise use the current max. New user 915 + * requests wait on flush. 916 + */ 917 + if (pc->user_requested_max != 0) 918 + pc->stashed_max_freq = pc->user_requested_max; 919 + else 920 + pc->stashed_max_freq = max_freq; 921 + } 922 + 923 + /* 924 + * Wait for actual freq to go below the flush cap: even if the previous 925 + * max was below cap, the current one might still be above it 926 + */ 927 + ret = wait_for_act_freq_limit(pc, BMG_MERT_FLUSH_FREQ_CAP); 928 + if (ret) 929 + xe_gt_err_once(gt, "Actual freq did not reduce to %u, %pe\n", 930 + BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret)); 931 + } 932 + 933 + /** 934 + * xe_guc_pc_remove_flush_freq_limit() - Remove max GT freq limit after L2 flush completes. 935 + * @pc: the xe_guc_pc object 936 + * 937 + * Retrieve the previous GT max frequency value. 
938 + */ 939 + void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc) 940 + { 941 + struct xe_gt *gt = pc_to_gt(pc); 932 942 int ret = 0; 933 943 934 - if (XE_WA(pc_to_gt(pc), 22019338487)) { 935 - /* 936 - * Get updated min/max and stash them. 937 - */ 938 - ret = xe_guc_pc_get_min_freq(pc, &pc->stashed_min_freq); 939 - if (!ret) 940 - ret = xe_guc_pc_get_max_freq(pc, &pc->stashed_max_freq); 941 - if (ret) 942 - return ret; 944 + if (!needs_flush_freq_limit(pc)) 945 + return; 943 946 944 - /* 945 - * Ensure min and max are bound by MERT_FREQ_CAP until driver loads. 946 - */ 947 - mutex_lock(&pc->freq_lock); 948 - ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc))); 949 - if (!ret) 950 - ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc))); 951 - mutex_unlock(&pc->freq_lock); 952 - } 947 + if (!atomic_read(&pc->flush_freq_limit)) 948 + return; 949 + 950 + mutex_lock(&pc->freq_lock); 951 + 952 + ret = pc_set_max_freq(&gt->uc.guc.pc, pc->stashed_max_freq); 953 + if (ret) 954 + xe_gt_err_once(gt, "Failed to restore max freq %u:%d", 955 + pc->stashed_max_freq, ret); 956 + 957 + atomic_set(&pc->flush_freq_limit, 0); 958 + mutex_unlock(&pc->freq_lock); 959 + wake_up_var(&pc->flush_freq_limit); 960 + } 961 + 962 + static int pc_set_mert_freq_cap(struct xe_guc_pc *pc) 963 + { 964 + int ret; 965 + 966 + if (!XE_WA(pc_to_gt(pc), 22019338487)) 967 + return 0; 968 + 969 + guard(mutex)(&pc->freq_lock); 970 + 971 + /* 972 + * Get updated min/max and stash them. 973 + */ 974 + ret = xe_guc_pc_get_min_freq_locked(pc, &pc->stashed_min_freq); 975 + if (!ret) 976 + ret = xe_guc_pc_get_max_freq_locked(pc, &pc->stashed_max_freq); 977 + if (ret) 978 + return ret; 979 + 980 + /* 981 + * Ensure min and max are bound by MERT_FREQ_CAP until driver loads. 982 + */ 983 + ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc))); 984 + if (!ret) 985 + ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc))); 953 986 954 987 return ret; 955 988 }
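wait_for_act_freq_limit() in the xe_guc_pc.c hunks above polls the actual frequency with exponentially growing sleeps, clamping the final sleep so the loop never overshoots its timeout budget. A minimal userspace sketch of that pattern, with stub callbacks standing in for xe_guc_pc_get_act_freq() and usleep_range() (all names here are illustrative):

```c
static unsigned int demo_freq = 1000;
static unsigned int demo_slept;

/* Stub: each poll "observes" the frequency dropping by 100 units. */
static unsigned int demo_read(void)
{
	unsigned int f = demo_freq;

	if (demo_freq >= 100)
		demo_freq -= 100;
	return f;
}

/* Stub: just record how long we would have slept. */
static void demo_sleep(unsigned int us)
{
	demo_slept += us;
}

/* Poll until read_freq() <= target, doubling the sleep each round and
 * clamping the last sleep so the total never exceeds timeout_us. */
static int poll_until_at_most(unsigned int (*read_freq)(void),
			      unsigned int target, unsigned int timeout_us,
			      void (*sleep_us)(unsigned int))
{
	unsigned int slept = 0, wait = 10;

	while (slept < timeout_us) {
		if (read_freq() <= target)
			return 0;

		sleep_us(wait);
		slept += wait;
		wait <<= 1;
		if (slept + wait > timeout_us)
			wait = timeout_us - slept;
	}

	return -1;	/* -ETIMEDOUT in the driver */
}
```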
+2
drivers/gpu/drm/xe/xe_guc_pc.h
··· 38 38 void xe_guc_pc_init_early(struct xe_guc_pc *pc); 39 39 int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc); 40 40 void xe_guc_pc_raise_unslice(struct xe_guc_pc *pc); 41 + void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc); 42 + void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc); 41 43 42 44 #endif /* _XE_GUC_PC_H_ */
+2
drivers/gpu/drm/xe/xe_guc_pc_types.h
··· 15 15 struct xe_guc_pc { 16 16 /** @bo: GGTT buffer object that is shared with GuC PC */ 17 17 struct xe_bo *bo; 18 + /** @flush_freq_limit: 1 when max freq changes are limited by driver */ 19 + atomic_t flush_freq_limit; 18 20 /** @rp0_freq: HW RP0 frequency - The Maximum one */ 19 21 u32 rp0_freq; 20 22 /** @rpa_freq: HW RPa frequency - The Achievable one */
+24 -7
drivers/gpu/drm/xe/xe_guc_submit.c
··· 908 908 struct xe_exec_queue *q = ge->q; 909 909 struct xe_guc *guc = exec_queue_to_guc(q); 910 910 struct xe_gpu_scheduler *sched = &ge->sched; 911 - bool wedged; 911 + bool wedged = false; 912 912 913 913 xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_lr(q)); 914 914 trace_xe_exec_queue_lr_cleanup(q); 915 915 916 - wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); 916 + if (!exec_queue_killed(q)) 917 + wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); 917 918 918 919 /* Kill the run_job / process_msg entry points */ 919 920 xe_sched_submission_stop(sched); ··· 1085 1084 int err = -ETIME; 1086 1085 pid_t pid = -1; 1087 1086 int i = 0; 1088 - bool wedged, skip_timeout_check; 1087 + bool wedged = false, skip_timeout_check; 1089 1088 1090 1089 /* 1091 1090 * TDR has fired before free job worker. Common if exec queue ··· 1131 1130 * doesn't work for SRIOV. For now assuming timeouts in wedged mode are 1132 1131 * genuine timeouts. 1133 1132 */ 1134 - wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); 1133 + if (!exec_queue_killed(q)) 1134 + wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); 1135 1135 1136 1136 /* Engine state now stable, disable scheduling to check timestamp */ 1137 1137 if (!wedged && exec_queue_registered(q)) { ··· 2092 2090 struct xe_gt *gt = guc_to_gt(guc); 2093 2091 struct xe_exec_queue *q; 2094 2092 u32 guc_id; 2093 + u32 type = XE_GUC_CAT_ERR_TYPE_INVALID; 2095 2094 2096 - if (unlikely(len < 1)) 2095 + if (unlikely(!len || len > 2)) 2097 2096 return -EPROTO; 2098 2097 2099 2098 guc_id = msg[0]; 2099 + 2100 + if (len == 2) 2101 + type = msg[1]; 2100 2102 2101 2103 if (guc_id == GUC_ID_UNKNOWN) { 2102 2104 /* ··· 2115 2109 if (unlikely(!q)) 2116 2110 return -EPROTO; 2117 2111 2118 - xe_gt_dbg(gt, "Engine memory cat error: engine_class=%s, logical_mask: 0x%x, guc_id=%d", 2119 - xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); 2112 + /* 2113 + * The type is HW-defined and changes based on platform, so we don't 2114 
+ * decode it in the kernel and only check if it is valid. 2115 + * See bspec 54047 and 72187 for details. 2116 + */ 2117 + if (type != XE_GUC_CAT_ERR_TYPE_INVALID) 2118 + xe_gt_dbg(gt, 2119 + "Engine memory CAT error [%u]: class=%s, logical_mask: 0x%x, guc_id=%d", 2120 + type, xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); 2121 + else 2122 + xe_gt_dbg(gt, 2123 + "Engine memory CAT error: class=%s, logical_mask: 0x%x, guc_id=%d", 2124 + xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); 2120 2125 2121 2126 trace_xe_exec_queue_memory_cat_error(q); 2122 2127
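The G2H CAT-error handler above now accepts a one- or two-dword payload: dword 0 is the guc_id and the optional dword 1 is a HW-defined error type, defaulting to XE_GUC_CAT_ERR_TYPE_INVALID when absent. A sketch of that parse (the constants and helper name are stand-ins, not driver symbols):

```c
#define TYPE_INVALID 0xffffffffu	/* stand-in for XE_GUC_CAT_ERR_TYPE_INVALID */
#define EPROTO_ERR   (-71)		/* stand-in for -EPROTO */

/* Reject empty or oversized payloads, then pull the mandatory guc_id
 * and the optional trailing type dword. */
static int parse_cat_error(const unsigned int *msg, unsigned int len,
			   unsigned int *guc_id, unsigned int *type)
{
	if (!len || len > 2)
		return EPROTO_ERR;

	*guc_id = msg[0];
	*type = (len == 2) ? msg[1] : TYPE_INVALID;

	return 0;
}
```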
+1 -4
drivers/gpu/drm/xe/xe_heci_gsc.c
··· 11 11 #include "xe_device_types.h" 12 12 #include "xe_drv.h" 13 13 #include "xe_heci_gsc.h" 14 + #include "regs/xe_gsc_regs.h" 14 15 #include "xe_platform_types.h" 15 16 #include "xe_survivability_mode.h" 16 17 17 18 #define GSC_BAR_LENGTH 0x00000FFC 18 - 19 - #define DG1_GSC_HECI2_BASE 0x259000 20 - #define PVC_GSC_HECI2_BASE 0x285000 21 - #define DG2_GSC_HECI2_BASE 0x374000 22 19 23 20 static void heci_gsc_irq_mask(struct irq_data *d) 24 21 {
+1 -1
drivers/gpu/drm/xe/xe_huc.c
··· 171 171 sizeof(struct pxp43_new_huc_auth_in)); 172 172 wr_offset = huc_emit_pxp_auth_msg(xe, &pkt->vmap, wr_offset, 173 173 xe_bo_ggtt_addr(huc->fw.bo), 174 - huc->fw.bo->size); 174 + xe_bo_size(huc->fw.bo)); 175 175 do { 176 176 err = xe_gsc_pkt_submit_kernel(&gt->uc.gsc, ggtt_offset, wr_offset, 177 177 ggtt_offset + PXP43_HUC_AUTH_INOUT_SIZE,
+2 -1
drivers/gpu/drm/xe/xe_hw_engine.c
··· 1059 1059 xe_hw_engine_lookup(struct xe_device *xe, 1060 1060 struct drm_xe_engine_class_instance eci) 1061 1061 { 1062 + struct xe_gt *gt = xe_device_get_gt(xe, eci.gt_id); 1062 1063 unsigned int idx; 1063 1064 1064 1065 if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class)) 1065 1066 return NULL; 1066 1067 1067 - if (eci.gt_id >= xe->info.gt_count) 1068 + if (!gt) 1068 1069 return NULL; 1069 1070 1070 1071 idx = array_index_nospec(eci.engine_class,
+5 -14
drivers/gpu/drm/xe/xe_hw_engine_group.c
··· 13 13 #include "xe_vm.h" 14 14 15 15 static void 16 - hw_engine_group_free(struct drm_device *drm, void *arg) 17 - { 18 - struct xe_hw_engine_group *group = arg; 19 - 20 - destroy_workqueue(group->resume_wq); 21 - kfree(group); 22 - } 23 - 24 - static void 25 16 hw_engine_group_resume_lr_jobs_func(struct work_struct *w) 26 17 { 27 18 struct xe_exec_queue *q; ··· 44 53 struct xe_hw_engine_group *group; 45 54 int err; 46 55 47 - group = kzalloc(sizeof(*group), GFP_KERNEL); 56 + group = drmm_kzalloc(&xe->drm, sizeof(*group), GFP_KERNEL); 48 57 if (!group) 49 58 return ERR_PTR(-ENOMEM); 50 59 ··· 52 61 if (!group->resume_wq) 53 62 return ERR_PTR(-ENOMEM); 54 63 64 + err = drmm_add_action_or_reset(&xe->drm, __drmm_workqueue_release, group->resume_wq); 65 + if (err) 66 + return ERR_PTR(err); 67 + 55 68 init_rwsem(&group->mode_sem); 56 69 INIT_WORK(&group->resume_work, hw_engine_group_resume_lr_jobs_func); 57 70 INIT_LIST_HEAD(&group->exec_queue_list); 58 - 59 - err = drmm_add_action_or_reset(&xe->drm, hw_engine_group_free, group); 60 - if (err) 61 - return ERR_PTR(err); 62 71 63 72 return group; 64 73 }
+329
drivers/gpu/drm/xe/xe_i2c.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 OR MIT 2 + /* 3 + * Intel Xe I2C attached Microcontroller Units (MCU) 4 + * 5 + * Copyright (C) 2025 Intel Corporation. 6 + */ 7 + 8 + #include <linux/array_size.h> 9 + #include <linux/container_of.h> 10 + #include <linux/device.h> 11 + #include <linux/err.h> 12 + #include <linux/i2c.h> 13 + #include <linux/ioport.h> 14 + #include <linux/irq.h> 15 + #include <linux/irqdomain.h> 16 + #include <linux/notifier.h> 17 + #include <linux/pci.h> 18 + #include <linux/platform_device.h> 19 + #include <linux/property.h> 20 + #include <linux/regmap.h> 21 + #include <linux/sprintf.h> 22 + #include <linux/string.h> 23 + #include <linux/types.h> 24 + #include <linux/workqueue.h> 25 + 26 + #include "regs/xe_i2c_regs.h" 27 + #include "regs/xe_irq_regs.h" 28 + 29 + #include "xe_device.h" 30 + #include "xe_device_types.h" 31 + #include "xe_i2c.h" 32 + #include "xe_mmio.h" 33 + #include "xe_platform_types.h" 34 + 35 + /** 36 + * DOC: Xe I2C devices 37 + * 38 + * Register a platform device for the I2C host controller (Synopsys DesignWare 39 + * I2C) if the registers of that controller are mapped to the MMIO, and also the 40 + * I2C client device for the Add-In Management Controller (the MCU) attached to 41 + * the host controller. 42 + * 43 + * See drivers/i2c/busses/i2c-designware-* for more information on the I2C host 44 + * controller. 
45 + */ 46 + 47 + static const char adapter_name[] = "i2c_designware"; 48 + 49 + static const struct property_entry xe_i2c_adapter_properties[] = { 50 + PROPERTY_ENTRY_STRING("compatible", "intel,xe-i2c"), 51 + PROPERTY_ENTRY_U32("clock-frequency", I2C_MAX_FAST_MODE_PLUS_FREQ), 52 + { } 53 + }; 54 + 55 + static inline void xe_i2c_read_endpoint(struct xe_mmio *mmio, void *ep) 56 + { 57 + u32 *val = ep; 58 + 59 + val[0] = xe_mmio_read32(mmio, REG_SG_REMAP_ADDR_PREFIX); 60 + val[1] = xe_mmio_read32(mmio, REG_SG_REMAP_ADDR_POSTFIX); 61 + } 62 + 63 + static void xe_i2c_client_work(struct work_struct *work) 64 + { 65 + struct xe_i2c *i2c = container_of(work, struct xe_i2c, work); 66 + struct i2c_board_info info = { 67 + .type = "amc", 68 + .flags = I2C_CLIENT_HOST_NOTIFY, 69 + .addr = i2c->ep.addr[1], 70 + }; 71 + 72 + i2c->client[0] = i2c_new_client_device(i2c->adapter, &info); 73 + } 74 + 75 + static int xe_i2c_notifier(struct notifier_block *nb, unsigned long action, void *data) 76 + { 77 + struct xe_i2c *i2c = container_of(nb, struct xe_i2c, bus_notifier); 78 + struct i2c_adapter *adapter = i2c_verify_adapter(data); 79 + struct device *dev = data; 80 + 81 + if (action == BUS_NOTIFY_ADD_DEVICE && 82 + adapter && dev->parent == &i2c->pdev->dev) { 83 + i2c->adapter = adapter; 84 + schedule_work(&i2c->work); 85 + return NOTIFY_OK; 86 + } 87 + 88 + return NOTIFY_DONE; 89 + } 90 + 91 + static int xe_i2c_register_adapter(struct xe_i2c *i2c) 92 + { 93 + struct pci_dev *pci = to_pci_dev(i2c->drm_dev); 94 + struct platform_device *pdev; 95 + struct fwnode_handle *fwnode; 96 + int ret; 97 + 98 + fwnode = fwnode_create_software_node(xe_i2c_adapter_properties, NULL); 99 + if (!fwnode) 100 + return -ENOMEM; 101 + 102 + /* 103 + * Not using platform_device_register_full() here because we don't have 104 + * a handle to the platform_device before it returns. xe_i2c_notifier() 105 + * uses that handle, but it may be called before 106 + * platform_device_register_full() is done. 
107 + */ 108 + pdev = platform_device_alloc(adapter_name, pci_dev_id(pci)); 109 + if (!pdev) { 110 + ret = -ENOMEM; 111 + goto err_fwnode_remove; 112 + } 113 + 114 + if (i2c->adapter_irq) { 115 + struct resource res; 116 + 117 + res = DEFINE_RES_IRQ_NAMED(i2c->adapter_irq, "xe_i2c"); 118 + 119 + ret = platform_device_add_resources(pdev, &res, 1); 120 + if (ret) 121 + goto err_pdev_put; 122 + } 123 + 124 + pdev->dev.parent = i2c->drm_dev; 125 + pdev->dev.fwnode = fwnode; 126 + i2c->adapter_node = fwnode; 127 + i2c->pdev = pdev; 128 + 129 + ret = platform_device_add(pdev); 130 + if (ret) 131 + goto err_pdev_put; 132 + 133 + return 0; 134 + 135 + err_pdev_put: 136 + platform_device_put(pdev); 137 + err_fwnode_remove: 138 + fwnode_remove_software_node(fwnode); 139 + 140 + return ret; 141 + } 142 + 143 + static void xe_i2c_unregister_adapter(struct xe_i2c *i2c) 144 + { 145 + platform_device_unregister(i2c->pdev); 146 + fwnode_remove_software_node(i2c->adapter_node); 147 + } 148 + 149 + /** 150 + * xe_i2c_irq_handler: Handler for I2C interrupts 151 + * @xe: xe device instance 152 + * @master_ctl: interrupt register 153 + * 154 + * Forward interrupts generated by the I2C host adapter to the I2C host adapter 155 + * driver. 
156 + */ 157 + void xe_i2c_irq_handler(struct xe_device *xe, u32 master_ctl) 158 + { 159 + if (!xe->i2c || !xe->i2c->adapter_irq) 160 + return; 161 + 162 + if (master_ctl & I2C_IRQ) 163 + generic_handle_irq_safe(xe->i2c->adapter_irq); 164 + } 165 + 166 + static int xe_i2c_irq_map(struct irq_domain *h, unsigned int virq, 167 + irq_hw_number_t hw_irq_num) 168 + { 169 + irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_simple_irq); 170 + return 0; 171 + } 172 + 173 + static const struct irq_domain_ops xe_i2c_irq_ops = { 174 + .map = xe_i2c_irq_map, 175 + }; 176 + 177 + static int xe_i2c_create_irq(struct xe_i2c *i2c) 178 + { 179 + struct irq_domain *domain; 180 + 181 + if (!(i2c->ep.capabilities & XE_I2C_EP_CAP_IRQ)) 182 + return 0; 183 + 184 + domain = irq_domain_create_linear(dev_fwnode(i2c->drm_dev), 1, &xe_i2c_irq_ops, NULL); 185 + if (!domain) 186 + return -ENOMEM; 187 + 188 + i2c->adapter_irq = irq_create_mapping(domain, 0); 189 + i2c->irqdomain = domain; 190 + 191 + return 0; 192 + } 193 + 194 + static void xe_i2c_remove_irq(struct xe_i2c *i2c) 195 + { 196 + if (!i2c->irqdomain) 197 + return; 198 + 199 + irq_dispose_mapping(i2c->adapter_irq); 200 + irq_domain_remove(i2c->irqdomain); 201 + } 202 + 203 + static int xe_i2c_read(void *context, unsigned int reg, unsigned int *val) 204 + { 205 + struct xe_i2c *i2c = context; 206 + 207 + *val = xe_mmio_read32(i2c->mmio, XE_REG(reg + I2C_MEM_SPACE_OFFSET)); 208 + 209 + return 0; 210 + } 211 + 212 + static int xe_i2c_write(void *context, unsigned int reg, unsigned int val) 213 + { 214 + struct xe_i2c *i2c = context; 215 + 216 + xe_mmio_write32(i2c->mmio, XE_REG(reg + I2C_MEM_SPACE_OFFSET), val); 217 + 218 + return 0; 219 + } 220 + 221 + static const struct regmap_config i2c_regmap_config = { 222 + .reg_bits = 32, 223 + .val_bits = 32, 224 + .reg_read = xe_i2c_read, 225 + .reg_write = xe_i2c_write, 226 + .fast_io = true, 227 + }; 228 + 229 + void xe_i2c_pm_suspend(struct xe_device *xe) 230 + { 231 + struct xe_mmio 
*mmio = xe_root_tile_mmio(xe); 232 + 233 + if (!xe->i2c || xe->i2c->ep.cookie != XE_I2C_EP_COOKIE_DEVICE) 234 + return; 235 + 236 + xe_mmio_rmw32(mmio, I2C_CONFIG_PMCSR, PCI_PM_CTRL_STATE_MASK, (__force u32)PCI_D3hot); 237 + drm_dbg(&xe->drm, "pmcsr: 0x%08x\n", xe_mmio_read32(mmio, I2C_CONFIG_PMCSR)); 238 + } 239 + 240 + void xe_i2c_pm_resume(struct xe_device *xe, bool d3cold) 241 + { 242 + struct xe_mmio *mmio = xe_root_tile_mmio(xe); 243 + 244 + if (!xe->i2c || xe->i2c->ep.cookie != XE_I2C_EP_COOKIE_DEVICE) 245 + return; 246 + 247 + if (d3cold) 248 + xe_mmio_rmw32(mmio, I2C_CONFIG_CMD, 0, PCI_COMMAND_MEMORY); 249 + 250 + xe_mmio_rmw32(mmio, I2C_CONFIG_PMCSR, PCI_PM_CTRL_STATE_MASK, (__force u32)PCI_D0); 251 + drm_dbg(&xe->drm, "pmcsr: 0x%08x\n", xe_mmio_read32(mmio, I2C_CONFIG_PMCSR)); 252 + } 253 + 254 + static void xe_i2c_remove(void *data) 255 + { 256 + struct xe_i2c *i2c = data; 257 + unsigned int i; 258 + 259 + for (i = 0; i < XE_I2C_MAX_CLIENTS; i++) 260 + i2c_unregister_device(i2c->client[i]); 261 + 262 + bus_unregister_notifier(&i2c_bus_type, &i2c->bus_notifier); 263 + xe_i2c_unregister_adapter(i2c); 264 + xe_i2c_remove_irq(i2c); 265 + } 266 + 267 + /** 268 + * xe_i2c_probe: Probe the I2C host adapter and the I2C clients attached to it 269 + * @xe: xe device instance 270 + * 271 + * Register all the I2C devices described in the I2C Endpoint data structure. 
272 + * 273 + * Return: 0 on success, error code on failure 274 + */ 275 + int xe_i2c_probe(struct xe_device *xe) 276 + { 277 + struct device *drm_dev = xe->drm.dev; 278 + struct xe_i2c_endpoint ep; 279 + struct regmap *regmap; 280 + struct xe_i2c *i2c; 281 + int ret; 282 + 283 + if (xe->info.platform != XE_BATTLEMAGE) 284 + return 0; 285 + 286 + xe_i2c_read_endpoint(xe_root_tile_mmio(xe), &ep); 287 + if (ep.cookie != XE_I2C_EP_COOKIE_DEVICE) 288 + return 0; 289 + 290 + i2c = devm_kzalloc(drm_dev, sizeof(*i2c), GFP_KERNEL); 291 + if (!i2c) 292 + return -ENOMEM; 293 + 294 + INIT_WORK(&i2c->work, xe_i2c_client_work); 295 + i2c->mmio = xe_root_tile_mmio(xe); 296 + i2c->drm_dev = drm_dev; 297 + i2c->ep = ep; 298 + xe->i2c = i2c; 299 + 300 + /* PCI PM isn't aware of this device, bring it up and match it with SGUnit state. */ 301 + xe_i2c_pm_resume(xe, true); 302 + 303 + regmap = devm_regmap_init(drm_dev, NULL, i2c, &i2c_regmap_config); 304 + if (IS_ERR(regmap)) 305 + return PTR_ERR(regmap); 306 + 307 + i2c->bus_notifier.notifier_call = xe_i2c_notifier; 308 + ret = bus_register_notifier(&i2c_bus_type, &i2c->bus_notifier); 309 + if (ret) 310 + return ret; 311 + 312 + ret = xe_i2c_create_irq(i2c); 313 + if (ret) 314 + goto err_unregister_notifier; 315 + 316 + ret = xe_i2c_register_adapter(i2c); 317 + if (ret) 318 + goto err_remove_irq; 319 + 320 + return devm_add_action_or_reset(drm_dev, xe_i2c_remove, i2c); 321 + 322 + err_remove_irq: 323 + xe_i2c_remove_irq(i2c); 324 + 325 + err_unregister_notifier: 326 + bus_unregister_notifier(&i2c_bus_type, &i2c->bus_notifier); 327 + 328 + return ret; 329 + }
+62
drivers/gpu/drm/xe/xe_i2c.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + #ifndef _XE_I2C_H_ 3 + #define _XE_I2C_H_ 4 + 5 + #include <linux/bits.h> 6 + #include <linux/notifier.h> 7 + #include <linux/types.h> 8 + #include <linux/workqueue.h> 9 + 10 + struct device; 11 + struct fwnode_handle; 12 + struct i2c_adapter; 13 + struct i2c_client; 14 + struct irq_domain; 15 + struct platform_device; 16 + struct xe_device; 17 + struct xe_mmio; 18 + 19 + #define XE_I2C_MAX_CLIENTS 3 20 + 21 + #define XE_I2C_EP_COOKIE_DEVICE 0xde 22 + 23 + /* Endpoint Capabilities */ 24 + #define XE_I2C_EP_CAP_IRQ BIT(0) 25 + 26 + struct xe_i2c_endpoint { 27 + u8 cookie; 28 + u8 capabilities; 29 + u16 addr[XE_I2C_MAX_CLIENTS]; 30 + }; 31 + 32 + struct xe_i2c { 33 + struct fwnode_handle *adapter_node; 34 + struct platform_device *pdev; 35 + struct i2c_adapter *adapter; 36 + struct i2c_client *client[XE_I2C_MAX_CLIENTS]; 37 + 38 + struct notifier_block bus_notifier; 39 + struct work_struct work; 40 + 41 + struct irq_domain *irqdomain; 42 + int adapter_irq; 43 + 44 + struct xe_i2c_endpoint ep; 45 + struct device *drm_dev; 46 + 47 + struct xe_mmio *mmio; 48 + }; 49 + 50 + #if IS_ENABLED(CONFIG_I2C) 51 + int xe_i2c_probe(struct xe_device *xe); 52 + void xe_i2c_irq_handler(struct xe_device *xe, u32 master_ctl); 53 + void xe_i2c_pm_suspend(struct xe_device *xe); 54 + void xe_i2c_pm_resume(struct xe_device *xe, bool d3cold); 55 + #else 56 + static inline int xe_i2c_probe(struct xe_device *xe) { return 0; } 57 + static inline void xe_i2c_irq_handler(struct xe_device *xe, u32 master_ctl) { } 58 + static inline void xe_i2c_pm_suspend(struct xe_device *xe) { } 59 + static inline void xe_i2c_pm_resume(struct xe_device *xe, bool d3cold) { } 60 + #endif 61 + 62 + #endif
+2
drivers/gpu/drm/xe/xe_irq.c
··· 18 18 #include "xe_gt.h" 19 19 #include "xe_guc.h" 20 20 #include "xe_hw_engine.h" 21 + #include "xe_i2c.h" 21 22 #include "xe_memirq.h" 22 23 #include "xe_mmio.h" 23 24 #include "xe_pxp.h" ··· 477 476 if (xe->info.has_heci_cscfi) 478 477 xe_heci_csc_irq_handler(xe, master_ctl); 479 478 xe_display_irq_handler(xe, master_ctl); 479 + xe_i2c_irq_handler(xe, master_ctl); 480 480 gu_misc_iir = gu_misc_irq_ack(xe, master_ctl); 481 481 } 482 482 }
+13 -2
drivers/gpu/drm/xe/xe_lmtt.c
··· 78 78 } 79 79 80 80 lmtt_assert(lmtt, xe_bo_is_vram(bo)); 81 + lmtt_debug(lmtt, "level=%u addr=%#llx\n", level, (u64)xe_bo_main_addr(bo, XE_PAGE_SIZE)); 82 + 83 + xe_map_memset(lmtt_to_xe(lmtt), &bo->vmap, 0, 0, xe_bo_size(bo)); 81 84 82 85 pt->level = level; 83 86 pt->bo = bo; ··· 94 91 95 92 static void lmtt_pt_free(struct xe_lmtt_pt *pt) 96 93 { 94 + lmtt_debug(&pt->bo->tile->sriov.pf.lmtt, "level=%u addr=%llx\n", 95 + pt->level, (u64)xe_bo_main_addr(pt->bo, XE_PAGE_SIZE)); 96 + 97 97 xe_bo_unpin_map_no_vm(pt->bo); 98 98 kfree(pt); 99 99 } ··· 232 226 233 227 switch (lmtt->ops->lmtt_pte_size(level)) { 234 228 case sizeof(u32): 229 + lmtt_assert(lmtt, !overflows_type(pte, u32)); 230 + lmtt_assert(lmtt, !pte || !iosys_map_rd(&pt->bo->vmap, idx * sizeof(u32), u32)); 231 + 235 232 xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u32), u32, pte); 236 233 break; 237 234 case sizeof(u64): 235 + lmtt_assert(lmtt, !pte || !iosys_map_rd(&pt->bo->vmap, idx * sizeof(u64), u64)); 236 + 238 237 xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u64), u64, pte); 239 238 break; 240 239 default: ··· 397 386 u64 addr, vram_offset; 398 387 399 388 lmtt_assert(lmtt, IS_ALIGNED(start, page_size)); 400 - lmtt_assert(lmtt, IS_ALIGNED(bo->size, page_size)); 389 + lmtt_assert(lmtt, IS_ALIGNED(xe_bo_size(bo), page_size)); 401 390 lmtt_assert(lmtt, xe_bo_is_vram(bo)); 402 391 403 392 vram_offset = vram_region_gpu_offset(bo->ttm.resource); 404 - xe_res_first(bo->ttm.resource, 0, bo->size, &cur); 393 + xe_res_first(bo->ttm.resource, 0, xe_bo_size(bo), &cur); 405 394 while (cur.remaining) { 406 395 addr = xe_res_dma(&cur); 407 396 addr += vram_offset; /* XXX */
+18 -23
drivers/gpu/drm/xe/xe_lrc.c
··· 717 717 718 718 static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc) 719 719 { 720 - /* Indirect ring state page is at the very end of LRC */ 721 - return lrc->size - LRC_INDIRECT_RING_STATE_SIZE; 720 + return xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE - LRC_INDIRECT_RING_STATE_SIZE; 721 + } 722 + 723 + static inline u32 __xe_lrc_wa_bb_offset(struct xe_lrc *lrc) 724 + { 725 + return xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE; 722 726 } 723 727 724 728 #define DECL_MAP_ADDR_HELPERS(elem) \ ··· 977 973 u32 *batch, size_t max_size); 978 974 }; 979 975 980 - static size_t wa_bb_offset(struct xe_lrc *lrc) 981 - { 982 - return lrc->bo->size - LRC_WA_BB_SIZE; 983 - } 984 - 985 976 static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe) 986 977 { 987 978 const size_t max_size = LRC_WA_BB_SIZE; ··· 992 993 return -ENOMEM; 993 994 cmd = buf; 994 995 } else { 995 - cmd = lrc->bo->vmap.vaddr + wa_bb_offset(lrc); 996 + cmd = lrc->bo->vmap.vaddr + __xe_lrc_wa_bb_offset(lrc); 996 997 } 997 998 998 999 remain = max_size / sizeof(*cmd); ··· 1016 1017 1017 1018 if (buf) { 1018 1019 xe_map_memcpy_to(gt_to_xe(lrc->gt), &lrc->bo->vmap, 1019 - wa_bb_offset(lrc), buf, 1020 + __xe_lrc_wa_bb_offset(lrc), buf, 1020 1021 (cmd - buf) * sizeof(*cmd)); 1021 1022 kfree(buf); 1022 1023 } 1023 1024 1024 1025 xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, xe_bo_ggtt_addr(lrc->bo) + 1025 - wa_bb_offset(lrc) + 1); 1026 + __xe_lrc_wa_bb_offset(lrc) + 1); 1026 1027 1027 1028 return 0; 1028 1029 ··· 1039 1040 u32 init_flags) 1040 1041 { 1041 1042 struct xe_gt *gt = hwe->gt; 1043 + const u32 lrc_size = xe_gt_lrc_size(gt, hwe->class); 1044 + const u32 bo_size = ring_size + lrc_size + LRC_WA_BB_SIZE; 1042 1045 struct xe_tile *tile = gt_to_tile(gt); 1043 1046 struct xe_device *xe = gt_to_xe(gt); 1044 1047 struct iosys_map map; 1045 1048 void *init_data = NULL; 1046 1049 u32 arb_enable; 1047 - u32 lrc_size; 1048 1050 u32 bo_flags; 1049 1051 int err; 1050 1052 1051 1053 
kref_init(&lrc->refcount); 1052 1054 lrc->gt = gt; 1055 + lrc->size = lrc_size; 1053 1056 lrc->flags = 0; 1054 - lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class); 1057 + lrc->ring.size = ring_size; 1058 + lrc->ring.tail = 0; 1055 1059 if (xe_gt_has_indirect_ring_state(gt)) 1056 1060 lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE; 1057 1061 ··· 1067 1065 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address 1068 1066 * via VM bind calls. 1069 1067 */ 1070 - lrc->bo = xe_bo_create_pin_map(xe, tile, NULL, 1071 - lrc_size + LRC_WA_BB_SIZE, 1068 + lrc->bo = xe_bo_create_pin_map(xe, tile, NULL, bo_size, 1072 1069 ttm_bo_type_kernel, 1073 1070 bo_flags); 1074 1071 if (IS_ERR(lrc->bo)) 1075 1072 return PTR_ERR(lrc->bo); 1076 - 1077 - lrc->size = lrc_size; 1078 - lrc->ring.size = ring_size; 1079 - lrc->ring.tail = 0; 1080 1073 1081 1074 xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt, 1082 1075 hwe->fence_irq, hwe->name); ··· 1093 1096 xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */ 1094 1097 xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE, 1095 1098 gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE, 1096 - xe_gt_lrc_size(gt, hwe->class) - LRC_PPHWSP_SIZE); 1099 + lrc_size - LRC_PPHWSP_SIZE); 1097 1100 } else { 1098 - xe_map_memcpy_to(xe, &map, 0, init_data, 1099 - xe_gt_lrc_size(gt, hwe->class)); 1101 + xe_map_memcpy_to(xe, &map, 0, init_data, lrc_size); 1100 1102 kfree(init_data); 1101 1103 } 1102 1104 ··· 1855 1859 snapshot->seqno = xe_lrc_seqno(lrc); 1856 1860 snapshot->lrc_bo = xe_bo_get(lrc->bo); 1857 1861 snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc); 1858 - snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset - 1859 - LRC_WA_BB_SIZE; 1862 + snapshot->lrc_size = lrc->size; 1860 1863 snapshot->lrc_snapshot = NULL; 1861 1864 snapshot->ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(lrc)); 1862 1865 snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc);
+1 -1
drivers/gpu/drm/xe/xe_lrc_types.h
··· 22 22 */ 23 23 struct xe_bo *bo; 24 24 25 - /** @size: size of lrc including any indirect ring state page */ 25 + /** @size: size of the lrc and optional indirect ring state */ 26 26 u32 size; 27 27 28 28 /** @gt: gt which this LRC belongs to */
+26 -24
drivers/gpu/drm/xe/xe_migrate.c
··· 82 82 * of the instruction. Subtracting the instruction header (1 dword) and 83 83 * address (2 dwords), that leaves 0x3FD dwords (0x1FE qwords) for PTE values. 84 84 */ 85 - #define MAX_PTE_PER_SDI 0x1FE 85 + #define MAX_PTE_PER_SDI 0x1FEU 86 86 87 87 /** 88 88 * xe_tile_migrate_exec_queue() - Get this tile's migrate exec queue. ··· 203 203 BUILD_BUG_ON(!(NUM_KERNEL_PDE & 1)); 204 204 205 205 /* Need to be sure everything fits in the first PT, or create more */ 206 - xe_tile_assert(tile, m->batch_base_ofs + batch->size < SZ_2M); 206 + xe_tile_assert(tile, m->batch_base_ofs + xe_bo_size(batch) < SZ_2M); 207 207 208 208 bo = xe_bo_create_pin_map(vm->xe, tile, vm, 209 209 num_entries * XE_PAGE_SIZE, ··· 214 214 return PTR_ERR(bo); 215 215 216 216 /* PT30 & PT31 reserved for 2M identity map */ 217 - pt29_ofs = bo->size - 3 * XE_PAGE_SIZE; 217 + pt29_ofs = xe_bo_size(bo) - 3 * XE_PAGE_SIZE; 218 218 entry = vm->pt_ops->pde_encode_bo(bo, pt29_ofs, pat_index); 219 219 xe_pt_write(xe, &vm->pt_root[id]->bo->vmap, 0, entry); 220 220 ··· 236 236 if (!IS_DGFX(xe)) { 237 237 /* Write out batch too */ 238 238 m->batch_base_ofs = NUM_PT_SLOTS * XE_PAGE_SIZE; 239 - for (i = 0; i < batch->size; 239 + for (i = 0; i < xe_bo_size(batch); 240 240 i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE : 241 241 XE_PAGE_SIZE) { 242 242 entry = vm->pt_ops->pte_encode_bo(batch, i, ··· 247 247 level++; 248 248 } 249 249 if (xe->info.has_usm) { 250 - xe_tile_assert(tile, batch->size == SZ_1M); 250 + xe_tile_assert(tile, xe_bo_size(batch) == SZ_1M); 251 251 252 252 batch = tile->primary_gt->usm.bb_pool->bo; 253 253 m->usm_batch_base_ofs = m->batch_base_ofs + SZ_1M; 254 - xe_tile_assert(tile, batch->size == SZ_512K); 254 + xe_tile_assert(tile, xe_bo_size(batch) == SZ_512K); 255 255 256 - for (i = 0; i < batch->size; 256 + for (i = 0; i < xe_bo_size(batch); 257 257 i += vm->flags & XE_VM_FLAG_64K ? 
XE_64K_PAGE_SIZE : 258 258 XE_PAGE_SIZE) { 259 259 entry = vm->pt_ops->pte_encode_bo(batch, i, ··· 306 306 307 307 /* Identity map the entire vram at 256GiB offset */ 308 308 if (IS_DGFX(xe)) { 309 - u64 pt30_ofs = bo->size - 2 * XE_PAGE_SIZE; 309 + u64 pt30_ofs = xe_bo_size(bo) - 2 * XE_PAGE_SIZE; 310 310 311 311 xe_migrate_program_identity(xe, vm, bo, map_ofs, IDENTITY_OFFSET, 312 312 pat_index, pt30_ofs); ··· 321 321 u16 comp_pat_index = xe->pat.idx[XE_CACHE_NONE_COMPRESSION]; 322 322 u64 vram_offset = IDENTITY_OFFSET + 323 323 DIV_ROUND_UP_ULL(xe->mem.vram.actual_physical_size, SZ_1G); 324 - u64 pt31_ofs = bo->size - XE_PAGE_SIZE; 324 + u64 pt31_ofs = xe_bo_size(bo) - XE_PAGE_SIZE; 325 325 326 326 xe_assert(xe, xe->mem.vram.actual_physical_size <= (MAX_NUM_PTE - 327 327 IDENTITY_OFFSET - IDENTITY_OFFSET / 2) * SZ_1G); ··· 768 768 struct xe_gt *gt = m->tile->primary_gt; 769 769 struct xe_device *xe = gt_to_xe(gt); 770 770 struct dma_fence *fence = NULL; 771 - u64 size = src_bo->size; 771 + u64 size = xe_bo_size(src_bo); 772 772 struct xe_res_cursor src_it, dst_it, ccs_it; 773 773 u64 src_L0_ofs, dst_L0_ofs; 774 774 u32 src_L0_pt, dst_L0_pt; ··· 791 791 if (XE_WARN_ON(copy_ccs && src_bo != dst_bo)) 792 792 return ERR_PTR(-EINVAL); 793 793 794 - if (src_bo != dst_bo && XE_WARN_ON(src_bo->size != dst_bo->size)) 794 + if (src_bo != dst_bo && XE_WARN_ON(xe_bo_size(src_bo) != xe_bo_size(dst_bo))) 795 795 return ERR_PTR(-EINVAL); 796 796 797 797 if (!src_is_vram) ··· 863 863 if (src_is_vram && xe_migrate_allow_identity(src_L0, &src_it)) 864 864 xe_res_next(&src_it, src_L0); 865 865 else 866 - emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs, 866 + emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs || use_comp_pat, 867 867 &src_it, src_L0, src); 868 868 869 869 if (dst_is_vram && xe_migrate_allow_identity(src_L0, &dst_it)) ··· 1064 1064 struct xe_device *xe = gt_to_xe(gt); 1065 1065 bool clear_only_system_ccs = false; 1066 1066 struct dma_fence *fence = 
NULL; 1067 - u64 size = bo->size; 1067 + u64 size = xe_bo_size(bo); 1068 1068 struct xe_res_cursor src_it; 1069 1069 struct ttm_resource *src = dst; 1070 1070 int err; ··· 1076 1076 clear_only_system_ccs = true; 1077 1077 1078 1078 if (!clear_vram) 1079 - xe_res_first_sg(xe_bo_sg(bo), 0, bo->size, &src_it); 1079 + xe_res_first_sg(xe_bo_sg(bo), 0, xe_bo_size(bo), &src_it); 1080 1080 else 1081 - xe_res_first(src, 0, bo->size, &src_it); 1081 + xe_res_first(src, 0, xe_bo_size(bo), &src_it); 1082 1082 1083 1083 while (size) { 1084 1084 u64 clear_L0_ofs; ··· 1407 1407 if (idx == chunk) 1408 1408 goto next_cmd; 1409 1409 1410 - xe_tile_assert(tile, pt_bo->size == SZ_4K); 1410 + xe_tile_assert(tile, xe_bo_size(pt_bo) == SZ_4K); 1411 1411 1412 1412 /* Map a PT at most once */ 1413 1413 if (pt_bo->update_index < 0) ··· 1553 1553 u64 entries = DIV_U64_ROUND_UP(size, XE_PAGE_SIZE); 1554 1554 1555 1555 XE_WARN_ON(size > MAX_PREEMPTDISABLE_TRANSFER); 1556 + 1556 1557 /* 1557 1558 * MI_STORE_DATA_IMM command is used to update page table. Each 1558 - * instruction can update maximumly 0x1ff pte entries. To update 1559 - * n (n <= 0x1ff) pte entries, we need: 1560 - * 1 dword for the MI_STORE_DATA_IMM command header (opcode etc) 1561 - * 2 dword for the page table's physical location 1562 - * 2*n dword for value of pte to fill (each pte entry is 2 dwords) 1559 + * instruction can update maximumly MAX_PTE_PER_SDI pte entries. 
To 1560 + * update n (n <= MAX_PTE_PER_SDI) pte entries, we need: 1561 + * 1562 + * - 1 dword for the MI_STORE_DATA_IMM command header (opcode etc) 1563 + * - 2 dword for the page table's physical location 1564 + * - 2*n dword for value of pte to fill (each pte entry is 2 dwords) 1563 1565 */ 1564 - num_dword = (1 + 2) * DIV_U64_ROUND_UP(entries, 0x1ff); 1566 + num_dword = (1 + 2) * DIV_U64_ROUND_UP(entries, MAX_PTE_PER_SDI); 1565 1567 num_dword += entries * 2; 1566 1568 1567 1569 return num_dword; ··· 1579 1577 1580 1578 ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE); 1581 1579 while (ptes) { 1582 - u32 chunk = min(0x1ffU, ptes); 1580 + u32 chunk = min(MAX_PTE_PER_SDI, ptes); 1583 1581 1584 1582 bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk); 1585 1583 bb->cs[bb->len++] = pt_offset; ··· 1868 1866 if (IS_ERR(dma_addr)) 1869 1867 return PTR_ERR(dma_addr); 1870 1868 1871 - xe_res_first(bo->ttm.resource, offset, bo->size - offset, &cursor); 1869 + xe_res_first(bo->ttm.resource, offset, xe_bo_size(bo) - offset, &cursor); 1872 1870 1873 1871 do { 1874 1872 struct dma_fence *__fence;
+10 -6
drivers/gpu/drm/xe/xe_mmio.c
··· 55 55 static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size) 56 56 { 57 57 struct xe_tile *tile; 58 + struct xe_gt *gt; 58 59 u8 id; 59 60 60 61 /* ··· 68 67 /* Possibly override number of tile based on configuration register */ 69 68 if (!xe->info.skip_mtcfg) { 70 69 struct xe_mmio *mmio = xe_root_tile_mmio(xe); 71 - u8 tile_count; 70 + u8 tile_count, gt_count; 72 71 u32 mtcfg; 73 72 74 73 /* ··· 85 84 xe->info.tile_count = tile_count; 86 85 87 86 /* 88 - * FIXME: Needs some work for standalone media, but 89 - * should be impossible with multi-tile for now: 90 - * multi-tile platform with standalone media doesn't 91 - * exist 87 + * We've already setup gt_count according to the full 88 + * tile count. Re-calculate it to only include the GTs 89 + * that belong to the remaining tile(s). 92 90 */ 93 - xe->info.gt_count = xe->info.tile_count; 91 + gt_count = 0; 92 + for_each_gt(gt, xe, id) 93 + if (gt->info.id < tile_count * xe->info.max_gt_per_tile) 94 + gt_count++; 95 + xe->info.gt_count = gt_count; 94 96 } 95 97 } 96 98
+167
drivers/gpu/drm/xe/xe_nvm.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright(c) 2019-2025, Intel Corporation. All rights reserved. 4 + */ 5 + 6 + #include <linux/intel_dg_nvm_aux.h> 7 + #include <linux/pci.h> 8 + 9 + #include "xe_device.h" 10 + #include "xe_device_types.h" 11 + #include "xe_mmio.h" 12 + #include "xe_nvm.h" 13 + #include "regs/xe_gsc_regs.h" 14 + #include "xe_sriov.h" 15 + 16 + #define GEN12_GUNIT_NVM_BASE 0x00102040 17 + #define GEN12_DEBUG_NVM_BASE 0x00101018 18 + 19 + #define GEN12_CNTL_PROTECTED_NVM_REG 0x0010100C 20 + 21 + #define GEN12_GUNIT_NVM_SIZE 0x80 22 + #define GEN12_DEBUG_NVM_SIZE 0x4 23 + 24 + #define NVM_NON_POSTED_ERASE_CHICKEN_BIT BIT(13) 25 + 26 + #define HECI_FW_STATUS_2_NVM_ACCESS_MODE BIT(3) 27 + 28 + static const struct intel_dg_nvm_region regions[INTEL_DG_NVM_REGIONS] = { 29 + [0] = { .name = "DESCRIPTOR", }, 30 + [2] = { .name = "GSC", }, 31 + [9] = { .name = "PADDING", }, 32 + [11] = { .name = "OptionROM", }, 33 + [12] = { .name = "DAM", }, 34 + }; 35 + 36 + static void xe_nvm_release_dev(struct device *dev) 37 + { 38 + } 39 + 40 + static bool xe_nvm_non_posted_erase(struct xe_device *xe) 41 + { 42 + struct xe_gt *gt = xe_root_mmio_gt(xe); 43 + 44 + if (xe->info.platform != XE_BATTLEMAGE) 45 + return false; 46 + return !(xe_mmio_read32(&gt->mmio, XE_REG(GEN12_CNTL_PROTECTED_NVM_REG)) & 47 + NVM_NON_POSTED_ERASE_CHICKEN_BIT); 48 + } 49 + 50 + static bool xe_nvm_writable_override(struct xe_device *xe) 51 + { 52 + struct xe_gt *gt = xe_root_mmio_gt(xe); 53 + bool writable_override; 54 + resource_size_t base; 55 + 56 + switch (xe->info.platform) { 57 + case XE_BATTLEMAGE: 58 + base = DG2_GSC_HECI2_BASE; 59 + break; 60 + case XE_PVC: 61 + base = PVC_GSC_HECI2_BASE; 62 + break; 63 + case XE_DG2: 64 + base = DG2_GSC_HECI2_BASE; 65 + break; 66 + case XE_DG1: 67 + base = DG1_GSC_HECI2_BASE; 68 + break; 69 + default: 70 + drm_err(&xe->drm, "Unknown platform\n"); 71 + return true; 72 + } 73 + 74 + writable_override = 75 + 
!(xe_mmio_read32(&gt->mmio, HECI_FWSTS2(base)) & 76 + HECI_FW_STATUS_2_NVM_ACCESS_MODE); 77 + if (writable_override) 78 + drm_info(&xe->drm, "NVM access overridden by jumper\n"); 79 + return writable_override; 80 + } 81 + 82 + int xe_nvm_init(struct xe_device *xe) 83 + { 84 + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); 85 + struct auxiliary_device *aux_dev; 86 + struct intel_dg_nvm_dev *nvm; 87 + int ret; 88 + 89 + if (!xe->info.has_gsc_nvm) 90 + return 0; 91 + 92 + /* No access to internal NVM from VFs */ 93 + if (IS_SRIOV_VF(xe)) 94 + return 0; 95 + 96 + /* Nvm pointer should be NULL here */ 97 + if (WARN_ON(xe->nvm)) 98 + return -EFAULT; 99 + 100 + xe->nvm = kzalloc(sizeof(*nvm), GFP_KERNEL); 101 + if (!xe->nvm) 102 + return -ENOMEM; 103 + 104 + nvm = xe->nvm; 105 + 106 + nvm->writable_override = xe_nvm_writable_override(xe); 107 + nvm->non_posted_erase = xe_nvm_non_posted_erase(xe); 108 + nvm->bar.parent = &pdev->resource[0]; 109 + nvm->bar.start = GEN12_GUNIT_NVM_BASE + pdev->resource[0].start; 110 + nvm->bar.end = nvm->bar.start + GEN12_GUNIT_NVM_SIZE - 1; 111 + nvm->bar.flags = IORESOURCE_MEM; 112 + nvm->bar.desc = IORES_DESC_NONE; 113 + nvm->regions = regions; 114 + 115 + nvm->bar2.parent = &pdev->resource[0]; 116 + nvm->bar2.start = GEN12_DEBUG_NVM_BASE + pdev->resource[0].start; 117 + nvm->bar2.end = nvm->bar2.start + GEN12_DEBUG_NVM_SIZE - 1; 118 + nvm->bar2.flags = IORESOURCE_MEM; 119 + nvm->bar2.desc = IORES_DESC_NONE; 120 + 121 + aux_dev = &nvm->aux_dev; 122 + 123 + aux_dev->name = "nvm"; 124 + aux_dev->id = (pci_domain_nr(pdev->bus) << 16) | pci_dev_id(pdev); 125 + aux_dev->dev.parent = &pdev->dev; 126 + aux_dev->dev.release = xe_nvm_release_dev; 127 + 128 + ret = auxiliary_device_init(aux_dev); 129 + if (ret) { 130 + drm_err(&xe->drm, "xe-nvm aux init failed %d\n", ret); 131 + goto err; 132 + } 133 + 134 + ret = auxiliary_device_add(aux_dev); 135 + if (ret) { 136 + drm_err(&xe->drm, "xe-nvm aux add failed %d\n", ret); 137 + 
auxiliary_device_uninit(aux_dev); 138 + goto err; 139 + } 140 + return 0; 141 + 142 + err: 143 + kfree(nvm); 144 + xe->nvm = NULL; 145 + return ret; 146 + } 147 + 148 + void xe_nvm_fini(struct xe_device *xe) 149 + { 150 + struct intel_dg_nvm_dev *nvm = xe->nvm; 151 + 152 + if (!xe->info.has_gsc_nvm) 153 + return; 154 + 155 + /* No access to internal NVM from VFs */ 156 + if (IS_SRIOV_VF(xe)) 157 + return; 158 + 159 + /* Nvm pointer should not be NULL here */ 160 + if (WARN_ON(!nvm)) 161 + return; 162 + 163 + auxiliary_device_delete(&nvm->aux_dev); 164 + auxiliary_device_uninit(&nvm->aux_dev); 165 + kfree(nvm); 166 + xe->nvm = NULL; 167 + }
+15
drivers/gpu/drm/xe/xe_nvm.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright(c) 2019-2025 Intel Corporation. All rights reserved. 4 + */ 5 + 6 + #ifndef __XE_NVM_H__ 7 + #define __XE_NVM_H__ 8 + 9 + struct xe_device; 10 + 11 + int xe_nvm_init(struct xe_device *xe); 12 + 13 + void xe_nvm_fini(struct xe_device *xe); 14 + 15 + #endif
+5 -5
drivers/gpu/drm/xe/xe_oa.c
··· 403 403 static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) 404 404 { 405 405 u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); 406 - int size_exponent = __ffs(stream->oa_buffer.bo->size); 406 + int size_exponent = __ffs(xe_bo_size(stream->oa_buffer.bo)); 407 407 u32 oa_buf = gtt_offset | OAG_OABUFFER_MEMORY_SELECT; 408 408 struct xe_mmio *mmio = &stream->gt->mmio; 409 409 unsigned long flags; ··· 435 435 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); 436 436 437 437 /* Zero out the OA buffer since we rely on zero report id and timestamp fields */ 438 - memset(stream->oa_buffer.vaddr, 0, stream->oa_buffer.bo->size); 438 + memset(stream->oa_buffer.vaddr, 0, xe_bo_size(stream->oa_buffer.bo)); 439 439 } 440 440 441 441 static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask) ··· 1065 1065 static u32 oag_buf_size_select(const struct xe_oa_stream *stream) 1066 1066 { 1067 1067 return _MASKED_FIELD(OAG_OA_DEBUG_BUF_SIZE_SELECT, 1068 - stream->oa_buffer.bo->size > SZ_16M ? 1068 + xe_bo_size(stream->oa_buffer.bo) > SZ_16M ? 1069 1069 OAG_OA_DEBUG_BUF_SIZE_SELECT : 0); 1070 1070 } 1071 1071 ··· 1582 1582 1583 1583 static long xe_oa_info_locked(struct xe_oa_stream *stream, unsigned long arg) 1584 1584 { 1585 - struct drm_xe_oa_stream_info info = { .oa_buf_size = stream->oa_buffer.bo->size, }; 1585 + struct drm_xe_oa_stream_info info = { .oa_buf_size = xe_bo_size(stream->oa_buffer.bo), }; 1586 1586 void __user *uaddr = (void __user *)arg; 1587 1587 1588 1588 if (copy_to_user(uaddr, &info, sizeof(info))) ··· 1668 1668 } 1669 1669 1670 1670 /* Can mmap the entire OA buffer or nothing (no partial OA buffer mmaps) */ 1671 - if (vma->vm_end - vma->vm_start != stream->oa_buffer.bo->size) { 1671 + if (vma->vm_end - vma->vm_start != xe_bo_size(stream->oa_buffer.bo)) { 1672 1672 drm_dbg(&stream->oa->xe->drm, "Wrong mmap size, must be OA buffer size\n"); 1673 1673 return -EINVAL; 1674 1674 }
+28 -49
drivers/gpu/drm/xe/xe_pci.c
··· 38 38 D3COLD_ENABLE, 39 39 }; 40 40 41 - struct xe_subplatform_desc { 42 - enum xe_subplatform subplatform; 43 - const char *name; 44 - const u16 *pciidlist; 45 - }; 46 - 47 - struct xe_device_desc { 48 - /* Should only ever be set for platforms without GMD_ID */ 49 - const struct xe_ip *pre_gmdid_graphics_ip; 50 - /* Should only ever be set for platforms without GMD_ID */ 51 - const struct xe_ip *pre_gmdid_media_ip; 52 - 53 - const char *platform_name; 54 - const struct xe_subplatform_desc *subplatforms; 55 - 56 - enum xe_platform platform; 57 - 58 - u8 dma_mask_size; 59 - u8 max_remote_tiles:2; 60 - 61 - u8 require_force_probe:1; 62 - u8 is_dgfx:1; 63 - 64 - u8 has_display:1; 65 - u8 has_fan_control:1; 66 - u8 has_heci_gscfi:1; 67 - u8 has_heci_cscfi:1; 68 - u8 has_llc:1; 69 - u8 has_mbx_power_limits:1; 70 - u8 has_pxp:1; 71 - u8 has_sriov:1; 72 - u8 needs_scratch:1; 73 - u8 skip_guc_pc:1; 74 - u8 skip_mtcfg:1; 75 - u8 skip_pcode:1; 76 - }; 77 - 78 41 __diag_push(); 79 42 __diag_ignore_all("-Woverride-init", "Allow field overrides in table"); 80 43 ··· 103 140 .has_asid = 1, \ 104 141 .has_atomic_enable_pte_bit = 1, \ 105 142 .has_flat_ccs = 1, \ 106 - .has_indirect_ring_state = 1, \ 107 143 .has_range_tlb_invalidation = 1, \ 108 144 .has_usm = 1, \ 109 145 .has_64bit_timestamp = 1, \ ··· 146 184 { 2004, "Xe2_LPG", &graphics_xe2 }, 147 185 { 3000, "Xe3_LPG", &graphics_xe2 }, 148 186 { 3001, "Xe3_LPG", &graphics_xe2 }, 187 + { 3003, "Xe3_LPG", &graphics_xe2 }, 149 188 }; 150 189 151 190 /* Pre-GMDID Media IPs */ ··· 159 196 { 1301, "Xe2_HPM", &media_xelpmp }, 160 197 { 2000, "Xe2_LPM", &media_xelpmp }, 161 198 { 3000, "Xe3_LPM", &media_xelpmp }, 199 + { 3002, "Xe3_LPM", &media_xelpmp }, 162 200 }; 163 201 164 202 static const struct xe_device_desc tgl_desc = { ··· 169 205 .dma_mask_size = 39, 170 206 .has_display = true, 171 207 .has_llc = true, 208 + .max_gt_per_tile = 1, 172 209 .require_force_probe = true, 173 210 }; 174 211 ··· 180 215 .dma_mask_size = 39, 
181 216 .has_display = true, 182 217 .has_llc = true, 218 + .max_gt_per_tile = 1, 183 219 .require_force_probe = true, 184 220 }; 185 221 ··· 193 227 .dma_mask_size = 39, 194 228 .has_display = true, 195 229 .has_llc = true, 230 + .max_gt_per_tile = 1, 196 231 .require_force_probe = true, 197 232 .subplatforms = (const struct xe_subplatform_desc[]) { 198 233 { XE_SUBPLATFORM_ALDERLAKE_S_RPLS, "RPLS", adls_rpls_ids }, ··· 210 243 .dma_mask_size = 39, 211 244 .has_display = true, 212 245 .has_llc = true, 246 + .max_gt_per_tile = 1, 213 247 .require_force_probe = true, 214 248 .subplatforms = (const struct xe_subplatform_desc[]) { 215 249 { XE_SUBPLATFORM_ALDERLAKE_P_RPLU, "RPLU", adlp_rplu_ids }, ··· 225 257 .dma_mask_size = 39, 226 258 .has_display = true, 227 259 .has_llc = true, 260 + .max_gt_per_tile = 1, 228 261 .require_force_probe = true, 229 262 }; 230 263 ··· 239 270 PLATFORM(DG1), 240 271 .dma_mask_size = 39, 241 272 .has_display = true, 273 + .has_gsc_nvm = 1, 242 274 .has_heci_gscfi = 1, 275 + .max_gt_per_tile = 1, 243 276 .require_force_probe = true, 244 277 }; 245 278 ··· 252 281 #define DG2_FEATURES \ 253 282 DGFX_FEATURES, \ 254 283 PLATFORM(DG2), \ 284 + .has_gsc_nvm = 1, \ 255 285 .has_heci_gscfi = 1, \ 256 286 .subplatforms = (const struct xe_subplatform_desc[]) { \ 257 287 { XE_SUBPLATFORM_DG2_G10, "G10", dg2_g10_ids }, \ ··· 265 293 .pre_gmdid_graphics_ip = &graphics_ip_xehpg, 266 294 .pre_gmdid_media_ip = &media_ip_xehpm, 267 295 .dma_mask_size = 46, 296 + .max_gt_per_tile = 1, 268 297 .require_force_probe = true, 269 298 270 299 DG2_FEATURES, ··· 276 303 .pre_gmdid_graphics_ip = &graphics_ip_xehpg, 277 304 .pre_gmdid_media_ip = &media_ip_xehpm, 278 305 .dma_mask_size = 46, 306 + .max_gt_per_tile = 1, 279 307 .require_force_probe = true, 280 308 281 309 DG2_FEATURES, ··· 291 317 PLATFORM(PVC), 292 318 .dma_mask_size = 52, 293 319 .has_display = false, 320 + .has_gsc_nvm = 1, 294 321 .has_heci_gscfi = 1, 322 + .max_gt_per_tile = 1, 295 323 
.max_remote_tiles = 1, 296 324 .require_force_probe = true, 297 325 .has_mbx_power_limits = false, ··· 306 330 .dma_mask_size = 46, 307 331 .has_display = true, 308 332 .has_pxp = true, 333 + .max_gt_per_tile = 2, 309 334 }; 310 335 311 336 static const struct xe_device_desc lnl_desc = { ··· 314 337 .dma_mask_size = 46, 315 338 .has_display = true, 316 339 .has_pxp = true, 340 + .max_gt_per_tile = 2, 317 341 .needs_scratch = true, 318 342 }; 319 343 ··· 325 347 .has_display = true, 326 348 .has_fan_control = true, 327 349 .has_mbx_power_limits = true, 350 + .has_gsc_nvm = 1, 328 351 .has_heci_cscfi = 1, 352 + .max_gt_per_tile = 2, 329 353 .needs_scratch = true, 330 354 }; 331 355 ··· 336 356 .dma_mask_size = 46, 337 357 .has_display = true, 338 358 .has_sriov = true, 339 - .require_force_probe = true, 359 + .max_gt_per_tile = 2, 340 360 .needs_scratch = true, 341 361 }; 342 362 ··· 570 590 xe->info.is_dgfx = desc->is_dgfx; 571 591 xe->info.has_fan_control = desc->has_fan_control; 572 592 xe->info.has_mbx_power_limits = desc->has_mbx_power_limits; 593 + xe->info.has_gsc_nvm = desc->has_gsc_nvm; 573 594 xe->info.has_heci_gscfi = desc->has_heci_gscfi; 574 595 xe->info.has_heci_cscfi = desc->has_heci_cscfi; 575 596 xe->info.has_llc = desc->has_llc; ··· 584 603 xe->info.probe_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) && 585 604 xe_modparam.probe_display && 586 605 desc->has_display; 606 + 607 + xe_assert(xe, desc->max_gt_per_tile > 0); 608 + xe_assert(xe, desc->max_gt_per_tile <= XE_MAX_GT_PER_TILE); 609 + xe->info.max_gt_per_tile = desc->max_gt_per_tile; 587 610 xe->info.tile_count = 1 + desc->max_remote_tiles; 588 611 589 612 err = xe_tile_init_early(xe_device_get_root_tile(xe), xe, 0); ··· 687 702 */ 688 703 for_each_tile(tile, xe, id) { 689 704 gt = tile->primary_gt; 690 - gt->info.id = xe->info.gt_count++; 691 705 gt->info.type = XE_GT_TYPE_MAIN; 706 + gt->info.id = tile->id * xe->info.max_gt_per_tile; 692 707 gt->info.has_indirect_ring_state = 
graphics_desc->has_indirect_ring_state; 693 708 gt->info.engine_mask = graphics_desc->hw_engine_mask; 709 + xe->info.gt_count++; 694 710 695 711 if (MEDIA_VER(xe) < 13 && media_desc) 696 712 gt->info.engine_mask |= media_desc->hw_engine_mask; ··· 709 723 710 724 gt = tile->media_gt; 711 725 gt->info.type = XE_GT_TYPE_MEDIA; 726 + gt->info.id = tile->id * xe->info.max_gt_per_tile + 1; 712 727 gt->info.has_indirect_ring_state = media_desc->has_indirect_ring_state; 713 728 gt->info.engine_mask = media_desc->hw_engine_mask; 714 - 715 - /* 716 - * FIXME: At the moment multi-tile and standalone media are 717 - * mutually exclusive on current platforms. We'll need to 718 - * come up with a better way to number GTs if we ever wind 719 - * up with platforms that support both together. 720 - */ 721 - drm_WARN_ON(&xe->drm, id != 0); 722 - gt->info.id = xe->info.gt_count++; 729 + xe->info.gt_count++; 723 730 } 724 731 725 732 return 0;
+41
drivers/gpu/drm/xe/xe_pci_types.h
···
 
 #include <linux/types.h>
 
+#include "xe_platform_types.h"
+
+struct xe_subplatform_desc {
+	enum xe_subplatform subplatform;
+	const char *name;
+	const u16 *pciidlist;
+};
+
+struct xe_device_desc {
+	/* Should only ever be set for platforms without GMD_ID */
+	const struct xe_ip *pre_gmdid_graphics_ip;
+	/* Should only ever be set for platforms without GMD_ID */
+	const struct xe_ip *pre_gmdid_media_ip;
+
+	const char *platform_name;
+	const struct xe_subplatform_desc *subplatforms;
+
+	enum xe_platform platform;
+
+	u8 dma_mask_size;
+	u8 max_remote_tiles:2;
+	u8 max_gt_per_tile:2;
+
+	u8 require_force_probe:1;
+	u8 is_dgfx:1;
+
+	u8 has_display:1;
+	u8 has_fan_control:1;
+	u8 has_gsc_nvm:1;
+	u8 has_heci_gscfi:1;
+	u8 has_heci_cscfi:1;
+	u8 has_llc:1;
+	u8 has_mbx_power_limits:1;
+	u8 has_pxp:1;
+	u8 has_sriov:1;
+	u8 needs_scratch:1;
+	u8 skip_guc_pc:1;
+	u8 skip_mtcfg:1;
+	u8 skip_pcode:1;
+};
+
 struct xe_graphics_desc {
 	u8 va_bits;
 	u8 vm_max_level;
+15
drivers/gpu/drm/xe/xe_pcode_api.h
···
 #define   READ_PL_FROM_FW		0x1
 #define   READ_PL_FROM_PCODE		0x0
 
+#define PCODE_LATE_BINDING		0x5C
+#define   GET_CAPABILITY_STATUS		0x0
+#define     V1_FAN_SUPPORTED		REG_BIT(0)
+#define     VR_PARAMS_SUPPORTED		REG_BIT(3)
+#define     V1_FAN_PROVISIONED		REG_BIT(16)
+#define     VR_PARAMS_PROVISIONED	REG_BIT(19)
+#define   GET_VERSION_LOW		0x1
+#define   GET_VERSION_HIGH		0x2
+#define     MAJOR_VERSION_MASK		REG_GENMASK(31, 16)
+#define     MINOR_VERSION_MASK		REG_GENMASK(15, 0)
+#define     HOTFIX_VERSION_MASK		REG_GENMASK(31, 16)
+#define     BUILD_VERSION_MASK		REG_GENMASK(15, 0)
+#define   FAN_TABLE			1
+#define   VR_CONFIG			2
+
 #define PCODE_FREQUENCY_CONFIG		0x6e
 /* Frequency Config Sub Commands (param1) */
 #define   PCODE_MBOX_FC_SC_READ_FUSED_P0	0x0
+15 -5
drivers/gpu/drm/xe/xe_pm.c
···
 #include "xe_ggtt.h"
 #include "xe_gt.h"
 #include "xe_guc.h"
+#include "xe_i2c.h"
 #include "xe_irq.h"
 #include "xe_pcode.h"
 #include "xe_pxp.h"
···
 	/* FIXME: Super racey... */
 	err = xe_bo_evict_all(xe);
 	if (err)
-		goto err_pxp;
+		goto err_display;
 
 	for_each_gt(gt, xe, id) {
 		err = xe_gt_suspend(gt);
···
 
 	xe_display_pm_suspend_late(xe);
 
+	xe_i2c_pm_suspend(xe);
+
 	drm_dbg(&xe->drm, "Device suspended\n");
 	return 0;
 
 err_display:
 	xe_display_pm_resume(xe);
-err_pxp:
 	xe_pxp_pm_resume(xe->pxp);
 err:
 	drm_dbg(&xe->drm, "Device suspend failed %d\n", err);
···
 	err = xe_bo_restore_early(xe);
 	if (err)
 		goto err;
+
+	xe_i2c_pm_resume(xe, xe->d3cold.allowed);
 
 	xe_irq_resume(xe);
···
 
 	xe_display_pm_runtime_suspend_late(xe);
 
+	xe_i2c_pm_suspend(xe);
+
 	xe_rpm_lockmap_release(xe);
 	xe_pm_write_callback_task(xe, NULL);
 	return 0;
···
 		if (err)
 			goto out;
 	}
+
+	xe_i2c_pm_resume(xe, xe->d3cold.allowed);
 
 	xe_irq_resume(xe);
···
 }
 
 /**
- * xe_pm_set_vram_threshold - Set a vram threshold for allowing/blocking D3Cold
+ * xe_pm_set_vram_threshold - Set a VRAM threshold for allowing/blocking D3Cold
  * @xe: xe device instance
- * @threshold: VRAM size in bites for the D3cold threshold
+ * @threshold: VRAM size in MiB for the D3cold threshold
  *
- * Returns 0 for success, negative error code otherwise.
+ * Return:
+ * * 0		- success
+ * * -EINVAL	- invalid argument
  */
 int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold)
 {
+5 -2
drivers/gpu/drm/xe/xe_pmu.c
···
 	return true;
 }
 
-static bool event_supported(struct xe_pmu *pmu, unsigned int gt,
+static bool event_supported(struct xe_pmu *pmu, unsigned int gt_id,
 			    unsigned int id)
 {
-	if (gt >= XE_MAX_GT_PER_TILE)
+	struct xe_device *xe = container_of(pmu, typeof(*xe), pmu);
+	struct xe_gt *gt = xe_device_get_gt(xe, gt_id);
+
+	if (!gt)
 		return false;
 
 	return id < sizeof(pmu->supported_events) * BITS_PER_BYTE &&
+16 -13
drivers/gpu/drm/xe/xe_query.c
···
 		return -EINVAL;
 
 	eci = &resp.eci;
-	if (eci->gt_id >= XE_MAX_GT_PER_TILE)
+	if (eci->gt_id >= xe->info.max_gt_per_tile)
 		return -EINVAL;
 
 	gt = xe_device_get_gt(xe, eci->gt_id);
···
 	struct drm_xe_query_gt_list __user *query_ptr =
 		u64_to_user_ptr(query->data);
 	struct drm_xe_query_gt_list *gt_list;
+	int iter = 0;
 	u8 id;
 
 	if (query->size == 0) {
···
 
 	for_each_gt(gt, xe, id) {
 		if (xe_gt_is_media_type(gt))
-			gt_list->gt_list[id].type = DRM_XE_QUERY_GT_TYPE_MEDIA;
+			gt_list->gt_list[iter].type = DRM_XE_QUERY_GT_TYPE_MEDIA;
 		else
-			gt_list->gt_list[id].type = DRM_XE_QUERY_GT_TYPE_MAIN;
-		gt_list->gt_list[id].tile_id = gt_to_tile(gt)->id;
-		gt_list->gt_list[id].gt_id = gt->info.id;
-		gt_list->gt_list[id].reference_clock = gt->info.reference_clock;
+			gt_list->gt_list[iter].type = DRM_XE_QUERY_GT_TYPE_MAIN;
+		gt_list->gt_list[iter].tile_id = gt_to_tile(gt)->id;
+		gt_list->gt_list[iter].gt_id = gt->info.id;
+		gt_list->gt_list[iter].reference_clock = gt->info.reference_clock;
 		/*
 		 * The mem_regions indexes in the mask below need to
 		 * directly identify the struct
···
 		 * assumption.
 		 */
 		if (!IS_DGFX(xe))
-			gt_list->gt_list[id].near_mem_regions = 0x1;
+			gt_list->gt_list[iter].near_mem_regions = 0x1;
 		else
-			gt_list->gt_list[id].near_mem_regions =
+			gt_list->gt_list[iter].near_mem_regions =
 				BIT(gt_to_tile(gt)->id) << 1;
-		gt_list->gt_list[id].far_mem_regions = xe->info.mem_region_mask ^
-			gt_list->gt_list[id].near_mem_regions;
+		gt_list->gt_list[iter].far_mem_regions = xe->info.mem_region_mask ^
+			gt_list->gt_list[iter].near_mem_regions;
 
-		gt_list->gt_list[id].ip_ver_major =
+		gt_list->gt_list[iter].ip_ver_major =
 			REG_FIELD_GET(GMD_ID_ARCH_MASK, gt->info.gmdid);
-		gt_list->gt_list[id].ip_ver_minor =
+		gt_list->gt_list[iter].ip_ver_minor =
 			REG_FIELD_GET(GMD_ID_RELEASE_MASK, gt->info.gmdid);
-		gt_list->gt_list[id].ip_ver_rev =
+		gt_list->gt_list[iter].ip_ver_rev =
 			REG_FIELD_GET(GMD_ID_REVID, gt->info.gmdid);
+
+		iter++;
 	}
 
 	if (copy_to_user(query_ptr, gt_list, size)) {
+91 -104
drivers/gpu/drm/xe/xe_sriov_vf.c
···
 	xe_sriov_info(xe, "migration not supported by this module version\n");
 }
 
-/**
- * vf_post_migration_requery_guc - Re-query GuC for current VF provisioning.
+static bool gt_vf_post_migration_needed(struct xe_gt *gt)
+{
+	return test_bit(gt->info.id, &gt_to_xe(gt)->sriov.vf.migration.gt_flags);
+}
+
+/*
+ * Notify GuCs marked in flags about resource fixups apply finished.
  * @xe: the &xe_device struct instance
+ * @gt_flags: flags marking to which GTs the notification shall be sent
+ */
+static int vf_post_migration_notify_resfix_done(struct xe_device *xe, unsigned long gt_flags)
+{
+	struct xe_gt *gt;
+	unsigned int id;
+	int err = 0;
+
+	for_each_gt(gt, xe, id) {
+		if (!test_bit(id, &gt_flags))
+			continue;
+		/* skip asking GuC for RESFIX exit if new recovery request arrived */
+		if (gt_vf_post_migration_needed(gt))
+			continue;
+		err = xe_gt_sriov_vf_notify_resfix_done(gt);
+		if (err)
+			break;
+		clear_bit(id, &gt_flags);
+	}
+
+	if (gt_flags && !err)
+		drm_dbg(&xe->drm, "another recovery imminent, skipped some notifications\n");
+	return err;
+}
+
+static int vf_get_next_migrated_gt_id(struct xe_device *xe)
+{
+	struct xe_gt *gt;
+	unsigned int id;
+
+	for_each_gt(gt, xe, id) {
+		if (test_and_clear_bit(id, &xe->sriov.vf.migration.gt_flags))
+			return id;
+	}
+	return -1;
+}
+
+/**
+ * Perform post-migration fixups on a single GT.
  *
- * After migration, we need to re-query all VF configuration to make sure
- * they match previous provisioning. Note that most of VF provisioning
- * shall be the same, except GGTT range, since GGTT is not virtualized per-VF.
+ * After migration, GuC needs to be re-queried for VF configuration to check
+ * if it matches previous provisioning. Most of VF provisioning shall be the
+ * same, except GGTT range, since GGTT is not virtualized per-VF. If GGTT
+ * range has changed, we have to perform fixups - shift all GGTT references
+ * used anywhere within the driver. After the fixups in this function succeed,
+ * it is allowed to ask the GuC bound to this GT to continue normal operation.
  *
  * Returns: 0 if the operation completed successfully, or a negative error
  * code otherwise.
  */
-static int vf_post_migration_requery_guc(struct xe_device *xe)
+static int gt_vf_post_migration_fixups(struct xe_gt *gt)
 {
-	struct xe_gt *gt;
-	unsigned int id;
-	int err, ret = 0;
+	s64 shift;
+	int err;
 
-	for_each_gt(gt, xe, id) {
-		err = xe_gt_sriov_vf_query_config(gt);
-		ret = ret ?: err;
-	}
+	err = xe_gt_sriov_vf_query_config(gt);
+	if (err)
+		return err;
 
-	return ret;
-}
-
-static void vf_post_migration_fixup_ctb(struct xe_device *xe)
-{
-	struct xe_gt *gt;
-	unsigned int id;
-
-	xe_assert(xe, IS_SRIOV_VF(xe));
-
-	for_each_gt(gt, xe, id) {
-		s32 shift = xe_gt_sriov_vf_ggtt_shift(gt);
-
+	shift = xe_gt_sriov_vf_ggtt_shift(gt);
+	if (shift) {
+		xe_tile_sriov_vf_fixup_ggtt_nodes(gt_to_tile(gt), shift);
+		/* FIXME: add the recovery steps */
 		xe_guc_ct_fixup_messages_with_ggtt(&gt->uc.guc.ct, shift);
 	}
-}
-
-/*
- * vf_post_migration_imminent - Check if post-restore recovery is coming.
- * @xe: the &xe_device struct instance
- *
- * Return: True if migration recovery worker will soon be running. Any worker currently
- * executing does not affect the result.
- */
-static bool vf_post_migration_imminent(struct xe_device *xe)
-{
-	return xe->sriov.vf.migration.gt_flags != 0 ||
-	       work_pending(&xe->sriov.vf.migration.worker);
-}
-
-static bool vf_post_migration_fixup_ggtt_nodes(struct xe_device *xe)
-{
-	bool need_fixups = false;
-	struct xe_tile *tile;
-	unsigned int id;
-
-	for_each_tile(tile, xe, id) {
-		struct xe_gt *gt = tile->primary_gt;
-		s64 shift;
-
-		shift = xe_gt_sriov_vf_ggtt_shift(gt);
-		if (shift) {
-			need_fixups = true;
-			xe_tile_sriov_vf_fixup_ggtt_nodes(tile, shift);
-		}
-	}
-	return need_fixups;
-}
-
-/*
- * Notify all GuCs about resource fixups apply finished.
- */
-static void vf_post_migration_notify_resfix_done(struct xe_device *xe)
-{
-	struct xe_gt *gt;
-	unsigned int id;
-
-	for_each_gt(gt, xe, id) {
-		if (vf_post_migration_imminent(xe))
-			goto skip;
-		xe_gt_sriov_vf_notify_resfix_done(gt);
-	}
-	return;
-
-skip:
-	drm_dbg(&xe->drm, "another recovery imminent, skipping notifications\n");
+	return 0;
 }
 
 static void vf_post_migration_recovery(struct xe_device *xe)
 {
-	bool need_fixups;
-	int err;
+	unsigned long fixed_gts = 0;
+	int id, err;
 
 	drm_dbg(&xe->drm, "migration recovery in progress\n");
 	xe_pm_runtime_get(xe);
-	err = vf_post_migration_requery_guc(xe);
-	if (vf_post_migration_imminent(xe))
-		goto defer;
-	if (unlikely(err))
-		goto fail;
+
 	if (!vf_migration_supported(xe)) {
 		xe_sriov_err(xe, "migration not supported by this module version\n");
 		err = -ENOTRECOVERABLE;
 		goto fail;
 	}
 
-	need_fixups = vf_post_migration_fixup_ggtt_nodes(xe);
-	/* FIXME: add the recovery steps */
-	if (need_fixups)
-		vf_post_migration_fixup_ctb(xe);
+	while (id = vf_get_next_migrated_gt_id(xe), id >= 0) {
+		struct xe_gt *gt = xe_device_get_gt(xe, id);
 
-	vf_post_migration_notify_resfix_done(xe);
+		err = gt_vf_post_migration_fixups(gt);
+		if (err)
+			goto fail;
+
+		set_bit(id, &fixed_gts);
+	}
+
+	err = vf_post_migration_notify_resfix_done(xe, fixed_gts);
+	if (err)
+		goto fail;
+
 	xe_pm_runtime_put(xe);
 	drm_notice(&xe->drm, "migration recovery ended\n");
-	return;
-defer:
-	xe_pm_runtime_put(xe);
-	drm_dbg(&xe->drm, "migration recovery deferred\n");
 	return;
 fail:
 	xe_pm_runtime_put(xe);
···
 	vf_post_migration_recovery(xe);
 }
 
-static bool vf_ready_to_recovery_on_all_gts(struct xe_device *xe)
+/*
+ * Check if post-restore recovery is coming on any of GTs.
+ * @xe: the &xe_device struct instance
+ *
+ * Return: True if migration recovery worker will soon be running. Any worker currently
+ * executing does not affect the result.
+ */
+static bool vf_ready_to_recovery_on_any_gts(struct xe_device *xe)
 {
 	struct xe_gt *gt;
 	unsigned int id;
 
 	for_each_gt(gt, xe, id) {
-		if (!test_bit(id, &xe->sriov.vf.migration.gt_flags)) {
-			xe_gt_sriov_dbg_verbose(gt, "still not ready to recover\n");
-			return false;
-		}
+		if (test_bit(id, &xe->sriov.vf.migration.gt_flags))
+			return true;
 	}
-	return true;
+	return false;
 }
 
 /**
···
 
 	xe_assert(xe, IS_SRIOV_VF(xe));
 
-	if (!vf_ready_to_recovery_on_all_gts(xe))
+	if (!vf_ready_to_recovery_on_any_gts(xe))
 		return;
-
-	WRITE_ONCE(xe->sriov.vf.migration.gt_flags, 0);
-	/* Ensure other threads see that no flags are set now. */
-	smp_mb();
 
 	started = queue_work(xe->sriov.wq, &xe->sriov.vf.migration.worker);
 	drm_info(&xe->drm, "VF migration recovery %s\n", started ?
+2
drivers/gpu/drm/xe/xe_step.c
···
 
 #include "xe_step.h"
 
+#include <kunit/visibility.h>
 #include <linux/bitfield.h>
 
 #include "xe_device.h"
···
 		return "**";
 	}
 }
+EXPORT_SYMBOL_IF_KUNIT(xe_step_name);
+11 -8
drivers/gpu/drm/xe/xe_survivability_mode.c
···
 #include "xe_device.h"
 #include "xe_gt.h"
 #include "xe_heci_gsc.h"
+#include "xe_i2c.h"
 #include "xe_mmio.h"
 #include "xe_pcode_api.h"
 #include "xe_vsec.h"
···
 	survivability->mode = true;
 
 	ret = xe_heci_gsc_init(xe);
-	if (ret) {
-		/*
-		 * But if it fails, device can't enter survivability
-		 * so move it back for correct error handling
-		 */
-		survivability->mode = false;
-		return ret;
-	}
+	if (ret)
+		goto err;
 
 	xe_vsec_init(xe);
+
+	ret = xe_i2c_probe(xe);
+	if (ret)
+		goto err;
 
 	dev_err(dev, "In Survivability Mode\n");
 
 	return 0;
+
+err:
+	survivability->mode = false;
+	return ret;
 }
 
 /**
+75 -50
drivers/gpu/drm/xe/xe_svm.c
···
  * Copyright © 2024 Intel Corporation
  */
 
+#include <drm/drm_drv.h>
+
 #include "xe_bo.h"
 #include "xe_gt_stats.h"
 #include "xe_gt_tlb_invalidation.h"
 #include "xe_migrate.h"
 #include "xe_module.h"
+#include "xe_pm.h"
 #include "xe_pt.h"
 #include "xe_svm.h"
+#include "xe_tile.h"
 #include "xe_ttm_vram_mgr.h"
 #include "xe_vm.h"
 #include "xe_vm_types.h"
···
 	up_write(&vm->lock);
 }
 
-#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR)
+#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)
 
 static struct xe_vram_region *page_to_vr(struct page *page)
 {
···
 	return xe_svm_copy(pages, dma_addr, npages, XE_SVM_COPY_TO_SRAM);
 }
 
-static struct xe_bo *to_xe_bo(struct drm_gpusvm_devmem *devmem_allocation)
+static struct xe_bo *to_xe_bo(struct drm_pagemap_devmem *devmem_allocation)
 {
 	return container_of(devmem_allocation, struct xe_bo, devmem_allocation);
 }
 
-static void xe_svm_devmem_release(struct drm_gpusvm_devmem *devmem_allocation)
+static void xe_svm_devmem_release(struct drm_pagemap_devmem *devmem_allocation)
 {
 	struct xe_bo *bo = to_xe_bo(devmem_allocation);
+	struct xe_device *xe = xe_bo_device(bo);
 
 	xe_bo_put_async(bo);
+	xe_pm_runtime_put(xe);
 }
 
 static u64 block_offset_to_pfn(struct xe_vram_region *vr, u64 offset)
···
 	return &tile->mem.vram.ttm.mm;
 }
 
-static int xe_svm_populate_devmem_pfn(struct drm_gpusvm_devmem *devmem_allocation,
+static int xe_svm_populate_devmem_pfn(struct drm_pagemap_devmem *devmem_allocation,
 				      unsigned long npages, unsigned long *pfn)
 {
 	struct xe_bo *bo = to_xe_bo(devmem_allocation);
···
 	return 0;
 }
 
-static const struct drm_gpusvm_devmem_ops gpusvm_devmem_ops = {
+static const struct drm_pagemap_devmem_ops dpagemap_devmem_ops = {
 	.devmem_release = xe_svm_devmem_release,
 	.populate_devmem_pfn = xe_svm_populate_devmem_pfn,
 	.copy_to_devmem = xe_svm_copy_to_devmem,
···
 			  min(end, xe_vma_end(vma)));
 }
 
-#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR)
+#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)
 static struct xe_vram_region *tile_to_vr(struct xe_tile *tile)
 {
 	return &tile->mem.vram;
 }
 
-/**
- * xe_svm_alloc_vram()- Allocate device memory pages for range,
- * migrating existing data.
- * @vm: The VM.
- * @tile: tile to allocate vram from
- * @range: SVM range
- * @ctx: DRM GPU SVM context
- *
- * Return: 0 on success, error code on failure.
- */
-int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile,
-		      struct xe_svm_range *range,
-		      const struct drm_gpusvm_ctx *ctx)
+static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
+				      unsigned long start, unsigned long end,
+				      struct mm_struct *mm,
+				      unsigned long timeslice_ms)
 {
-	struct mm_struct *mm = vm->svm.gpusvm.mm;
+	struct xe_tile *tile = container_of(dpagemap, typeof(*tile), mem.vram.dpagemap);
+	struct xe_device *xe = tile_to_xe(tile);
+	struct device *dev = xe->drm.dev;
 	struct xe_vram_region *vr = tile_to_vr(tile);
 	struct drm_buddy_block *block;
 	struct list_head *blocks;
 	struct xe_bo *bo;
-	ktime_t end = 0;
-	int err;
+	ktime_t time_end = 0;
+	int err, idx;
 
-	range_debug(range, "ALLOCATE VRAM");
+	if (!drm_dev_enter(&xe->drm, &idx))
+		return -ENODEV;
 
-	if (!mmget_not_zero(mm))
-		return -EFAULT;
-	mmap_read_lock(mm);
+	xe_pm_runtime_get(xe);
 
-retry:
-	bo = xe_bo_create_locked(tile_to_xe(tile), NULL, NULL,
-				 xe_svm_range_size(range),
+ retry:
+	bo = xe_bo_create_locked(tile_to_xe(tile), NULL, NULL, end - start,
 				 ttm_bo_type_device,
 				 XE_BO_FLAG_VRAM_IF_DGFX(tile) |
 				 XE_BO_FLAG_CPU_ADDR_MIRROR);
 	if (IS_ERR(bo)) {
 		err = PTR_ERR(bo);
-		if (xe_vm_validate_should_retry(NULL, err, &end))
+		if (xe_vm_validate_should_retry(NULL, err, &time_end))
 			goto retry;
-		goto unlock;
+		goto out_pm_put;
 	}
 
-	drm_gpusvm_devmem_init(&bo->devmem_allocation,
-			       vm->xe->drm.dev, mm,
-			       &gpusvm_devmem_ops,
-			       &tile->mem.vram.dpagemap,
-			       xe_svm_range_size(range));
+	drm_pagemap_devmem_init(&bo->devmem_allocation, dev, mm,
+				&dpagemap_devmem_ops,
+				&tile->mem.vram.dpagemap,
+				end - start);
 
 	blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks;
 	list_for_each_entry(block, blocks, link)
 		block->private = vr;
 
 	xe_bo_get(bo);
-	err = drm_gpusvm_migrate_to_devmem(&vm->svm.gpusvm, &range->base,
-					   &bo->devmem_allocation, ctx);
+
+	/* Ensure the device has a pm ref while there are device pages active. */
+	xe_pm_runtime_get_noresume(xe);
+	err = drm_pagemap_migrate_to_devmem(&bo->devmem_allocation, mm,
+					    start, end, timeslice_ms,
+					    xe_svm_devm_owner(xe));
 	if (err)
 		xe_svm_devmem_release(&bo->devmem_allocation);
 
 	xe_bo_unlock(bo);
 	xe_bo_put(bo);
 
-unlock:
-	mmap_read_unlock(mm);
-	mmput(mm);
+ out_pm_put:
+	xe_pm_runtime_put(xe);
+	drm_dev_exit(idx);
 
 	return err;
 }
···
 	struct drm_gpusvm_ctx ctx = {
 		.read_only = xe_vma_read_only(vma),
 		.devmem_possible = IS_DGFX(vm->xe) &&
-			IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR),
+			IS_ENABLED(CONFIG_DRM_XE_PAGEMAP),
 		.check_pages_threshold = IS_DGFX(vm->xe) &&
-			IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? SZ_64K : 0,
+			IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) ? SZ_64K : 0,
 		.devmem_only = atomic && IS_DGFX(vm->xe) &&
-			IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR),
+			IS_ENABLED(CONFIG_DRM_XE_PAGEMAP),
 		.timeslice_ms = atomic && IS_DGFX(vm->xe) &&
-			IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ?
+			IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) ?
 			vm->xe->atomic_svm_timeslice_ms : 0,
 	};
 	struct xe_svm_range *range;
···
 
 	if (--migrate_try_count >= 0 &&
 	    xe_svm_range_needs_migrate_to_vram(range, vma, IS_DGFX(vm->xe))) {
-		err = xe_svm_alloc_vram(vm, tile, range, &ctx);
+		err = xe_svm_alloc_vram(tile, range, &ctx);
 		ctx.timeslice_ms <<= 1;	/* Double timeslice if we have to retry */
 		if (err) {
 			if (migrate_try_count || !ctx.devmem_only) {
···
  */
 int xe_svm_bo_evict(struct xe_bo *bo)
 {
-	return drm_gpusvm_evict_to_ram(&bo->devmem_allocation);
+	return drm_pagemap_evict_to_ram(&bo->devmem_allocation);
 }
 
 /**
···
 	return err;
 }
 
-#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR)
+#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)
+
+/**
+ * xe_svm_alloc_vram()- Allocate device memory pages for range,
+ * migrating existing data.
+ * @tile: tile to allocate vram from
+ * @range: SVM range
+ * @ctx: DRM GPU SVM context
+ *
+ * Return: 0 on success, error code on failure.
+ */
+int xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range,
+		      const struct drm_gpusvm_ctx *ctx)
+{
+	struct drm_pagemap *dpagemap;
+
+	xe_assert(tile_to_xe(tile), range->base.flags.migrate_devmem);
+	range_debug(range, "ALLOCATE VRAM");
+
+	dpagemap = xe_tile_local_pagemap(tile);
+	return drm_pagemap_populate_mm(dpagemap, xe_svm_range_start(range),
+				       xe_svm_range_end(range),
+				       range->base.gpusvm->mm,
+				       ctx->timeslice_ms);
+}
 
 static struct drm_pagemap_device_addr
 xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap,
···
 
 static const struct drm_pagemap_ops xe_drm_pagemap_ops = {
 	.device_map = xe_drm_pagemap_device_map,
+	.populate_mm = xe_drm_pagemap_populate_mm,
 };
 
 /**
···
 	vr->pagemap.range.start = res->start;
 	vr->pagemap.range.end = res->end;
 	vr->pagemap.nr_range = 1;
-	vr->pagemap.ops = drm_gpusvm_pagemap_ops_get();
+	vr->pagemap.ops = drm_pagemap_pagemap_ops_get();
 	vr->pagemap.owner = xe_svm_devm_owner(xe);
 	addr = devm_memremap_pages(dev, &vr->pagemap);
···
 	return 0;
 }
 #else
-int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile,
+int xe_svm_alloc_vram(struct xe_tile *tile,
 		      struct xe_svm_range *range,
 		      const struct drm_gpusvm_ctx *ctx)
 {
+4 -6
drivers/gpu/drm/xe/xe_svm.h
···
 
 void xe_svm_range_debug(struct xe_svm_range *range, const char *operation);
 
-int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile,
-		      struct xe_svm_range *range,
+int xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range,
 		      const struct drm_gpusvm_ctx *ctx);
 
 struct xe_svm_range *xe_svm_range_find_or_insert(struct xe_vm *vm, u64 addr,
···
 {
 }
 
-static inline
-int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile,
-		      struct xe_svm_range *range,
-		      const struct drm_gpusvm_ctx *ctx)
+static inline int
+xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range,
+		  const struct drm_gpusvm_ctx *ctx)
 {
 	return -EOPNOTSUPP;
 }
+7
drivers/gpu/drm/xe/xe_tile.c
···
 #include "xe_device.h"
 #include "xe_ggtt.h"
 #include "xe_gt.h"
+#include "xe_memirq.h"
 #include "xe_migrate.h"
 #include "xe_pcode.h"
 #include "xe_sa.h"
···
 
 int xe_tile_init(struct xe_tile *tile)
 {
+	int err;
+
+	err = xe_memirq_init(&tile->memirq);
+	if (err)
+		return err;
+
 	tile->mem.kernel_bb_pool = xe_sa_bo_manager_init(tile, SZ_1M, 16);
 	if (IS_ERR(tile->mem.kernel_bb_pool))
 		return PTR_ERR(tile->mem.kernel_bb_pool);
+11
drivers/gpu/drm/xe/xe_tile.h
···
 
 void xe_tile_migrate_wait(struct xe_tile *tile);
 
+#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)
+static inline struct drm_pagemap *xe_tile_local_pagemap(struct xe_tile *tile)
+{
+	return &tile->mem.vram.dpagemap;
+}
+#else
+static inline struct drm_pagemap *xe_tile_local_pagemap(struct xe_tile *tile)
+{
+	return NULL;
+}
+#endif
 #endif
+2 -2
drivers/gpu/drm/xe/xe_trace_bo.h
···
 
 	TP_fast_assign(
 		__assign_str(dev);
-		__entry->size = bo->size;
+		__entry->size = xe_bo_size(bo);
 		__entry->flags = bo->flags;
 		__entry->vm = bo->vm;
 	),
···
 
 	TP_fast_assign(
 		__entry->bo = bo;
-		__entry->size = bo->size;
+		__entry->size = xe_bo_size(bo);
 		__assign_str(new_placement_name);
 		__assign_str(old_placement_name);
 		__assign_str(device_id);
+41 -37
drivers/gpu/drm/xe/xe_uc.c
···
 }
 
 /* Should be called once at driver load only */
+int xe_uc_init_noalloc(struct xe_uc *uc)
+{
+	int ret;
+
+	ret = xe_guc_init_noalloc(&uc->guc);
+	if (ret)
+		goto err;
+
+	/* HuC and GSC have no early dependencies and will be initialized during xe_uc_init(). */
+	return 0;
+
+err:
+	xe_gt_err(uc_to_gt(uc), "Failed to early initialize uC (%pe)\n", ERR_PTR(ret));
+	return ret;
+}
+
 int xe_uc_init(struct xe_uc *uc)
 {
 	int ret;
···
 	if (!xe_device_uc_enabled(uc_to_xe(uc)))
 		return 0;
 
-	if (IS_SRIOV_VF(uc_to_xe(uc)))
-		return 0;
+	if (!IS_SRIOV_VF(uc_to_xe(uc))) {
+		ret = xe_wopcm_init(&uc->wopcm);
+		if (ret)
+			goto err;
+	}
 
-	ret = xe_wopcm_init(&uc->wopcm);
+	ret = xe_guc_min_load_for_hwconfig(&uc->guc);
 	if (ret)
 		goto err;
 
 	return 0;
-
 err:
 	xe_gt_err(uc_to_gt(uc), "Failed to initialize uC (%pe)\n", ERR_PTR(ret));
 	return ret;
···
 	return uc_reset(uc);
 }
 
-/**
- * xe_uc_init_hwconfig - minimally init Uc, read and parse hwconfig
- * @uc: The UC object
- *
- * Return: 0 on success, negative error code on error.
- */
-int xe_uc_init_hwconfig(struct xe_uc *uc)
-{
-	int ret;
-
-	/* GuC submission not enabled, nothing to do */
-	if (!xe_device_uc_enabled(uc_to_xe(uc)))
-		return 0;
-
-	ret = xe_guc_min_load_for_hwconfig(&uc->guc);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
-static int vf_uc_init_hw(struct xe_uc *uc)
+static int vf_uc_load_hw(struct xe_uc *uc)
 {
 	int err;
···
 
 	err = xe_gt_sriov_vf_connect(uc_to_gt(uc));
 	if (err)
-		return err;
+		goto err_out;
 
 	uc->guc.submission_state.enabled = true;
 
-	err = xe_gt_record_default_lrcs(uc_to_gt(uc));
+	err = xe_guc_opt_in_features_enable(&uc->guc);
 	if (err)
 		return err;
 
+	err = xe_gt_record_default_lrcs(uc_to_gt(uc));
+	if (err)
+		goto err_out;
+
 	return 0;
+
+err_out:
+	xe_guc_sanitize(&uc->guc);
+	return err;
 }
 
 /*
  * Should be called during driver load, after every GT reset, and after every
  * suspend to reload / auth the firmwares.
  */
-int xe_uc_init_hw(struct xe_uc *uc)
+int xe_uc_load_hw(struct xe_uc *uc)
 {
 	int ret;
···
 		return 0;
 
 	if (IS_SRIOV_VF(uc_to_xe(uc)))
-		return vf_uc_init_hw(uc);
+		return vf_uc_load_hw(uc);
 
 	ret = xe_huc_upload(&uc->huc);
 	if (ret)
···
 
 	ret = xe_gt_record_default_lrcs(uc_to_gt(uc));
 	if (ret)
-		return ret;
+		goto err_out;
 
 	ret = xe_guc_post_load_init(&uc->guc);
 	if (ret)
-		return ret;
+		goto err_out;
 
 	ret = xe_guc_pc_start(&uc->guc.pc);
 	if (ret)
-		return ret;
+		goto err_out;
 
 	xe_guc_engine_activity_enable_stats(&uc->guc);
···
 	xe_gsc_load_start(&uc->gsc);
 
 	return 0;
-}
 
-int xe_uc_fini_hw(struct xe_uc *uc)
-{
-	return xe_uc_sanitize_reset(uc);
+err_out:
+	xe_guc_sanitize(&uc->guc);
+	return ret;
 }
 
 int xe_uc_reset_prepare(struct xe_uc *uc)
+2 -3
drivers/gpu/drm/xe/xe_uc.h
···
 
 struct xe_uc;
 
+int xe_uc_init_noalloc(struct xe_uc *uc);
 int xe_uc_init(struct xe_uc *uc);
-int xe_uc_init_hwconfig(struct xe_uc *uc);
 int xe_uc_init_post_hwconfig(struct xe_uc *uc);
-int xe_uc_init_hw(struct xe_uc *uc);
-int xe_uc_fini_hw(struct xe_uc *uc);
+int xe_uc_load_hw(struct xe_uc *uc);
 void xe_uc_gucrc_disable(struct xe_uc *uc);
 int xe_uc_reset_prepare(struct xe_uc *uc);
 void xe_uc_stop_prepare(struct xe_uc *uc);
+5 -3
drivers/gpu/drm/xe/xe_uc_fw.c
···
 #define XE_GT_TYPE_ANY		XE_GT_TYPE_UNINITIALIZED
 
 #define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \
-	fw_def(BATTLEMAGE,	GT_TYPE_ANY,	major_ver(xe,	guc,	bmg,	70, 44, 1)) \
-	fw_def(LUNARLAKE,	GT_TYPE_ANY,	major_ver(xe,	guc,	lnl,	70, 44, 1)) \
+	fw_def(PANTHERLAKE,	GT_TYPE_ANY,	major_ver(xe,	guc,	ptl,	70, 47, 0)) \
+	fw_def(BATTLEMAGE,	GT_TYPE_ANY,	major_ver(xe,	guc,	bmg,	70, 45, 2)) \
+	fw_def(LUNARLAKE,	GT_TYPE_ANY,	major_ver(xe,	guc,	lnl,	70, 45, 2)) \
 	fw_def(METEORLAKE,	GT_TYPE_ANY,	major_ver(i915,	guc,	mtl,	70, 44, 1)) \
-	fw_def(DG2,		GT_TYPE_ANY,	major_ver(i915,	guc,	dg2,	70, 44, 1)) \
+	fw_def(DG2,		GT_TYPE_ANY,	major_ver(i915,	guc,	dg2,	70, 45, 2)) \
 	fw_def(DG1,		GT_TYPE_ANY,	major_ver(i915,	guc,	dg1,	70, 44, 1)) \
 	fw_def(ALDERLAKE_N,	GT_TYPE_ANY,	major_ver(i915,	guc,	tgl,	70, 44, 1)) \
 	fw_def(ALDERLAKE_P,	GT_TYPE_ANY,	major_ver(i915,	guc,	adlp,	70, 44, 1)) \
···
 	fw_def(TIGERLAKE,	GT_TYPE_ANY,	major_ver(i915,	guc,	tgl,	70, 44, 1))
 
 #define XE_HUC_FIRMWARE_DEFS(fw_def, mmp_ver, no_ver) \
+	fw_def(PANTHERLAKE,	GT_TYPE_ANY,	no_ver(xe,	huc,		ptl)) \
 	fw_def(BATTLEMAGE,	GT_TYPE_ANY,	no_ver(xe,	huc,		bmg)) \
 	fw_def(LUNARLAKE,	GT_TYPE_ANY,	no_ver(xe,	huc,		lnl)) \
 	fw_def(METEORLAKE,	GT_TYPE_ANY,	no_ver(i915,	huc_gsc,	mtl)) \
+4 -4
drivers/gpu/drm/xe/xe_vm.c
···
	if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, region)) {
		tile = &vm->xe->tiles[region_to_mem_type[region] - XE_PL_VRAM0];
-		err = xe_svm_alloc_vram(vm, tile, svm_range, &ctx);
+		err = xe_svm_alloc_vram(tile, svm_range, &ctx);
		if (err) {
			drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n",
				vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
···
 {
	u16 coh_mode;

-	if (XE_IOCTL_DBG(xe, range > bo->size) ||
+	if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) ||
	    XE_IOCTL_DBG(xe, obj_offset >
-			 bo->size - range)) {
+			 xe_bo_size(bo) - range)) {
		return -EINVAL;
	}
···
	xe_vma_ops_init(&vops, vm, q, NULL, 0);

-	ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, bo->size,
+	ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, xe_bo_size(bo),
				       DRM_XE_VM_BIND_OP_MAP, 0, 0,
				       vm->xe->pat.idx[cache_lvl]);
	if (IS_ERR(ops)) {
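The `bo->size` to `xe_bo_size(bo)` conversion keeps the overflow-safe shape of the bounds check: testing `offset > size - range` (after confirming `range <= size`) cannot wrap, whereas the naive `offset + range > size` can. A toy demonstration (the struct, accessor, and error code are stand-ins for the xe types, not the real API):

```c
#include <assert.h>
#include <stdint.h>

/* Stand-in for a buffer object; only the size matters here. */
struct toy_bo { uint64_t size; };

/* Accessor in the style of xe_bo_size(), replacing direct field reads. */
static uint64_t toy_bo_size(const struct toy_bo *bo) { return bo->size; }

static const struct toy_bo demo_bo = { .size = 4096 };

/* Rejects (offset, range) without ever computing offset + range,
 * so a huge offset cannot wrap around and slip past the check. */
static int check_bounds(const struct toy_bo *bo, uint64_t offset, uint64_t range)
{
	if (range > toy_bo_size(bo) ||
	    offset > toy_bo_size(bo) - range)
		return -22;	/* -EINVAL */
	return 0;
}
```

With the naive form, `offset = UINT64_MAX` and a small `range` would wrap to a tiny sum and pass; the subtraction form rejects it.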
+16
drivers/gpu/drm/xe/xe_wa.c
···
	  XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)),
	  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
	},
+	{ XE_RTP_NAME("16021865536"),
+	  XE_RTP_RULES(MEDIA_VERSION(3002),
+		       ENGINE_CLASS(VIDEO_DECODE)),
+	  XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)),
+	  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
+	},
+	{ XE_RTP_NAME("16021867713"),
+	  XE_RTP_RULES(MEDIA_VERSION(3002),
+		       ENGINE_CLASS(VIDEO_DECODE)),
+	  XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)),
+	  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
+	},
	{ XE_RTP_NAME("14021486841"),
	  XE_RTP_RULES(MEDIA_VERSION(3000), MEDIA_STEP(A0, B0),
		       ENGINE_CLASS(VIDEO_DECODE)),
···
			     GRAPHICS_VERSION_RANGE(2001, 3001)),
	  XE_RTP_ACTIONS(SET(RING_PSMI_CTL(0), RC_SEMA_IDLE_MSG_DISABLE,
			     XE_RTP_ACTION_FLAG(ENGINE_BASE)))
+	},
+	{ XE_RTP_NAME("14021402888"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(3003), FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE))
	},
 };
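The RTP entries above are table-driven: each names a workaround and the rules (IP version, engine class, steppings, custom predicates) under which it fires. A much-simplified sketch of that matching, using the new workaround numbers, with a single flat version field in place of the real media/graphics split and `FUNC()` predicates (an illustrative reduction, not the xe_rtp implementation):

```c
#include <assert.h>

/* Toy rule: workaround name plus an inclusive IP-version range. */
struct wa_rule {
	const char *name;
	int ver_min, ver_max;
};

/* The three workarounds added in this hunk: two media-3002 VD entries
 * and one graphics-3003 entry, modeled as one flat version space. */
static const struct wa_rule rules[] = {
	{ "16021865536", 3002, 3002 },
	{ "16021867713", 3002, 3002 },
	{ "14021402888", 3003, 3003 },
};

/* Count how many rules fire for a given IP version, the way the
 * rule processor walks the whole table per GT. */
static int wa_matches(int version)
{
	int n = 0;

	for (unsigned int i = 0; i < sizeof(rules) / sizeof(rules[0]); i++)
		if (version >= rules[i].ver_min && version <= rules[i].ver_max)
			n++;
	return n;
}
```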
+8 -2
drivers/gpu/drm/xe/xe_wa_oob.rules
···
 14022293748	GRAPHICS_VERSION_RANGE(2001, 2002)
		GRAPHICS_VERSION(2004)
		GRAPHICS_VERSION_RANGE(3000, 3001)
+		GRAPHICS_VERSION(3003)
 22019794406	GRAPHICS_VERSION_RANGE(2001, 2002)
		GRAPHICS_VERSION(2004)
		GRAPHICS_VERSION_RANGE(3000, 3001)
+		GRAPHICS_VERSION(3003)
 22019338487	MEDIA_VERSION(2000)
-		GRAPHICS_VERSION(2001)
+		GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_not_sriov_vf)
		MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), FUNC(xe_rtp_match_not_sriov_vf)
 22019338487_display	PLATFORM(LUNARLAKE)
-16023588340	GRAPHICS_VERSION(2001)
+16023588340	GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_not_sriov_vf)
 14019789679	GRAPHICS_VERSION(1255)
		GRAPHICS_VERSION_RANGE(1270, 2004)
 no_media_l3	MEDIA_VERSION(3000)
···
		GRAPHICS_VERSION(1260), GRAPHICS_STEP(A0, B0)
 16023105232	GRAPHICS_VERSION_RANGE(2001, 3001)
		MEDIA_VERSION_RANGE(1301, 3000)
+		MEDIA_VERSION(3002)
+		GRAPHICS_VERSION(3003)
 16026508708	GRAPHICS_VERSION_RANGE(1200, 3001)
		MEDIA_VERSION_RANGE(1300, 3000)
+		MEDIA_VERSION(3002)
+		GRAPHICS_VERSION(3003)

 # SoC workaround - currently applies to all platforms with the following
 # primary GT GMDID
+12 -6
drivers/i2c/busses/i2c-designware-platdrv.c
···
 }
 #endif

-static int txgbe_i2c_request_regs(struct dw_i2c_dev *dev)
+static int dw_i2c_get_parent_regmap(struct dw_i2c_dev *dev)
 {
	dev->map = dev_get_regmap(dev->dev->parent, NULL);
	if (!dev->map)
···
	struct platform_device *pdev = to_platform_device(dev->dev);
	int ret;

+	if (device_is_compatible(dev->dev, "intel,xe-i2c"))
+		return dw_i2c_get_parent_regmap(dev);
+
	switch (dev->flags & MODEL_MASK) {
	case MODEL_BAIKAL_BT1:
		ret = bt1_i2c_request_regs(dev);
		break;
	case MODEL_WANGXUN_SP:
-		ret = txgbe_i2c_request_regs(dev);
+		ret = dw_i2c_get_parent_regmap(dev);
		break;
	default:
		dev->base = devm_platform_ioremap_resource(pdev, 0);
···
 static int dw_i2c_plat_probe(struct platform_device *pdev)
 {
+	u32 flags = (uintptr_t)device_get_match_data(&pdev->dev);
	struct device *device = &pdev->dev;
	struct i2c_adapter *adap;
	struct dw_i2c_dev *dev;
	int irq, ret;

-	irq = platform_get_irq(pdev, 0);
-	if (irq < 0)
+	irq = platform_get_irq_optional(pdev, 0);
+	if (irq == -ENXIO)
+		flags |= ACCESS_POLLING;
+	else if (irq < 0)
		return irq;

	dev = devm_kzalloc(device, sizeof(*dev), GFP_KERNEL);
	if (!dev)
		return -ENOMEM;

-	dev->flags = (uintptr_t)device_get_match_data(device);
	if (device_property_present(device, "wx,i2c-snps-model"))
-		dev->flags = MODEL_WANGXUN_SP | ACCESS_POLLING;
+		flags = MODEL_WANGXUN_SP | ACCESS_POLLING;

	dev->dev = device;
	dev->irq = irq;
+	dev->flags = flags;
	platform_set_drvdata(pdev, dev);

	ret = dw_i2c_plat_request_regs(dev);
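The probe change treats a missing interrupt line as a mode switch rather than a failure: `platform_get_irq_optional()` returning `-ENXIO` now selects polled access, while any other negative value still aborts the probe. That decision can be sketched in isolation (the `ACCESS_POLLING` value and the helper are illustrative stand-ins, not the designware driver's internals):

```c
#include <assert.h>
#include <errno.h>	/* ENXIO, EINVAL */

#define ACCESS_POLLING	0x1	/* stand-in for the driver's flag bit */

static unsigned int dw_flags;

/* Models the new IRQ handling in dw_i2c_plat_probe():
 *   -ENXIO  -> no interrupt wired up, fall back to polling, not an error
 *   other <0 -> hard error, propagate
 *   >=0     -> usable IRQ number */
static int resolve_irq(int irq_ret)
{
	if (irq_ret == -ENXIO) {
		dw_flags |= ACCESS_POLLING;
		return 0;
	}
	return irq_ret;
}
```

This is what lets the same platform driver serve the BMG integration, where the controller may have no interrupt routed at all.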
+11
drivers/mtd/devices/Kconfig
···
	  platforms from Linux. This device abstracts away the
	  firmware interface for flash access.

+config MTD_INTEL_DG
+	tristate "Intel Discrete Graphics non-volatile memory driver"
+	depends on AUXILIARY_BUS
+	depends on MTD
+	help
+	  This provides an MTD device to access Intel Discrete Graphics
+	  non-volatile memory.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called mtd-intel-dg.
+
 comment "Disk-On-Chip Device Drivers"

 config MTD_DOCG3
+1
drivers/mtd/devices/Makefile
···
 obj-$(CONFIG_MTD_BCM47XXSFLASH)	+= bcm47xxsflash.o
 obj-$(CONFIG_MTD_ST_SPI_FSM)	+= st_spi_fsm.o
 obj-$(CONFIG_MTD_POWERNV_FLASH)	+= powernv_flash.o
+obj-$(CONFIG_MTD_INTEL_DG)	+= mtd_intel_dg.o


 CFLAGS_docg3.o		+= -I$(src)
+830
drivers/mtd/devices/mtd_intel_dg.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Copyright(c) 2019-2025, Intel Corporation. All rights reserved. 4 + */ 5 + 6 + #include <linux/bitfield.h> 7 + #include <linux/bits.h> 8 + #include <linux/cleanup.h> 9 + #include <linux/delay.h> 10 + #include <linux/device.h> 11 + #include <linux/intel_dg_nvm_aux.h> 12 + #include <linux/io.h> 13 + #include <linux/io-64-nonatomic-lo-hi.h> 14 + #include <linux/kernel.h> 15 + #include <linux/module.h> 16 + #include <linux/mtd/mtd.h> 17 + #include <linux/mtd/partitions.h> 18 + #include <linux/string.h> 19 + #include <linux/slab.h> 20 + #include <linux/sizes.h> 21 + #include <linux/types.h> 22 + 23 + struct intel_dg_nvm { 24 + struct kref refcnt; 25 + struct mtd_info mtd; 26 + struct mutex lock; /* region access lock */ 27 + void __iomem *base; 28 + void __iomem *base2; 29 + bool non_posted_erase; 30 + 31 + size_t size; 32 + unsigned int nregions; 33 + struct { 34 + const char *name; 35 + u8 id; 36 + u64 offset; 37 + u64 size; 38 + unsigned int is_readable:1; 39 + unsigned int is_writable:1; 40 + } regions[] __counted_by(nregions); 41 + }; 42 + 43 + #define NVM_TRIGGER_REG 0x00000000 44 + #define NVM_VALSIG_REG 0x00000010 45 + #define NVM_ADDRESS_REG 0x00000040 46 + #define NVM_REGION_ID_REG 0x00000044 47 + #define NVM_DEBUG_REG 0x00000000 48 + /* 49 + * [15:0]-Erase size = 0x0010 4K 0x0080 32K 0x0100 64K 50 + * [23:16]-Reserved 51 + * [31:24]-Erase MEM RegionID 52 + */ 53 + #define NVM_ERASE_REG 0x00000048 54 + #define NVM_ACCESS_ERROR_REG 0x00000070 55 + #define NVM_ADDRESS_ERROR_REG 0x00000074 56 + 57 + /* Flash Valid Signature */ 58 + #define NVM_FLVALSIG 0x0FF0A55A 59 + 60 + #define NVM_MAP_ADDR_MASK GENMASK(7, 0) 61 + #define NVM_MAP_ADDR_SHIFT 0x00000004 62 + 63 + #define NVM_REGION_ID_DESCRIPTOR 0 64 + /* Flash Region Base Address */ 65 + #define NVM_FRBA 0x40 66 + /* Flash Region __n - Flash Descriptor Record */ 67 + #define NVM_FLREG(__n) (NVM_FRBA + ((__n) * 4)) 68 + /* Flash Map 1 Register */ 69 + 
#define NVM_FLMAP1_REG 0x18 70 + #define NVM_FLMSTR4_OFFSET 0x00C 71 + 72 + #define NVM_ACCESS_ERROR_PCIE_MASK 0x7 73 + 74 + #define NVM_FREG_BASE_MASK GENMASK(15, 0) 75 + #define NVM_FREG_ADDR_MASK GENMASK(31, 16) 76 + #define NVM_FREG_ADDR_SHIFT 12 77 + #define NVM_FREG_MIN_REGION_SIZE 0xFFF 78 + 79 + #define NVM_NON_POSTED_ERASE_DONE BIT(23) 80 + #define NVM_NON_POSTED_ERASE_DONE_ITER 3000 81 + 82 + static inline void idg_nvm_set_region_id(struct intel_dg_nvm *nvm, u8 region) 83 + { 84 + iowrite32((u32)region, nvm->base + NVM_REGION_ID_REG); 85 + } 86 + 87 + static inline u32 idg_nvm_error(struct intel_dg_nvm *nvm) 88 + { 89 + void __iomem *base = nvm->base; 90 + 91 + u32 reg = ioread32(base + NVM_ACCESS_ERROR_REG) & NVM_ACCESS_ERROR_PCIE_MASK; 92 + 93 + /* reset error bits */ 94 + if (reg) 95 + iowrite32(reg, base + NVM_ACCESS_ERROR_REG); 96 + 97 + return reg; 98 + } 99 + 100 + static inline u32 idg_nvm_read32(struct intel_dg_nvm *nvm, u32 address) 101 + { 102 + void __iomem *base = nvm->base; 103 + 104 + iowrite32(address, base + NVM_ADDRESS_REG); 105 + 106 + return ioread32(base + NVM_TRIGGER_REG); 107 + } 108 + 109 + static inline u64 idg_nvm_read64(struct intel_dg_nvm *nvm, u32 address) 110 + { 111 + void __iomem *base = nvm->base; 112 + 113 + iowrite32(address, base + NVM_ADDRESS_REG); 114 + 115 + return readq(base + NVM_TRIGGER_REG); 116 + } 117 + 118 + static void idg_nvm_write32(struct intel_dg_nvm *nvm, u32 address, u32 data) 119 + { 120 + void __iomem *base = nvm->base; 121 + 122 + iowrite32(address, base + NVM_ADDRESS_REG); 123 + 124 + iowrite32(data, base + NVM_TRIGGER_REG); 125 + } 126 + 127 + static void idg_nvm_write64(struct intel_dg_nvm *nvm, u32 address, u64 data) 128 + { 129 + void __iomem *base = nvm->base; 130 + 131 + iowrite32(address, base + NVM_ADDRESS_REG); 132 + 133 + writeq(data, base + NVM_TRIGGER_REG); 134 + } 135 + 136 + static int idg_nvm_get_access_map(struct intel_dg_nvm *nvm, u32 *access_map) 137 + { 138 + u32 fmstr4_addr; 139 
+ u32 fmstr4; 140 + u32 flmap1; 141 + u32 fmba; 142 + 143 + idg_nvm_set_region_id(nvm, NVM_REGION_ID_DESCRIPTOR); 144 + 145 + flmap1 = idg_nvm_read32(nvm, NVM_FLMAP1_REG); 146 + if (idg_nvm_error(nvm)) 147 + return -EIO; 148 + /* Get Flash Master Baser Address (FMBA) */ 149 + fmba = (FIELD_GET(NVM_MAP_ADDR_MASK, flmap1) << NVM_MAP_ADDR_SHIFT); 150 + fmstr4_addr = fmba + NVM_FLMSTR4_OFFSET; 151 + 152 + fmstr4 = idg_nvm_read32(nvm, fmstr4_addr); 153 + if (idg_nvm_error(nvm)) 154 + return -EIO; 155 + 156 + *access_map = fmstr4; 157 + return 0; 158 + } 159 + 160 + /* 161 + * Region read/write access encoded in the access map 162 + * in the following order from the lower bit: 163 + * [3:0] regions 12-15 read state 164 + * [7:4] regions 12-15 write state 165 + * [19:8] regions 0-11 read state 166 + * [31:20] regions 0-11 write state 167 + */ 168 + static bool idg_nvm_region_readable(u32 access_map, u8 region) 169 + { 170 + if (region < 12) 171 + return access_map & BIT(region + 8); /* [19:8] */ 172 + else 173 + return access_map & BIT(region - 12); /* [3:0] */ 174 + } 175 + 176 + static bool idg_nvm_region_writable(u32 access_map, u8 region) 177 + { 178 + if (region < 12) 179 + return access_map & BIT(region + 20); /* [31:20] */ 180 + else 181 + return access_map & BIT(region - 8); /* [7:4] */ 182 + } 183 + 184 + static int idg_nvm_is_valid(struct intel_dg_nvm *nvm) 185 + { 186 + u32 is_valid; 187 + 188 + idg_nvm_set_region_id(nvm, NVM_REGION_ID_DESCRIPTOR); 189 + 190 + is_valid = idg_nvm_read32(nvm, NVM_VALSIG_REG); 191 + if (idg_nvm_error(nvm)) 192 + return -EIO; 193 + 194 + if (is_valid != NVM_FLVALSIG) 195 + return -ENODEV; 196 + 197 + return 0; 198 + } 199 + 200 + static unsigned int idg_nvm_get_region(const struct intel_dg_nvm *nvm, loff_t from) 201 + { 202 + unsigned int i; 203 + 204 + for (i = 0; i < nvm->nregions; i++) { 205 + if ((nvm->regions[i].offset + nvm->regions[i].size - 1) >= from && 206 + nvm->regions[i].offset <= from && 207 + nvm->regions[i].size != 
0) 208 + break; 209 + } 210 + 211 + return i; 212 + } 213 + 214 + static ssize_t idg_nvm_rewrite_partial(struct intel_dg_nvm *nvm, loff_t to, 215 + loff_t offset, size_t len, const u32 *newdata) 216 + { 217 + u32 data = idg_nvm_read32(nvm, to); 218 + 219 + if (idg_nvm_error(nvm)) 220 + return -EIO; 221 + 222 + memcpy((u8 *)&data + offset, newdata, len); 223 + 224 + idg_nvm_write32(nvm, to, data); 225 + if (idg_nvm_error(nvm)) 226 + return -EIO; 227 + 228 + return len; 229 + } 230 + 231 + static ssize_t idg_write(struct intel_dg_nvm *nvm, u8 region, 232 + loff_t to, size_t len, const unsigned char *buf) 233 + { 234 + size_t len_s = len; 235 + size_t to_shift; 236 + size_t len8; 237 + size_t len4; 238 + ssize_t ret; 239 + size_t to4; 240 + size_t i; 241 + 242 + idg_nvm_set_region_id(nvm, region); 243 + 244 + to4 = ALIGN_DOWN(to, sizeof(u32)); 245 + to_shift = min(sizeof(u32) - ((size_t)to - to4), len); 246 + if (to - to4) { 247 + ret = idg_nvm_rewrite_partial(nvm, to4, to - to4, to_shift, (u32 *)&buf[0]); 248 + if (ret < 0) 249 + return ret; 250 + 251 + buf += to_shift; 252 + to += to_shift; 253 + len_s -= to_shift; 254 + } 255 + 256 + if (!IS_ALIGNED(to, sizeof(u64)) && 257 + ((to ^ (to + len_s)) & GENMASK(31, 10))) { 258 + /* 259 + * Workaround reads/writes across 1k-aligned addresses 260 + * (start u32 before 1k, end u32 after) 261 + * as this fails on hardware. 
262 + */ 263 + u32 data; 264 + 265 + memcpy(&data, &buf[0], sizeof(u32)); 266 + idg_nvm_write32(nvm, to, data); 267 + if (idg_nvm_error(nvm)) 268 + return -EIO; 269 + buf += sizeof(u32); 270 + to += sizeof(u32); 271 + len_s -= sizeof(u32); 272 + } 273 + 274 + len8 = ALIGN_DOWN(len_s, sizeof(u64)); 275 + for (i = 0; i < len8; i += sizeof(u64)) { 276 + u64 data; 277 + 278 + memcpy(&data, &buf[i], sizeof(u64)); 279 + idg_nvm_write64(nvm, to + i, data); 280 + if (idg_nvm_error(nvm)) 281 + return -EIO; 282 + } 283 + 284 + len4 = len_s - len8; 285 + if (len4 >= sizeof(u32)) { 286 + u32 data; 287 + 288 + memcpy(&data, &buf[i], sizeof(u32)); 289 + idg_nvm_write32(nvm, to + i, data); 290 + if (idg_nvm_error(nvm)) 291 + return -EIO; 292 + i += sizeof(u32); 293 + len4 -= sizeof(u32); 294 + } 295 + 296 + if (len4 > 0) { 297 + ret = idg_nvm_rewrite_partial(nvm, to + i, 0, len4, (u32 *)&buf[i]); 298 + if (ret < 0) 299 + return ret; 300 + } 301 + 302 + return len; 303 + } 304 + 305 + static ssize_t idg_read(struct intel_dg_nvm *nvm, u8 region, 306 + loff_t from, size_t len, unsigned char *buf) 307 + { 308 + size_t len_s = len; 309 + size_t from_shift; 310 + size_t from4; 311 + size_t len8; 312 + size_t len4; 313 + size_t i; 314 + 315 + idg_nvm_set_region_id(nvm, region); 316 + 317 + from4 = ALIGN_DOWN(from, sizeof(u32)); 318 + from_shift = min(sizeof(u32) - ((size_t)from - from4), len); 319 + 320 + if (from - from4) { 321 + u32 data = idg_nvm_read32(nvm, from4); 322 + 323 + if (idg_nvm_error(nvm)) 324 + return -EIO; 325 + memcpy(&buf[0], (u8 *)&data + (from - from4), from_shift); 326 + len_s -= from_shift; 327 + buf += from_shift; 328 + from += from_shift; 329 + } 330 + 331 + if (!IS_ALIGNED(from, sizeof(u64)) && 332 + ((from ^ (from + len_s)) & GENMASK(31, 10))) { 333 + /* 334 + * Workaround reads/writes across 1k-aligned addresses 335 + * (start u32 before 1k, end u32 after) 336 + * as this fails on hardware. 
337 + */ 338 + u32 data = idg_nvm_read32(nvm, from); 339 + 340 + if (idg_nvm_error(nvm)) 341 + return -EIO; 342 + memcpy(&buf[0], &data, sizeof(data)); 343 + len_s -= sizeof(u32); 344 + buf += sizeof(u32); 345 + from += sizeof(u32); 346 + } 347 + 348 + len8 = ALIGN_DOWN(len_s, sizeof(u64)); 349 + for (i = 0; i < len8; i += sizeof(u64)) { 350 + u64 data = idg_nvm_read64(nvm, from + i); 351 + 352 + if (idg_nvm_error(nvm)) 353 + return -EIO; 354 + 355 + memcpy(&buf[i], &data, sizeof(data)); 356 + } 357 + 358 + len4 = len_s - len8; 359 + if (len4 >= sizeof(u32)) { 360 + u32 data = idg_nvm_read32(nvm, from + i); 361 + 362 + if (idg_nvm_error(nvm)) 363 + return -EIO; 364 + memcpy(&buf[i], &data, sizeof(data)); 365 + i += sizeof(u32); 366 + len4 -= sizeof(u32); 367 + } 368 + 369 + if (len4 > 0) { 370 + u32 data = idg_nvm_read32(nvm, from + i); 371 + 372 + if (idg_nvm_error(nvm)) 373 + return -EIO; 374 + memcpy(&buf[i], &data, len4); 375 + } 376 + 377 + return len; 378 + } 379 + 380 + static ssize_t 381 + idg_erase(struct intel_dg_nvm *nvm, u8 region, loff_t from, u64 len, u64 *fail_addr) 382 + { 383 + void __iomem *base2 = nvm->base2; 384 + void __iomem *base = nvm->base; 385 + const u32 block = 0x10; 386 + u32 iter = 0; 387 + u32 reg; 388 + u64 i; 389 + 390 + for (i = 0; i < len; i += SZ_4K) { 391 + iowrite32(from + i, base + NVM_ADDRESS_REG); 392 + iowrite32(region << 24 | block, base + NVM_ERASE_REG); 393 + if (nvm->non_posted_erase) { 394 + /* Wait for Erase Done */ 395 + reg = ioread32(base2 + NVM_DEBUG_REG); 396 + while (!(reg & NVM_NON_POSTED_ERASE_DONE) && 397 + ++iter < NVM_NON_POSTED_ERASE_DONE_ITER) { 398 + msleep(10); 399 + reg = ioread32(base2 + NVM_DEBUG_REG); 400 + } 401 + if (reg & NVM_NON_POSTED_ERASE_DONE) { 402 + /* Clear Erase Done */ 403 + iowrite32(reg, base2 + NVM_DEBUG_REG); 404 + } else { 405 + *fail_addr = from + i; 406 + return -ETIME; 407 + } 408 + } 409 + /* Since the writes are via sgunit 410 + * we cannot do back to back erases. 
411 + */ 412 + msleep(50); 413 + } 414 + return len; 415 + } 416 + 417 + static int intel_dg_nvm_init(struct intel_dg_nvm *nvm, struct device *device, 418 + bool non_posted_erase) 419 + { 420 + u32 access_map = 0; 421 + unsigned int i, n; 422 + int ret; 423 + 424 + /* clean error register, previous errors are ignored */ 425 + idg_nvm_error(nvm); 426 + 427 + ret = idg_nvm_is_valid(nvm); 428 + if (ret) { 429 + dev_err(device, "The MEM is not valid %d\n", ret); 430 + return ret; 431 + } 432 + 433 + if (idg_nvm_get_access_map(nvm, &access_map)) 434 + return -EIO; 435 + 436 + for (i = 0, n = 0; i < nvm->nregions; i++) { 437 + u32 address, base, limit, region; 438 + u8 id = nvm->regions[i].id; 439 + 440 + address = NVM_FLREG(id); 441 + region = idg_nvm_read32(nvm, address); 442 + 443 + base = FIELD_GET(NVM_FREG_BASE_MASK, region) << NVM_FREG_ADDR_SHIFT; 444 + limit = (FIELD_GET(NVM_FREG_ADDR_MASK, region) << NVM_FREG_ADDR_SHIFT) | 445 + NVM_FREG_MIN_REGION_SIZE; 446 + 447 + dev_dbg(device, "[%d] %s: region: 0x%08X base: 0x%08x limit: 0x%08x\n", 448 + id, nvm->regions[i].name, region, base, limit); 449 + 450 + if (base >= limit || (i > 0 && limit == 0)) { 451 + dev_dbg(device, "[%d] %s: disabled\n", 452 + id, nvm->regions[i].name); 453 + nvm->regions[i].is_readable = 0; 454 + continue; 455 + } 456 + 457 + if (nvm->size < limit) 458 + nvm->size = limit; 459 + 460 + nvm->regions[i].offset = base; 461 + nvm->regions[i].size = limit - base + 1; 462 + /* No write access to descriptor; mask it out*/ 463 + nvm->regions[i].is_writable = idg_nvm_region_writable(access_map, id); 464 + 465 + nvm->regions[i].is_readable = idg_nvm_region_readable(access_map, id); 466 + dev_dbg(device, "Registered, %s id=%d offset=%lld size=%lld rd=%d wr=%d\n", 467 + nvm->regions[i].name, 468 + nvm->regions[i].id, 469 + nvm->regions[i].offset, 470 + nvm->regions[i].size, 471 + nvm->regions[i].is_readable, 472 + nvm->regions[i].is_writable); 473 + 474 + if (nvm->regions[i].is_readable) 475 + n++; 476 + 
} 477 + 478 + nvm->non_posted_erase = non_posted_erase; 479 + 480 + dev_dbg(device, "Registered %d regions\n", n); 481 + dev_dbg(device, "Non posted erase %d\n", nvm->non_posted_erase); 482 + 483 + /* Need to add 1 to the amount of memory 484 + * so it is reported as an even block 485 + */ 486 + nvm->size += 1; 487 + 488 + return n; 489 + } 490 + 491 + static int intel_dg_mtd_erase(struct mtd_info *mtd, struct erase_info *info) 492 + { 493 + struct intel_dg_nvm *nvm = mtd->priv; 494 + size_t total_len; 495 + unsigned int idx; 496 + ssize_t bytes; 497 + loff_t from; 498 + size_t len; 499 + u8 region; 500 + u64 addr; 501 + 502 + if (WARN_ON(!nvm)) 503 + return -EINVAL; 504 + 505 + if (!IS_ALIGNED(info->addr, SZ_4K) || !IS_ALIGNED(info->len, SZ_4K)) { 506 + dev_err(&mtd->dev, "unaligned erase %llx %llx\n", 507 + info->addr, info->len); 508 + info->fail_addr = MTD_FAIL_ADDR_UNKNOWN; 509 + return -EINVAL; 510 + } 511 + 512 + total_len = info->len; 513 + addr = info->addr; 514 + 515 + guard(mutex)(&nvm->lock); 516 + 517 + while (total_len > 0) { 518 + if (!IS_ALIGNED(addr, SZ_4K) || !IS_ALIGNED(total_len, SZ_4K)) { 519 + dev_err(&mtd->dev, "unaligned erase %llx %zx\n", addr, total_len); 520 + info->fail_addr = addr; 521 + return -ERANGE; 522 + } 523 + 524 + idx = idg_nvm_get_region(nvm, addr); 525 + if (idx >= nvm->nregions) { 526 + dev_err(&mtd->dev, "out of range"); 527 + info->fail_addr = MTD_FAIL_ADDR_UNKNOWN; 528 + return -ERANGE; 529 + } 530 + 531 + from = addr - nvm->regions[idx].offset; 532 + region = nvm->regions[idx].id; 533 + len = total_len; 534 + if (len > nvm->regions[idx].size - from) 535 + len = nvm->regions[idx].size - from; 536 + 537 + dev_dbg(&mtd->dev, "erasing region[%d] %s from %llx len %zx\n", 538 + region, nvm->regions[idx].name, from, len); 539 + 540 + bytes = idg_erase(nvm, region, from, len, &info->fail_addr); 541 + if (bytes < 0) { 542 + dev_dbg(&mtd->dev, "erase failed with %zd\n", bytes); 543 + info->fail_addr += nvm->regions[idx].offset; 
544 + return bytes; 545 + } 546 + 547 + addr += len; 548 + total_len -= len; 549 + } 550 + 551 + return 0; 552 + } 553 + 554 + static int intel_dg_mtd_read(struct mtd_info *mtd, loff_t from, size_t len, 555 + size_t *retlen, u_char *buf) 556 + { 557 + struct intel_dg_nvm *nvm = mtd->priv; 558 + unsigned int idx; 559 + ssize_t ret; 560 + u8 region; 561 + 562 + if (WARN_ON(!nvm)) 563 + return -EINVAL; 564 + 565 + idx = idg_nvm_get_region(nvm, from); 566 + 567 + dev_dbg(&mtd->dev, "reading region[%d] %s from %lld len %zd\n", 568 + nvm->regions[idx].id, nvm->regions[idx].name, from, len); 569 + 570 + if (idx >= nvm->nregions) { 571 + dev_err(&mtd->dev, "out of range"); 572 + return -ERANGE; 573 + } 574 + 575 + from -= nvm->regions[idx].offset; 576 + region = nvm->regions[idx].id; 577 + if (len > nvm->regions[idx].size - from) 578 + len = nvm->regions[idx].size - from; 579 + 580 + guard(mutex)(&nvm->lock); 581 + 582 + ret = idg_read(nvm, region, from, len, buf); 583 + if (ret < 0) { 584 + dev_dbg(&mtd->dev, "read failed with %zd\n", ret); 585 + return ret; 586 + } 587 + 588 + *retlen = ret; 589 + 590 + return 0; 591 + } 592 + 593 + static int intel_dg_mtd_write(struct mtd_info *mtd, loff_t to, size_t len, 594 + size_t *retlen, const u_char *buf) 595 + { 596 + struct intel_dg_nvm *nvm = mtd->priv; 597 + unsigned int idx; 598 + ssize_t ret; 599 + u8 region; 600 + 601 + if (WARN_ON(!nvm)) 602 + return -EINVAL; 603 + 604 + idx = idg_nvm_get_region(nvm, to); 605 + 606 + dev_dbg(&mtd->dev, "writing region[%d] %s to %lld len %zd\n", 607 + nvm->regions[idx].id, nvm->regions[idx].name, to, len); 608 + 609 + if (idx >= nvm->nregions) { 610 + dev_err(&mtd->dev, "out of range"); 611 + return -ERANGE; 612 + } 613 + 614 + to -= nvm->regions[idx].offset; 615 + region = nvm->regions[idx].id; 616 + if (len > nvm->regions[idx].size - to) 617 + len = nvm->regions[idx].size - to; 618 + 619 + guard(mutex)(&nvm->lock); 620 + 621 + ret = idg_write(nvm, region, to, len, buf); 622 + if (ret < 
0) { 623 + dev_dbg(&mtd->dev, "write failed with %zd\n", ret); 624 + return ret; 625 + } 626 + 627 + *retlen = ret; 628 + 629 + return 0; 630 + } 631 + 632 + static void intel_dg_nvm_release(struct kref *kref) 633 + { 634 + struct intel_dg_nvm *nvm = container_of(kref, struct intel_dg_nvm, refcnt); 635 + int i; 636 + 637 + pr_debug("freeing intel_dg nvm\n"); 638 + for (i = 0; i < nvm->nregions; i++) 639 + kfree(nvm->regions[i].name); 640 + mutex_destroy(&nvm->lock); 641 + kfree(nvm); 642 + } 643 + 644 + static int intel_dg_mtd_get_device(struct mtd_info *mtd) 645 + { 646 + struct mtd_info *master = mtd_get_master(mtd); 647 + struct intel_dg_nvm *nvm = master->priv; 648 + 649 + if (WARN_ON(!nvm)) 650 + return -EINVAL; 651 + pr_debug("get mtd %s %d\n", mtd->name, kref_read(&nvm->refcnt)); 652 + kref_get(&nvm->refcnt); 653 + 654 + return 0; 655 + } 656 + 657 + static void intel_dg_mtd_put_device(struct mtd_info *mtd) 658 + { 659 + struct mtd_info *master = mtd_get_master(mtd); 660 + struct intel_dg_nvm *nvm = master->priv; 661 + 662 + if (WARN_ON(!nvm)) 663 + return; 664 + pr_debug("put mtd %s %d\n", mtd->name, kref_read(&nvm->refcnt)); 665 + kref_put(&nvm->refcnt, intel_dg_nvm_release); 666 + } 667 + 668 + static int intel_dg_nvm_init_mtd(struct intel_dg_nvm *nvm, struct device *device, 669 + unsigned int nparts, bool writable_override) 670 + { 671 + struct mtd_partition *parts = NULL; 672 + unsigned int i, n; 673 + int ret; 674 + 675 + dev_dbg(device, "registering with mtd\n"); 676 + 677 + nvm->mtd.owner = THIS_MODULE; 678 + nvm->mtd.dev.parent = device; 679 + nvm->mtd.flags = MTD_CAP_NORFLASH; 680 + nvm->mtd.type = MTD_DATAFLASH; 681 + nvm->mtd.priv = nvm; 682 + nvm->mtd._write = intel_dg_mtd_write; 683 + nvm->mtd._read = intel_dg_mtd_read; 684 + nvm->mtd._erase = intel_dg_mtd_erase; 685 + nvm->mtd._get_device = intel_dg_mtd_get_device; 686 + nvm->mtd._put_device = intel_dg_mtd_put_device; 687 + nvm->mtd.writesize = SZ_1; /* 1 byte granularity */ 688 + 
nvm->mtd.erasesize = SZ_4K; /* 4K bytes granularity */ 689 + nvm->mtd.size = nvm->size; 690 + 691 + parts = kcalloc(nvm->nregions, sizeof(*parts), GFP_KERNEL); 692 + if (!parts) 693 + return -ENOMEM; 694 + 695 + for (i = 0, n = 0; i < nvm->nregions && n < nparts; i++) { 696 + if (!nvm->regions[i].is_readable) 697 + continue; 698 + parts[n].name = nvm->regions[i].name; 699 + parts[n].offset = nvm->regions[i].offset; 700 + parts[n].size = nvm->regions[i].size; 701 + if (!nvm->regions[i].is_writable && !writable_override) 702 + parts[n].mask_flags = MTD_WRITEABLE; 703 + n++; 704 + } 705 + 706 + ret = mtd_device_register(&nvm->mtd, parts, n); 707 + 708 + kfree(parts); 709 + return ret; 710 + } 711 + 712 + static int intel_dg_mtd_probe(struct auxiliary_device *aux_dev, 713 + const struct auxiliary_device_id *aux_dev_id) 714 + { 715 + struct intel_dg_nvm_dev *invm = auxiliary_dev_to_intel_dg_nvm_dev(aux_dev); 716 + struct intel_dg_nvm *nvm; 717 + struct device *device; 718 + unsigned int nregions; 719 + unsigned int i, n; 720 + int ret; 721 + 722 + device = &aux_dev->dev; 723 + 724 + /* count available regions */ 725 + for (nregions = 0, i = 0; i < INTEL_DG_NVM_REGIONS; i++) { 726 + if (invm->regions[i].name) 727 + nregions++; 728 + } 729 + 730 + if (!nregions) { 731 + dev_err(device, "no regions defined\n"); 732 + return -ENODEV; 733 + } 734 + 735 + nvm = kzalloc(struct_size(nvm, regions, nregions), GFP_KERNEL); 736 + if (!nvm) 737 + return -ENOMEM; 738 + 739 + kref_init(&nvm->refcnt); 740 + mutex_init(&nvm->lock); 741 + 742 + for (n = 0, i = 0; i < INTEL_DG_NVM_REGIONS; i++) { 743 + if (!invm->regions[i].name) 744 + continue; 745 + 746 + char *name = kasprintf(GFP_KERNEL, "%s.%s", 747 + dev_name(&aux_dev->dev), invm->regions[i].name); 748 + if (!name) 749 + continue; 750 + nvm->regions[n].name = name; 751 + nvm->regions[n].id = i; 752 + n++; 753 + } 754 + nvm->nregions = n; /* in case where kasprintf fail */ 755 + 756 + nvm->base = devm_ioremap_resource(device, 
&invm->bar); 757 + if (IS_ERR(nvm->base)) { 758 + ret = PTR_ERR(nvm->base); 759 + goto err; 760 + } 761 + 762 + if (invm->non_posted_erase) { 763 + nvm->base2 = devm_ioremap_resource(device, &invm->bar2); 764 + if (IS_ERR(nvm->base2)) { 765 + ret = PTR_ERR(nvm->base2); 766 + goto err; 767 + } 768 + } 769 + 770 + ret = intel_dg_nvm_init(nvm, device, invm->non_posted_erase); 771 + if (ret < 0) { 772 + dev_err(device, "cannot initialize nvm %d\n", ret); 773 + goto err; 774 + } 775 + 776 + ret = intel_dg_nvm_init_mtd(nvm, device, ret, invm->writable_override); 777 + if (ret) { 778 + dev_err(device, "failed init mtd %d\n", ret); 779 + goto err; 780 + } 781 + 782 + dev_set_drvdata(&aux_dev->dev, nvm); 783 + 784 + return 0; 785 + 786 + err: 787 + kref_put(&nvm->refcnt, intel_dg_nvm_release); 788 + return ret; 789 + } 790 + 791 + static void intel_dg_mtd_remove(struct auxiliary_device *aux_dev) 792 + { 793 + struct intel_dg_nvm *nvm = dev_get_drvdata(&aux_dev->dev); 794 + 795 + if (!nvm) 796 + return; 797 + 798 + mtd_device_unregister(&nvm->mtd); 799 + 800 + dev_set_drvdata(&aux_dev->dev, NULL); 801 + 802 + kref_put(&nvm->refcnt, intel_dg_nvm_release); 803 + } 804 + 805 + static const struct auxiliary_device_id intel_dg_mtd_id_table[] = { 806 + { 807 + .name = "i915.nvm", 808 + }, 809 + { 810 + .name = "xe.nvm", 811 + }, 812 + { 813 + /* sentinel */ 814 + } 815 + }; 816 + MODULE_DEVICE_TABLE(auxiliary, intel_dg_mtd_id_table); 817 + 818 + static struct auxiliary_driver intel_dg_mtd_driver = { 819 + .probe = intel_dg_mtd_probe, 820 + .remove = intel_dg_mtd_remove, 821 + .driver = { 822 + /* auxiliary_driver_register() sets .name to be the modname */ 823 + }, 824 + .id_table = intel_dg_mtd_id_table 825 + }; 826 + module_auxiliary_driver(intel_dg_mtd_driver); 827 + 828 + MODULE_LICENSE("GPL"); 829 + MODULE_AUTHOR("Intel Corporation"); 830 + MODULE_DESCRIPTION("Intel DGFX MTD driver");
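The FLMSTR4 access map that `intel_dg_nvm_init()` decodes packs read/write permission bits for the 16 regions non-contiguously, as the comment in the driver lays out ([3:0] and [7:4] for regions 12-15, [19:8] and [31:20] for regions 0-11). The two helpers below mirror `idg_nvm_region_readable()`/`idg_nvm_region_writable()` as standalone functions so the bit layout can be checked directly:

```c
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define BIT(n)	(1u << (n))

/* Mirrors idg_nvm_region_readable(): regions 0-11 read state lives in
 * bits [19:8], regions 12-15 in bits [3:0]. */
static bool region_readable(uint32_t access_map, uint8_t region)
{
	if (region < 12)
		return access_map & BIT(region + 8);	/* [19:8] */
	return access_map & BIT(region - 12);		/* [3:0] */
}

/* Mirrors idg_nvm_region_writable(): regions 0-11 write state lives in
 * bits [31:20], regions 12-15 in bits [7:4]. */
static bool region_writable(uint32_t access_map, uint8_t region)
{
	if (region < 12)
		return access_map & BIT(region + 20);	/* [31:20] */
	return access_map & BIT(region - 8);		/* [7:4] */
}
```

The driver uses these results to decide which regions become readable MTD partitions and which get `MTD_WRITEABLE` masked off.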
-96
include/drm/drm_gpusvm.h
···
 struct drm_gpusvm_notifier;
 struct drm_gpusvm_ops;
 struct drm_gpusvm_range;
-struct drm_gpusvm_devmem;
 struct drm_pagemap;
 struct drm_pagemap_device_addr;
-
-/**
- * struct drm_gpusvm_devmem_ops - Operations structure for GPU SVM device memory
- *
- * This structure defines the operations for GPU Shared Virtual Memory (SVM)
- * device memory. These operations are provided by the GPU driver to manage device memory
- * allocations and perform operations such as migration between device memory and system
- * RAM.
- */
-struct drm_gpusvm_devmem_ops {
-	/**
-	 * @devmem_release: Release device memory allocation (optional)
-	 * @devmem_allocation: device memory allocation
-	 *
-	 * Release device memory allocation and drop a reference to device
-	 * memory allocation.
-	 */
-	void (*devmem_release)(struct drm_gpusvm_devmem *devmem_allocation);
-
-	/**
-	 * @populate_devmem_pfn: Populate device memory PFN (required for migration)
-	 * @devmem_allocation: device memory allocation
-	 * @npages: Number of pages to populate
-	 * @pfn: Array of page frame numbers to populate
-	 *
-	 * Populate device memory page frame numbers (PFN).
-	 *
-	 * Return: 0 on success, a negative error code on failure.
-	 */
-	int (*populate_devmem_pfn)(struct drm_gpusvm_devmem *devmem_allocation,
-				   unsigned long npages, unsigned long *pfn);
-
-	/**
-	 * @copy_to_devmem: Copy to device memory (required for migration)
-	 * @pages: Pointer to array of device memory pages (destination)
-	 * @dma_addr: Pointer to array of DMA addresses (source)
-	 * @npages: Number of pages to copy
-	 *
-	 * Copy pages to device memory.
-	 *
-	 * Return: 0 on success, a negative error code on failure.
-	 */
-	int (*copy_to_devmem)(struct page **pages,
-			      dma_addr_t *dma_addr,
-			      unsigned long npages);
-
-	/**
-	 * @copy_to_ram: Copy to system RAM (required for migration)
-	 * @pages: Pointer to array of device memory pages (source)
-	 * @dma_addr: Pointer to array of DMA addresses (destination)
-	 * @npages: Number of pages to copy
-	 *
-	 * Copy pages to system RAM.
-	 *
-	 * Return: 0 on success, a negative error code on failure.
-	 */
-	int (*copy_to_ram)(struct page **pages,
-			   dma_addr_t *dma_addr,
-			   unsigned long npages);
-};
-
-/**
- * struct drm_gpusvm_devmem - Structure representing a GPU SVM device memory allocation
- *
- * @dev: Pointer to the device structure which device memory allocation belongs to
- * @mm: Pointer to the mm_struct for the address space
- * @detached: device memory allocations is detached from device pages
- * @ops: Pointer to the operations structure for GPU SVM device memory
- * @dpagemap: The struct drm_pagemap of the pages this allocation belongs to.
- * @size: Size of device memory allocation
- * @timeslice_expiration: Timeslice expiration in jiffies
- */
-struct drm_gpusvm_devmem {
-	struct device *dev;
-	struct mm_struct *mm;
-	struct completion detached;
-	const struct drm_gpusvm_devmem_ops *ops;
-	struct drm_pagemap *dpagemap;
-	size_t size;
-	u64 timeslice_expiration;
-};
 
 /**
  * struct drm_gpusvm_ops - Operations structure for GPU SVM
···
 			      struct drm_gpusvm_range *range,
 			      const struct drm_gpusvm_ctx *ctx);
 
-int drm_gpusvm_migrate_to_devmem(struct drm_gpusvm *gpusvm,
-				 struct drm_gpusvm_range *range,
-				 struct drm_gpusvm_devmem *devmem_allocation,
-				 const struct drm_gpusvm_ctx *ctx);
-
-int drm_gpusvm_evict_to_ram(struct drm_gpusvm_devmem *devmem_allocation);
-
-const struct dev_pagemap_ops *drm_gpusvm_pagemap_ops_get(void);
-
 bool drm_gpusvm_has_mapping(struct drm_gpusvm *gpusvm, unsigned long start,
 			    unsigned long end);
 
···
 
 void drm_gpusvm_range_set_unmapped(struct drm_gpusvm_range *range,
 				   const struct mmu_notifier_range *mmu_range);
-
-void drm_gpusvm_devmem_init(struct drm_gpusvm_devmem *devmem_allocation,
-			    struct device *dev, struct mm_struct *mm,
-			    const struct drm_gpusvm_devmem_ops *ops,
-			    struct drm_pagemap *dpagemap, size_t size);
 
 #ifdef CONFIG_LOCKDEP
 /**
+135
include/drm/drm_pagemap.h
···
 #include <linux/types.h>
 
 struct drm_pagemap;
+struct drm_pagemap_zdd;
 struct device;
 
 /**
···
 			     struct device *dev,
 			     struct drm_pagemap_device_addr addr);
 
+	/**
+	 * @populate_mm: Populate part of the mm with @dpagemap memory,
+	 * migrating existing data.
+	 * @dpagemap: The struct drm_pagemap managing the memory.
+	 * @start: The virtual start address in @mm
+	 * @end: The virtual end address in @mm
+	 * @mm: Pointer to a live mm. The caller must have an mmget()
+	 * reference.
+	 *
+	 * The caller will have the mm lock at least in read mode.
+	 * Note that there is no guarantee that the memory is resident
+	 * after the function returns, it's best effort only.
+	 * When the mm is not using the memory anymore,
+	 * it will be released. The struct drm_pagemap might have a
+	 * mechanism in place to reclaim the memory and the data will
+	 * then be migrated. Typically to system memory.
+	 * The implementation should hold sufficient runtime power-
+	 * references while pages are used in an address space and
+	 * should ideally guard against hardware device unbind in
+	 * a way such that device pages are migrated back to system
+	 * followed by device page removal. The implementation should
+	 * return -ENODEV after device removal.
+	 *
+	 * Return: 0 if successful. Negative error code on error.
+	 */
+	int (*populate_mm)(struct drm_pagemap *dpagemap,
+			   unsigned long start, unsigned long end,
+			   struct mm_struct *mm,
+			   unsigned long timeslice_ms);
 };
 
 /**
···
 	const struct drm_pagemap_ops *ops;
 	struct device *dev;
 };
+
+struct drm_pagemap_devmem;
+
+/**
+ * struct drm_pagemap_devmem_ops - Operations structure for GPU SVM device memory
+ *
+ * This structure defines the operations for GPU Shared Virtual Memory (SVM)
+ * device memory. These operations are provided by the GPU driver to manage device memory
+ * allocations and perform operations such as migration between device memory and system
+ * RAM.
+ */
+struct drm_pagemap_devmem_ops {
+	/**
+	 * @devmem_release: Release device memory allocation (optional)
+	 * @devmem_allocation: device memory allocation
+	 *
+	 * Release device memory allocation and drop a reference to device
+	 * memory allocation.
+	 */
+	void (*devmem_release)(struct drm_pagemap_devmem *devmem_allocation);
+
+	/**
+	 * @populate_devmem_pfn: Populate device memory PFN (required for migration)
+	 * @devmem_allocation: device memory allocation
+	 * @npages: Number of pages to populate
+	 * @pfn: Array of page frame numbers to populate
+	 *
+	 * Populate device memory page frame numbers (PFN).
+	 *
+	 * Return: 0 on success, a negative error code on failure.
+	 */
+	int (*populate_devmem_pfn)(struct drm_pagemap_devmem *devmem_allocation,
+				   unsigned long npages, unsigned long *pfn);
+
+	/**
+	 * @copy_to_devmem: Copy to device memory (required for migration)
+	 * @pages: Pointer to array of device memory pages (destination)
+	 * @dma_addr: Pointer to array of DMA addresses (source)
+	 * @npages: Number of pages to copy
+	 *
+	 * Copy pages to device memory.
+	 *
+	 * Return: 0 on success, a negative error code on failure.
+	 */
+	int (*copy_to_devmem)(struct page **pages,
+			      dma_addr_t *dma_addr,
+			      unsigned long npages);
+
+	/**
+	 * @copy_to_ram: Copy to system RAM (required for migration)
+	 * @pages: Pointer to array of device memory pages (source)
+	 * @dma_addr: Pointer to array of DMA addresses (destination)
+	 * @npages: Number of pages to copy
+	 *
+	 * Copy pages to system RAM.
+	 *
+	 * Return: 0 on success, a negative error code on failure.
+	 */
+	int (*copy_to_ram)(struct page **pages,
+			   dma_addr_t *dma_addr,
+			   unsigned long npages);
+};
+
+/**
+ * struct drm_pagemap_devmem - Structure representing a GPU SVM device memory allocation
+ *
+ * @dev: Pointer to the device structure which device memory allocation belongs to
+ * @mm: Pointer to the mm_struct for the address space
+ * @detached: device memory allocations is detached from device pages
+ * @ops: Pointer to the operations structure for GPU SVM device memory
+ * @dpagemap: The struct drm_pagemap of the pages this allocation belongs to.
+ * @size: Size of device memory allocation
+ * @timeslice_expiration: Timeslice expiration in jiffies
+ */
+struct drm_pagemap_devmem {
+	struct device *dev;
+	struct mm_struct *mm;
+	struct completion detached;
+	const struct drm_pagemap_devmem_ops *ops;
+	struct drm_pagemap *dpagemap;
+	size_t size;
+	u64 timeslice_expiration;
+};
+
+int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation,
+				  struct mm_struct *mm,
+				  unsigned long start, unsigned long end,
+				  unsigned long timeslice_ms,
+				  void *pgmap_owner);
+
+int drm_pagemap_evict_to_ram(struct drm_pagemap_devmem *devmem_allocation);
+
+const struct dev_pagemap_ops *drm_pagemap_pagemap_ops_get(void);
+
+struct drm_pagemap *drm_pagemap_page_to_dpagemap(struct page *page);
+
+void drm_pagemap_devmem_init(struct drm_pagemap_devmem *devmem_allocation,
+			     struct device *dev, struct mm_struct *mm,
+			     const struct drm_pagemap_devmem_ops *ops,
+			     struct drm_pagemap *dpagemap, size_t size);
+
+int drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
+			    unsigned long start, unsigned long end,
+			    struct mm_struct *mm,
+			    unsigned long timeslice_ms);
 
 #endif
+4 -1
include/drm/intel/pciids.h
···
 /* BMG */
 #define INTEL_BMG_IDS(MACRO__, ...) \
 	MACRO__(0xE202, ## __VA_ARGS__), \
+	MACRO__(0xE209, ## __VA_ARGS__), \
 	MACRO__(0xE20B, ## __VA_ARGS__), \
 	MACRO__(0xE20C, ## __VA_ARGS__), \
 	MACRO__(0xE20D, ## __VA_ARGS__), \
···
 	MACRO__(0xB08F, ## __VA_ARGS__), \
 	MACRO__(0xB090, ## __VA_ARGS__), \
 	MACRO__(0xB0A0, ## __VA_ARGS__), \
-	MACRO__(0xB0B0, ## __VA_ARGS__)
+	MACRO__(0xB0B0, ## __VA_ARGS__), \
+	MACRO__(0xFD80, ## __VA_ARGS__), \
+	MACRO__(0xFD81, ## __VA_ARGS__)
 
 #endif /* __PCIIDS_H__ */
+32
include/linux/intel_dg_nvm_aux.h
···
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright(c) 2019-2025, Intel Corporation. All rights reserved.
+ */
+
+#ifndef __INTEL_DG_NVM_AUX_H__
+#define __INTEL_DG_NVM_AUX_H__
+
+#include <linux/auxiliary_bus.h>
+#include <linux/container_of.h>
+#include <linux/ioport.h>
+#include <linux/types.h>
+
+#define INTEL_DG_NVM_REGIONS 13
+
+struct intel_dg_nvm_region {
+	const char *name;
+};
+
+struct intel_dg_nvm_dev {
+	struct auxiliary_device aux_dev;
+	bool writable_override;
+	bool non_posted_erase;
+	struct resource bar;
+	struct resource bar2;
+	const struct intel_dg_nvm_region *regions;
+};
+
+#define auxiliary_dev_to_intel_dg_nvm_dev(auxiliary_dev) \
+	container_of(auxiliary_dev, struct intel_dg_nvm_dev, aux_dev)
+
+#endif /* __INTEL_DG_NVM_AUX_H__ */
+4 -4
include/uapi/drm/xe_drm.h
···
  * - %DRM_XE_VM_CREATE_FLAG_LR_MODE - An LR, or Long Running VM accepts
  *   exec submissions to its exec_queues that don't have an upper time
  *   limit on the job execution time. But exec submissions to these
- *   don't allow any of the flags DRM_XE_SYNC_FLAG_SYNCOBJ,
- *   DRM_XE_SYNC_FLAG_TIMELINE_SYNCOBJ, DRM_XE_SYNC_FLAG_DMA_BUF,
- *   used as out-syncobjs, that is, together with DRM_XE_SYNC_FLAG_SIGNAL.
+ *   don't allow any of the sync types DRM_XE_SYNC_TYPE_SYNCOBJ,
+ *   DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ, used as out-syncobjs, that is,
+ *   together with sync flag DRM_XE_SYNC_FLAG_SIGNAL.
  *   LR VMs can be created in recoverable page-fault mode using
  *   DRM_XE_VM_CREATE_FLAG_FAULT_MODE, if the device supports it.
  *   If that flag is omitted, the UMD can not rely on the slightly
···
 
 	/**
 	 * @timeline_value: Input for the timeline sync object. Needs to be
-	 * different than 0 when used with %DRM_XE_SYNC_FLAG_TIMELINE_SYNCOBJ.
+	 * different than 0 when used with %DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ.
 	 */
 	__u64 timeline_value;
 