
drm/xe: Convert the CPU fault handler for exhaustive eviction

The CPU fault handler may populate bos and migrate them, and in
doing so it might interfere with other tasks that are validating.

Completely rework the CPU fault handler into a fastpath and a
slowpath. The fastpath only trylocks the validation lock in read
mode. If that fails, we fall back to the slowpath, where we do a
full validation transaction.

This mandates open-coding of bo locking, bo idling and
bo populating, but we still call into TTM to finalize the
fault.
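
In outline, the reworked handler looks as follows. This is a condensed,
illustrative sketch of the code added in the diff below; runtime PM,
error translation, asserts and secondary declarations are elided:

	static vm_fault_t xe_bo_cpu_fault(struct vm_fault *vmf)
	{
		struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
		struct xe_bo *bo = ttm_to_xe_bo(tbo);
		struct xe_device *xe = to_xe_device(tbo->base.dev);
		bool needs_rpm = bo->flags & XE_BO_FLAG_VRAM_MASK;
		bool retry_after_wait = false;
		vm_fault_t ret;

		/* Fastpath: validation lock and bo lock are only trylocked,
		 * migration / population is attempted without blocking. */
		ret = xe_bo_cpu_fault_fastpath(vmf, xe, bo, needs_rpm);
		if (ret != VM_FAULT_RETRY)
			return ret;

		/* Slowpath: drop the mmap_lock if the core allows it. */
		if (fault_flag_allow_retry_first(vmf->flags)) {
			retry_after_wait = true;
			xe_bo_get(bo);
			mmap_read_unlock(vmf->vma->vm_mm);
		}

		/* Full validation transaction, resolving blocking waits. */
		xe_validation_guard(&ctx, &xe->val, &exec,
				    (struct xe_val_flags) {.interruptible = true}, err) {
			err = drm_exec_lock_obj(&exec, &tbo->base);
			err = xe_bo_fault_migrate(bo, &tctx, &exec);
			dma_resv_wait_timeout(tbo->base.resv, DMA_RESV_USAGE_KERNEL,
					      true, MAX_SCHEDULE_TIMEOUT);
			if (!retry_after_wait)
				ret = __xe_bo_cpu_fault(vmf, xe, bo);
		}

		/* With the mmap_lock dropped we must return VM_FAULT_RETRY. */
		return ret;
	}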

v2:
- Rework the CPU fault handler to actually take part in
the exhaustive eviction scheme (Matthew Brost).
v3:
- Don't return anything but VM_FAULT_RETRY if we've dropped the
mmap_lock. Not even if a signal is pending.
- Rebase on gpu_madvise() and split out fault migration.
- Wait for idle after migration.
- Check whether the resource manager uses TTs to determine
whether to map the TT or iomem.
- Add a number of asserts.
- Allow passing a ttm_operation_ctx to xe_bo_migrate() so that
it's possible to try non-blocking migration (see the example
after this changelog).
- Don't fall through to TTM on migration / population error.
Instead, remove the gfp_retry_mayfail in mode 2 where we
must succeed. (Matthew Brost)
v5:
- Don't allow faulting in the imported bo case (Matthew Brost)
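
For reference, the xe_bo_migrate() calls in this patch are driven in one
of two ways. This is a sketch based on the callers changed in the diff
below; names are as in the diff:

	/* Non-blocking attempt, as done from the CPU fault fastpath. */
	struct ttm_operation_ctx tctx = {
		.interruptible = true,
		.no_wait_gpu = true,	/* typically fails with -EBUSY rather than waiting */
		.gfp_retry_mayfail = true,
	};

	err = xe_bo_migrate(bo, XE_PL_TT, &tctx, NULL);

	/* Default interruptible, blocking migration (all other callers). */
	err = xe_bo_migrate(bo, XE_PL_TT, NULL, exec);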

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20250908101246.65025-7-thomas.hellstrom@linux.intel.com

+226 -52
+1 -1
drivers/gpu/drm/xe/display/xe_fb_pin.c
···
 		goto err;
 
 	if (IS_DGFX(xe))
-		ret = xe_bo_migrate(bo, XE_PL_VRAM0, exec);
+		ret = xe_bo_migrate(bo, XE_PL_VRAM0, NULL, exec);
 	else
 		ret = xe_bo_validate(bo, NULL, true, exec);
 	if (!ret)
+216 -45
drivers/gpu/drm/xe/xe_bo.c
···
 		bo->attr.atomic_access == DRM_XE_ATOMIC_CPU;
 }
 
-static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
+/* Populate the bo if swapped out, or migrate if the access mode requires that. */
+static int xe_bo_fault_migrate(struct xe_bo *bo, struct ttm_operation_ctx *ctx,
+			       struct drm_exec *exec)
+{
+	struct ttm_buffer_object *tbo = &bo->ttm;
+	int err = 0;
+
+	if (ttm_manager_type(tbo->bdev, tbo->resource->mem_type)->use_tt) {
+		xe_assert(xe_bo_device(bo),
+			  dma_resv_test_signaled(tbo->base.resv, DMA_RESV_USAGE_KERNEL) ||
+			  (tbo->ttm && ttm_tt_is_populated(tbo->ttm)));
+		err = ttm_bo_populate(&bo->ttm, ctx);
+	} else if (should_migrate_to_smem(bo)) {
+		xe_assert(xe_bo_device(bo), bo->flags & XE_BO_FLAG_SYSTEM);
+		err = xe_bo_migrate(bo, XE_PL_TT, ctx, exec);
+	}
+
+	return err;
+}
+
+/* Call into TTM to populate PTEs, and register bo for PTE removal on runtime suspend. */
+static vm_fault_t __xe_bo_cpu_fault(struct vm_fault *vmf, struct xe_device *xe, struct xe_bo *bo)
+{
+	vm_fault_t ret;
+
+	trace_xe_bo_cpu_fault(bo);
+
+	ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
+				       TTM_BO_VM_NUM_PREFAULT);
+	/*
+	 * When TTM is actually called to insert PTEs, ensure no blocking conditions
+	 * remain, in which case TTM may drop locks and return VM_FAULT_RETRY.
+	 */
+	xe_assert(xe, ret != VM_FAULT_RETRY);
+
+	if (ret == VM_FAULT_NOPAGE &&
+	    mem_type_is_vram(bo->ttm.resource->mem_type)) {
+		mutex_lock(&xe->mem_access.vram_userfault.lock);
+		if (list_empty(&bo->vram_userfault_link))
+			list_add(&bo->vram_userfault_link,
+				 &xe->mem_access.vram_userfault.list);
+		mutex_unlock(&xe->mem_access.vram_userfault.lock);
+	}
+
+	return ret;
+}
+
+static vm_fault_t xe_err_to_fault_t(int err)
+{
+	switch (err) {
+	case 0:
+	case -EINTR:
+	case -ERESTARTSYS:
+	case -EAGAIN:
+		return VM_FAULT_NOPAGE;
+	case -ENOMEM:
+	case -ENOSPC:
+		return VM_FAULT_OOM;
+	default:
+		break;
+	}
+	return VM_FAULT_SIGBUS;
+}
+
+static bool xe_ttm_bo_is_imported(struct ttm_buffer_object *tbo)
+{
+	dma_resv_assert_held(tbo->base.resv);
+
+	return tbo->ttm &&
+	       (tbo->ttm->page_flags & (TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE)) ==
+	       TTM_TT_FLAG_EXTERNAL;
+}
+
+static vm_fault_t xe_bo_cpu_fault_fastpath(struct vm_fault *vmf, struct xe_device *xe,
+					   struct xe_bo *bo, bool needs_rpm)
+{
+	struct ttm_buffer_object *tbo = &bo->ttm;
+	vm_fault_t ret = VM_FAULT_RETRY;
+	struct xe_validation_ctx ctx;
+	struct ttm_operation_ctx tctx = {
+		.interruptible = true,
+		.no_wait_gpu = true,
+		.gfp_retry_mayfail = true,
+
+	};
+	int err;
+
+	if (needs_rpm && !xe_pm_runtime_get_if_active(xe))
+		return VM_FAULT_RETRY;
+
+	err = xe_validation_ctx_init(&ctx, &xe->val, NULL,
+				     (struct xe_val_flags) {
+					     .interruptible = true,
+					     .no_block = true
+				     });
+	if (err)
+		goto out_pm;
+
+	if (!dma_resv_trylock(tbo->base.resv))
+		goto out_validation;
+
+	if (xe_ttm_bo_is_imported(tbo)) {
+		ret = VM_FAULT_SIGBUS;
+		drm_dbg(&xe->drm, "CPU trying to access an imported buffer object.\n");
+		goto out_unlock;
+	}
+
+	err = xe_bo_fault_migrate(bo, &tctx, NULL);
+	if (err) {
+		/* Return VM_FAULT_RETRY on these errors. */
+		if (err != -ENOMEM && err != -ENOSPC && err != -EBUSY)
+			ret = xe_err_to_fault_t(err);
+		goto out_unlock;
+	}
+
+	if (dma_resv_test_signaled(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL))
+		ret = __xe_bo_cpu_fault(vmf, xe, bo);
+
+out_unlock:
+	dma_resv_unlock(tbo->base.resv);
+out_validation:
+	xe_validation_ctx_fini(&ctx);
+out_pm:
+	if (needs_rpm)
+		xe_pm_runtime_put(xe);
+
+	return ret;
+}
+
+static vm_fault_t xe_bo_cpu_fault(struct vm_fault *vmf)
 {
 	struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
 	struct drm_device *ddev = tbo->base.dev;
 	struct xe_device *xe = to_xe_device(ddev);
 	struct xe_bo *bo = ttm_to_xe_bo(tbo);
 	bool needs_rpm = bo->flags & XE_BO_FLAG_VRAM_MASK;
-	struct drm_exec *exec;
+	bool retry_after_wait = false;
+	struct xe_validation_ctx ctx;
+	struct drm_exec exec;
 	vm_fault_t ret;
-	int idx, r = 0;
+	int err = 0;
+	int idx;
+
+	if (!drm_dev_enter(&xe->drm, &idx))
+		return ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
+
+	ret = xe_bo_cpu_fault_fastpath(vmf, xe, bo, needs_rpm);
+	if (ret != VM_FAULT_RETRY)
+		goto out;
+
+	if (fault_flag_allow_retry_first(vmf->flags)) {
+		if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
+			goto out;
+		retry_after_wait = true;
+		xe_bo_get(bo);
+		mmap_read_unlock(vmf->vma->vm_mm);
+	} else {
+		ret = VM_FAULT_NOPAGE;
+	}
+
+	/*
+	 * The fastpath failed and we were not required to return and retry immediately.
+	 * We're now running in one of two modes:
+	 *
+	 * 1) retry_after_wait == true: The mmap_read_lock() is dropped, and we're trying
+	 * to resolve blocking waits. But we can't resolve the fault since the
+	 * mmap_read_lock() is dropped. After retrying the fault, the aim is that the fastpath
+	 * should succeed. But it may fail since we drop the bo lock.
+	 *
+	 * 2) retry_after_wait == false: The fastpath failed, typically even after
+	 * a retry. Do whatever's necessary to resolve the fault.
+	 *
+	 * This construct is recommended to avoid excessive waits under the mmap_lock.
+	 */
 
 	if (needs_rpm)
 		xe_pm_runtime_get(xe);
 
-	exec = XE_VALIDATION_UNIMPLEMENTED;
-	ret = ttm_bo_vm_reserve(tbo, vmf);
-	if (ret)
-		goto out;
+	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true},
+			    err) {
+		struct ttm_operation_ctx tctx = {
+			.interruptible = true,
+			.no_wait_gpu = false,
+			.gfp_retry_mayfail = retry_after_wait,
+		};
+		long lerr;
 
-	if (drm_dev_enter(ddev, &idx)) {
-		trace_xe_bo_cpu_fault(bo);
+		err = drm_exec_lock_obj(&exec, &tbo->base);
+		drm_exec_retry_on_contention(&exec);
+		if (err)
+			break;
 
-		xe_validation_assert_exec(xe, exec, &tbo->base);
-		if (should_migrate_to_smem(bo)) {
-			xe_assert(xe, bo->flags & XE_BO_FLAG_SYSTEM);
-
-			r = xe_bo_migrate(bo, XE_PL_TT, exec);
-			if (r == -EBUSY || r == -ERESTARTSYS || r == -EINTR)
-				ret = VM_FAULT_NOPAGE;
-			else if (r)
-				ret = VM_FAULT_SIGBUS;
+		if (xe_ttm_bo_is_imported(tbo)) {
+			err = -EFAULT;
+			drm_dbg(&xe->drm, "CPU trying to access an imported buffer object.\n");
+			break;
 		}
-		if (!ret)
-			ret = ttm_bo_vm_fault_reserved(vmf,
-						       vmf->vma->vm_page_prot,
-						       TTM_BO_VM_NUM_PREFAULT);
-		drm_dev_exit(idx);
 
-		if (ret == VM_FAULT_RETRY &&
-		    !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
-			goto out;
-
-		/*
-		 * ttm_bo_vm_reserve() already has dma_resv_lock.
-		 */
-		if (ret == VM_FAULT_NOPAGE &&
-		    mem_type_is_vram(tbo->resource->mem_type)) {
-			mutex_lock(&xe->mem_access.vram_userfault.lock);
-			if (list_empty(&bo->vram_userfault_link))
-				list_add(&bo->vram_userfault_link,
-					 &xe->mem_access.vram_userfault.list);
-			mutex_unlock(&xe->mem_access.vram_userfault.lock);
+		err = xe_bo_fault_migrate(bo, &tctx, &exec);
+		if (err) {
+			drm_exec_retry_on_contention(&exec);
+			xe_validation_retry_on_oom(&ctx, &err);
+			break;
 		}
-	} else {
-		ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
+
+		lerr = dma_resv_wait_timeout(tbo->base.resv,
+					     DMA_RESV_USAGE_KERNEL, true,
+					     MAX_SCHEDULE_TIMEOUT);
+		if (lerr < 0) {
+			err = lerr;
+			break;
+		}
+
+		if (!retry_after_wait)
+			ret = __xe_bo_cpu_fault(vmf, xe, bo);
 	}
+	/* if retry_after_wait == true, we *must* return VM_FAULT_RETRY. */
+	if (err && !retry_after_wait)
+		ret = xe_err_to_fault_t(err);
 
-	dma_resv_unlock(tbo->base.resv);
-out:
 	if (needs_rpm)
 		xe_pm_runtime_put(xe);
+
+	if (retry_after_wait)
+		xe_bo_put(bo);
+out:
+	drm_dev_exit(idx);
 
 	return ret;
 }
···
 }
 
 static const struct vm_operations_struct xe_gem_vm_ops = {
-	.fault = xe_gem_fault,
+	.fault = xe_bo_cpu_fault,
 	.open = ttm_bo_vm_open,
 	.close = ttm_bo_vm_close,
 	.access = xe_bo_vm_access,
···
  * xe_bo_migrate - Migrate an object to the desired region id
  * @bo: The buffer object to migrate.
  * @mem_type: The TTM region type to migrate to.
+ * @tctx: A pointer to a struct ttm_operation_ctx or NULL if
+ * a default interruptibe ctx is to be used.
  * @exec: The drm_exec transaction to use for exhaustive eviction.
  *
  * Attempt to migrate the buffer object to the desired memory region. The
···
  * Return: 0 on success. Negative error code on failure. In particular may
  * return -EINTR or -ERESTARTSYS if signal pending.
  */
-int xe_bo_migrate(struct xe_bo *bo, u32 mem_type, struct drm_exec *exec)
+int xe_bo_migrate(struct xe_bo *bo, u32 mem_type, struct ttm_operation_ctx *tctx,
+		  struct drm_exec *exec)
 {
 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
 	struct ttm_operation_ctx ctx = {
···
 	struct ttm_place requested;
 
 	xe_bo_assert_held(bo);
+	tctx = tctx ? tctx : &ctx;
 
 	if (bo->ttm.resource->mem_type == mem_type)
 		return 0;
···
 		add_vram(xe, bo, &requested, bo->flags, mem_type, &c);
 	}
 
-	xe_validation_assert_exec(xe_bo_device(bo), exec, &bo->ttm.base);
-	return ttm_bo_validate(&bo->ttm, &placement, &ctx);
+	if (!tctx->no_wait_gpu)
+		xe_validation_assert_exec(xe_bo_device(bo), exec, &bo->ttm.base);
+	return ttm_bo_validate(&bo->ttm, &placement, tctx);
 }
 
 /**
+2 -1
drivers/gpu/drm/xe/xe_bo.h
···
 
 bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type);
 
-int xe_bo_migrate(struct xe_bo *bo, u32 mem_type, struct drm_exec *exec);
+int xe_bo_migrate(struct xe_bo *bo, u32 mem_type, struct ttm_operation_ctx *ctc,
+		  struct drm_exec *exec);
 int xe_bo_evict(struct xe_bo *bo, struct drm_exec *exec);
 
 int xe_bo_evict_pinned(struct xe_bo *bo);
+3 -3
drivers/gpu/drm/xe/xe_dma_buf.c
···
 		return -EINVAL;
 	}
 
-	ret = xe_bo_migrate(bo, XE_PL_TT, exec);
+	ret = xe_bo_migrate(bo, XE_PL_TT, NULL, exec);
 	if (ret) {
 		if (ret != -EINTR && ret != -ERESTARTSYS)
 			drm_dbg(&xe->drm,
···
 
 	if (!xe_bo_is_pinned(bo)) {
 		if (!attach->peer2peer)
-			r = xe_bo_migrate(bo, XE_PL_TT, exec);
+			r = xe_bo_migrate(bo, XE_PL_TT, NULL, exec);
 		else
 			r = xe_bo_validate(bo, NULL, false, exec);
 		if (r)
···
 
 	/* Can we do interruptible lock here? */
 	xe_bo_lock(bo, false);
-	(void)xe_bo_migrate(bo, XE_PL_TT, exec);
+	(void)xe_bo_migrate(bo, XE_PL_TT, NULL, exec);
 	xe_bo_unlock(bo);
 
 	return 0;
+1 -1
drivers/gpu/drm/xe/xe_gt_pagefault.c
···
 	if (!bo)
 		return 0;
 
-	return need_vram_move ? xe_bo_migrate(bo, vram->placement, exec) :
+	return need_vram_move ? xe_bo_migrate(bo, vram->placement, NULL, exec) :
 				xe_bo_validate(bo, vm, true, exec);
 }
 
+2 -1
drivers/gpu/drm/xe/xe_validation.c
···
  */
 void xe_validation_ctx_fini(struct xe_validation_ctx *ctx)
 {
-	drm_exec_fini(ctx->exec);
+	if (ctx->exec)
+		drm_exec_fini(ctx->exec);
 	xe_validation_unlock(ctx);
 }
 
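
The hunk above lets xe_validation_ctx_fini() tolerate contexts that were
initialized without a drm_exec transaction, as the CPU fault fastpath in
the xe_bo.c hunk now does. A sketch of that usage, taken from the fastpath
code above:

	struct xe_validation_ctx ctx;

	/* Trylock-only validation context, no drm_exec transaction. */
	err = xe_validation_ctx_init(&ctx, &xe->val, NULL,
				     (struct xe_val_flags) {
					     .interruptible = true,
					     .no_block = true
				     });
	...
	xe_validation_ctx_fini(&ctx);	/* must not call drm_exec_fini(NULL) */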
+1
drivers/gpu/drm/xe/xe_vm.c
···
 		if (!err && !xe_vma_has_no_bo(vma))
 			err = xe_bo_migrate(xe_vma_bo(vma),
 					    region_to_mem_type[region],
+					    NULL,
 					    exec);
 		break;
 	}