Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/xe/bo: Update atomic_access attribute on madvise

Update bo->attr.atomic_access based on user-provided input, and determine
whether migration to smem is required during a CPU fault.

v2 (Matthew Brost)
- Avoid cpu unmapping if bo is already in smem
- Check atomics on smem too for ioctl
- Add comments

v3
- Avoid migration in prefetch

v4 (Matthew Brost)
- Make sanity check function bool
- Add assert for smem placement
- Fix doc

v5 (Matthew Brost)
- NACK atomic fault with DRM_XE_ATOMIC_CPU

Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20250821173104.3030148-16-himal.prasad.ghimiray@intel.com
Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>

+102 -28
+26 -3
drivers/gpu/drm/xe/xe_bo.c
··· 1712 1712 } 1713 1713 } 1714 1714 1715 + static bool should_migrate_to_smem(struct xe_bo *bo) 1716 + { 1717 + /* 1718 + * NOTE: The following atomic checks are platform-specific. For example, 1719 + * if a device supports CXL atomics, these may not be necessary or 1720 + * may behave differently. 1721 + */ 1722 + 1723 + return bo->attr.atomic_access == DRM_XE_ATOMIC_GLOBAL || 1724 + bo->attr.atomic_access == DRM_XE_ATOMIC_CPU; 1725 + } 1726 + 1715 1727 static vm_fault_t xe_gem_fault(struct vm_fault *vmf) 1716 1728 { 1717 1729 struct ttm_buffer_object *tbo = vmf->vma->vm_private_data; ··· 1732 1720 struct xe_bo *bo = ttm_to_xe_bo(tbo); 1733 1721 bool needs_rpm = bo->flags & XE_BO_FLAG_VRAM_MASK; 1734 1722 vm_fault_t ret; 1735 - int idx; 1723 + int idx, r = 0; 1736 1724 1737 1725 if (needs_rpm) 1738 1726 xe_pm_runtime_get(xe); ··· 1744 1732 if (drm_dev_enter(ddev, &idx)) { 1745 1733 trace_xe_bo_cpu_fault(bo); 1746 1734 1747 - ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot, 1748 - TTM_BO_VM_NUM_PREFAULT); 1735 + if (should_migrate_to_smem(bo)) { 1736 + xe_assert(xe, bo->flags & XE_BO_FLAG_SYSTEM); 1737 + 1738 + r = xe_bo_migrate(bo, XE_PL_TT); 1739 + if (r == -EBUSY || r == -ERESTARTSYS || r == -EINTR) 1740 + ret = VM_FAULT_NOPAGE; 1741 + else if (r) 1742 + ret = VM_FAULT_SIGBUS; 1743 + } 1744 + if (!ret) 1745 + ret = ttm_bo_vm_fault_reserved(vmf, 1746 + vmf->vma->vm_page_prot, 1747 + TTM_BO_VM_NUM_PREFAULT); 1749 1748 drm_dev_exit(idx); 1750 1749 1751 1750 if (ret == VM_FAULT_RETRY &&
+13 -22
drivers/gpu/drm/xe/xe_gt_pagefault.c
··· 75 75 } 76 76 77 77 static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma, 78 - bool atomic, struct xe_vram_region *vram) 78 + bool need_vram_move, struct xe_vram_region *vram) 79 79 { 80 80 struct xe_bo *bo = xe_vma_bo(vma); 81 81 struct xe_vm *vm = xe_vma_vm(vma); ··· 85 85 if (err) 86 86 return err; 87 87 88 - if (atomic && vram) { 89 - xe_assert(vm->xe, IS_DGFX(vm->xe)); 88 + if (!bo) 89 + return 0; 90 90 91 - if (xe_vma_is_userptr(vma)) { 92 - err = -EACCES; 93 - return err; 94 - } 91 + err = need_vram_move ? xe_bo_migrate(bo, vram->placement) : 92 + xe_bo_validate(bo, vm, true); 95 93 96 - /* Migrate to VRAM, move should invalidate the VMA first */ 97 - err = xe_bo_migrate(bo, vram->placement); 98 - if (err) 99 - return err; 100 - } else if (bo) { 101 - /* Create backing store if needed */ 102 - err = xe_bo_validate(bo, vm, true); 103 - if (err) 104 - return err; 105 - } 106 - 107 - return 0; 94 + return err; 108 95 } 109 96 110 97 static int handle_vma_pagefault(struct xe_gt *gt, struct xe_vma *vma, ··· 102 115 struct drm_exec exec; 103 116 struct dma_fence *fence; 104 117 ktime_t end = 0; 105 - int err; 118 + int err, needs_vram; 106 119 107 120 lockdep_assert_held_write(&vm->lock); 121 + 122 + needs_vram = xe_vma_need_vram_for_atomic(vm->xe, vma, atomic); 123 + if (needs_vram < 0 || (needs_vram && xe_vma_is_userptr(vma))) 124 + return needs_vram < 0 ? 
needs_vram : -EACCES; 108 125 109 126 xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_COUNT, 1); 110 127 xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_KB, xe_vma_size(vma) / 1024); ··· 132 141 /* Lock VM and BOs dma-resv */ 133 142 drm_exec_init(&exec, 0, 0); 134 143 drm_exec_until_all_locked(&exec) { 135 - err = xe_pf_begin(&exec, vma, atomic, tile->mem.vram); 144 + err = xe_pf_begin(&exec, vma, needs_vram == 1, tile->mem.vram); 136 145 drm_exec_retry_on_contention(&exec); 137 146 if (xe_vm_validate_should_retry(&exec, err, &end)) 138 147 err = -EAGAIN; ··· 567 576 /* Lock VM and BOs dma-resv */ 568 577 drm_exec_init(&exec, 0, 0); 569 578 drm_exec_until_all_locked(&exec) { 570 - ret = xe_pf_begin(&exec, vma, true, tile->mem.vram); 579 + ret = xe_pf_begin(&exec, vma, IS_DGFX(vm->xe), tile->mem.vram); 571 580 drm_exec_retry_on_contention(&exec); 572 581 if (ret) 573 582 break;
+5 -2
drivers/gpu/drm/xe/xe_vm.c
··· 4242 4242 */ 4243 4243 int xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool is_atomic) 4244 4244 { 4245 + u32 atomic_access = xe_vma_bo(vma) ? xe_vma_bo(vma)->attr.atomic_access : 4246 + vma->attr.atomic_access; 4247 + 4245 4248 if (!IS_DGFX(xe) || !is_atomic) 4246 - return 0; 4249 + return false; 4247 4250 4248 4251 /* 4249 4252 * NOTE: The checks implemented here are platform-specific. For 4250 4253 * instance, on a device supporting CXL atomics, these would ideally 4251 4254 * work universally without additional handling. 4252 4255 */ 4253 - switch (vma->attr.atomic_access) { 4256 + switch (atomic_access) { 4254 4257 case DRM_XE_ATOMIC_DEVICE: 4255 4258 return !xe->info.has_device_atomics_on_smem; 4256 4259
+58 -1
drivers/gpu/drm/xe/xe_vm_madvise.c
··· 102 102 struct xe_vma **vmas, int num_vmas, 103 103 struct drm_xe_madvise *op) 104 104 { 105 + struct xe_bo *bo; 105 106 int i; 106 107 107 108 xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC); ··· 115 114 continue; 116 115 117 116 vmas[i]->attr.atomic_access = op->atomic.val; 118 - /*TODO: handle bo backed vmas */ 117 + 118 + bo = xe_vma_bo(vmas[i]); 119 + if (!bo) 120 + continue; 121 + 122 + xe_bo_assert_held(bo); 123 + bo->attr.atomic_access = op->atomic.val; 124 + 125 + /* Invalidate cpu page table, so bo can migrate to smem in next access */ 126 + if (xe_bo_is_vram(bo) && 127 + (bo->attr.atomic_access == DRM_XE_ATOMIC_CPU || 128 + bo->attr.atomic_access == DRM_XE_ATOMIC_GLOBAL)) 129 + ttm_bo_unmap_virtual(&bo->ttm); 119 130 } 120 131 } 121 132 ··· 275 262 return true; 276 263 } 277 264 265 + static bool check_bo_args_are_sane(struct xe_vm *vm, struct xe_vma **vmas, 266 + int num_vmas, u32 atomic_val) 267 + { 268 + struct xe_device *xe = vm->xe; 269 + struct xe_bo *bo; 270 + int i; 271 + 272 + for (i = 0; i < num_vmas; i++) { 273 + bo = xe_vma_bo(vmas[i]); 274 + if (!bo) 275 + continue; 276 + /* 277 + * NOTE: The following atomic checks are platform-specific. For example, 278 + * if a device supports CXL atomics, these may not be necessary or 279 + * may behave differently. 
280 + */ 281 + if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_CPU && 282 + !(bo->flags & XE_BO_FLAG_SYSTEM))) 283 + return false; 284 + 285 + if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_DEVICE && 286 + !(bo->flags & XE_BO_FLAG_VRAM0) && 287 + !(bo->flags & XE_BO_FLAG_VRAM1) && 288 + !(bo->flags & XE_BO_FLAG_SYSTEM && 289 + xe->info.has_device_atomics_on_smem))) 290 + return false; 291 + 292 + if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_GLOBAL && 293 + (!(bo->flags & XE_BO_FLAG_SYSTEM) || 294 + (!(bo->flags & XE_BO_FLAG_VRAM0) && 295 + !(bo->flags & XE_BO_FLAG_VRAM1))))) 296 + return false; 297 + } 298 + return true; 299 + } 278 300 /** 279 301 * xe_vm_madvise_ioctl - Handle MADVise ioctl for a VM 280 302 * @dev: DRM device pointer ··· 361 313 goto unlock_vm; 362 314 363 315 if (madvise_range.has_bo_vmas) { 316 + if (args->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC) { 317 + if (!check_bo_args_are_sane(vm, madvise_range.vmas, 318 + madvise_range.num_vmas, 319 + args->atomic.val)) { 320 + err = -EINVAL; 321 + goto unlock_vm; 322 + } 323 + } 324 + 364 325 drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES | DRM_EXEC_INTERRUPTIBLE_WAIT, 0); 365 326 drm_exec_until_all_locked(&exec) { 366 327 for (int i = 0; i < madvise_range.num_vmas; i++) {