Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/xe: Add staging tree for VM binds

Concurrent VM bind staging and zapping of PTEs from a userptr notifier
do not work because the view of PTEs is not stable. VM binds cannot
acquire the notifier lock during staging, as memory allocations are
required. To resolve this race condition, use a staging tree for VM
binds that is committed only under the userptr notifier lock during the
final step of the bind. This ensures a consistent view of the PTEs in
the userptr notifier.

A follow-up may restrict staging to VMs in fault mode, as this is the
only mode in which the above race exists.

v3:
- Drop zap PTE change (Thomas)
- s/xe_pt_entry/xe_pt_entry_staging (Thomas)

Suggested-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: <stable@vger.kernel.org>
Fixes: e8babb280b5e ("drm/xe: Convert multiple bind ops into single job")
Fixes: a708f6501c69 ("drm/xe: Update PT layer with better error handling")
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250228073058.59510-5-thomas.hellstrom@linux.intel.com
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
(cherry picked from commit 6f39b0c5ef0385eae586760d10b9767168037aa5)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>

authored by

Matthew Brost and committed by
Rodrigo Vivi
ae482ec8 84211b1c

+46 -19
+40 -18
drivers/gpu/drm/xe/xe_pt.c
··· 28 28 struct xe_pt pt; 29 29 /** @children: Array of page-table child nodes */ 30 30 struct xe_ptw *children[XE_PDES]; 31 + /** @staging: Array of page-table staging nodes */ 32 + struct xe_ptw *staging[XE_PDES]; 31 33 }; 32 34 33 35 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) ··· 50 48 return container_of(pt, struct xe_pt_dir, pt); 51 49 } 52 50 53 - static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index) 51 + static struct xe_pt * 52 + xe_pt_entry_staging(struct xe_pt_dir *pt_dir, unsigned int index) 54 53 { 55 - return container_of(pt_dir->children[index], struct xe_pt, base); 54 + return container_of(pt_dir->staging[index], struct xe_pt, base); 56 55 } 57 56 58 57 static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, ··· 128 125 } 129 126 pt->bo = bo; 130 127 pt->base.children = level ? as_xe_pt_dir(pt)->children : NULL; 128 + pt->base.staging = level ? as_xe_pt_dir(pt)->staging : NULL; 131 129 132 130 if (vm->xef) 133 131 xe_drm_client_add_bo(vm->xef->client, pt->bo); ··· 210 206 struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); 211 207 212 208 for (i = 0; i < XE_PDES; i++) { 213 - if (xe_pt_entry(pt_dir, i)) 214 - xe_pt_destroy(xe_pt_entry(pt_dir, i), flags, 209 + if (xe_pt_entry_staging(pt_dir, i)) 210 + xe_pt_destroy(xe_pt_entry_staging(pt_dir, i), flags, 215 211 deferred); 216 212 } 217 213 } ··· 380 376 /* Continue building a non-connected subtree. 
*/ 381 377 struct iosys_map *map = &parent->bo->vmap; 382 378 383 - if (unlikely(xe_child)) 379 + if (unlikely(xe_child)) { 384 380 parent->base.children[offset] = &xe_child->base; 381 + parent->base.staging[offset] = &xe_child->base; 382 + } 385 383 386 384 xe_pt_write(xe_walk->vm->xe, map, offset, pte); 387 385 parent->num_live++; ··· 620 614 .ops = &xe_pt_stage_bind_ops, 621 615 .shifts = xe_normal_pt_shifts, 622 616 .max_level = XE_PT_HIGHEST_LEVEL, 617 + .staging = true, 623 618 }, 624 619 .vm = xe_vma_vm(vma), 625 620 .tile = tile, ··· 880 873 } 881 874 } 882 875 883 - static void xe_pt_commit_locks_assert(struct xe_vma *vma) 876 + static void xe_pt_commit_prepare_locks_assert(struct xe_vma *vma) 884 877 { 885 878 struct xe_vm *vm = xe_vma_vm(vma); 886 879 ··· 890 883 dma_resv_assert_held(xe_vma_bo(vma)->ttm.base.resv); 891 884 892 885 xe_vm_assert_held(vm); 886 + } 887 + 888 + static void xe_pt_commit_locks_assert(struct xe_vma *vma) 889 + { 890 + struct xe_vm *vm = xe_vma_vm(vma); 891 + 892 + xe_pt_commit_prepare_locks_assert(vma); 893 + 894 + if (xe_vma_is_userptr(vma)) 895 + lockdep_assert_held_read(&vm->userptr.notifier_lock); 893 896 } 894 897 895 898 static void xe_pt_commit(struct xe_vma *vma, ··· 912 895 913 896 for (i = 0; i < num_entries; i++) { 914 897 struct xe_pt *pt = entries[i].pt; 898 + struct xe_pt_dir *pt_dir; 915 899 916 900 if (!pt->level) 917 901 continue; 918 902 903 + pt_dir = as_xe_pt_dir(pt); 919 904 for (j = 0; j < entries[i].qwords; j++) { 920 905 struct xe_pt *oldpte = entries[i].pt_entries[j].pt; 906 + int j_ = j + entries[i].ofs; 921 907 908 + pt_dir->children[j_] = pt_dir->staging[j_]; 922 909 xe_pt_destroy(oldpte, xe_vma_vm(vma)->flags, deferred); 923 910 } 924 911 } ··· 934 913 { 935 914 int i, j; 936 915 937 - xe_pt_commit_locks_assert(vma); 916 + xe_pt_commit_prepare_locks_assert(vma); 938 917 939 918 for (i = num_entries - 1; i >= 0; --i) { 940 919 struct xe_pt *pt = entries[i].pt; ··· 949 928 pt_dir = as_xe_pt_dir(pt); 
950 929 for (j = 0; j < entries[i].qwords; j++) { 951 930 u32 j_ = j + entries[i].ofs; 952 - struct xe_pt *newpte = xe_pt_entry(pt_dir, j_); 931 + struct xe_pt *newpte = xe_pt_entry_staging(pt_dir, j_); 953 932 struct xe_pt *oldpte = entries[i].pt_entries[j].pt; 954 933 955 - pt_dir->children[j_] = oldpte ? &oldpte->base : 0; 934 + pt_dir->staging[j_] = oldpte ? &oldpte->base : 0; 956 935 xe_pt_destroy(newpte, xe_vma_vm(vma)->flags, NULL); 957 936 } 958 937 } ··· 964 943 { 965 944 u32 i, j; 966 945 967 - xe_pt_commit_locks_assert(vma); 946 + xe_pt_commit_prepare_locks_assert(vma); 968 947 969 948 for (i = 0; i < num_entries; i++) { 970 949 struct xe_pt *pt = entries[i].pt; ··· 982 961 struct xe_pt *newpte = entries[i].pt_entries[j].pt; 983 962 struct xe_pt *oldpte = NULL; 984 963 985 - if (xe_pt_entry(pt_dir, j_)) 986 - oldpte = xe_pt_entry(pt_dir, j_); 964 + if (xe_pt_entry_staging(pt_dir, j_)) 965 + oldpte = xe_pt_entry_staging(pt_dir, j_); 987 966 988 - pt_dir->children[j_] = &newpte->base; 967 + pt_dir->staging[j_] = &newpte->base; 989 968 entries[i].pt_entries[j].pt = oldpte; 990 969 } 991 970 } ··· 1515 1494 .ops = &xe_pt_stage_unbind_ops, 1516 1495 .shifts = xe_normal_pt_shifts, 1517 1496 .max_level = XE_PT_HIGHEST_LEVEL, 1497 + .staging = true, 1518 1498 }, 1519 1499 .tile = tile, 1520 1500 .modified_start = xe_vma_start(vma), ··· 1557 1535 { 1558 1536 int i, j; 1559 1537 1560 - xe_pt_commit_locks_assert(vma); 1538 + xe_pt_commit_prepare_locks_assert(vma); 1561 1539 1562 1540 for (i = num_entries - 1; i >= 0; --i) { 1563 1541 struct xe_vm_pgtable_update *entry = &entries[i]; ··· 1570 1548 continue; 1571 1549 1572 1550 for (j = entry->ofs; j < entry->ofs + entry->qwords; j++) 1573 - pt_dir->children[j] = 1551 + pt_dir->staging[j] = 1574 1552 entries[i].pt_entries[j - entry->ofs].pt ? 
1575 1553 &entries[i].pt_entries[j - entry->ofs].pt->base : NULL; 1576 1554 } ··· 1583 1561 { 1584 1562 int i, j; 1585 1563 1586 - xe_pt_commit_locks_assert(vma); 1564 + xe_pt_commit_prepare_locks_assert(vma); 1587 1565 1588 1566 for (i = 0; i < num_entries; ++i) { 1589 1567 struct xe_vm_pgtable_update *entry = &entries[i]; ··· 1597 1575 pt_dir = as_xe_pt_dir(pt); 1598 1576 for (j = entry->ofs; j < entry->ofs + entry->qwords; j++) { 1599 1577 entry->pt_entries[j - entry->ofs].pt = 1600 - xe_pt_entry(pt_dir, j); 1601 - pt_dir->children[j] = NULL; 1578 + xe_pt_entry_staging(pt_dir, j); 1579 + pt_dir->staging[j] = NULL; 1602 1580 } 1603 1581 } 1604 1582 }
+2 -1
drivers/gpu/drm/xe/xe_pt_walk.c
··· 74 74 u64 addr, u64 end, struct xe_pt_walk *walk) 75 75 { 76 76 pgoff_t offset = xe_pt_offset(addr, level, walk); 77 - struct xe_ptw **entries = parent->children ? parent->children : NULL; 77 + struct xe_ptw **entries = walk->staging ? (parent->staging ?: NULL) : 78 + (parent->children ?: NULL); 78 79 const struct xe_pt_walk_ops *ops = walk->ops; 79 80 enum page_walk_action action; 80 81 struct xe_ptw *child;
+4
drivers/gpu/drm/xe/xe_pt_walk.h
··· 11 11 /** 12 12 * struct xe_ptw - base class for driver pagetable subclassing. 13 13 * @children: Pointer to an array of children if any. 14 + * @staging: Pointer to an array of staging if any. 14 15 * 15 16 * Drivers could subclass this, and if it's a page-directory, typically 16 17 * embed an array of xe_ptw pointers. 17 18 */ 18 19 struct xe_ptw { 19 20 struct xe_ptw **children; 21 + struct xe_ptw **staging; 20 22 }; 21 23 22 24 /** ··· 43 41 * as shared pagetables. 44 42 */ 45 43 bool shared_pt_mode; 44 + /** @staging: Walk staging PT structure */ 45 + bool staging; 46 46 }; 47 47 48 48 /**