Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/xe: Split TLB invalidation code in frontend and backend

The frontend exposes an API to the driver to send invalidations, handles
sequence number assignment, synchronization (fences), and provides a
timeout mechanism. The backend issues the actual invalidation to the
hardware (or firmware).

The new layering easily allows issuing TLB invalidations to different
hardware or firmware interfaces.

Normalize some naming while here too.

Signed-off-by: Stuart Summers <stuart.summers@intel.com>
Reviewed-by: Stuart Summers <stuart.summers@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20250826182911.392550-10-stuart.summers@intel.com

+500 -385
+1
drivers/gpu/drm/xe/Makefile
··· 75 75 xe_guc_log.o \ 76 76 xe_guc_pc.o \ 77 77 xe_guc_submit.o \ 78 + xe_guc_tlb_inval.o \ 78 79 xe_heci_gsc.o \ 79 80 xe_huc.o \ 80 81 xe_hw_engine.o \
-2
drivers/gpu/drm/xe/xe_gt.c
··· 603 603 struct xe_gt *gt = arg; 604 604 int i; 605 605 606 - xe_gt_tlb_inval_fini(gt); 607 - 608 606 for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) 609 607 xe_hw_fence_irq_finish(&gt->fence_irq[i]); 610 608
+1 -1
drivers/gpu/drm/xe/xe_guc_ct.c
··· 30 30 #include "xe_guc_log.h" 31 31 #include "xe_guc_relay.h" 32 32 #include "xe_guc_submit.h" 33 + #include "xe_guc_tlb_inval.h" 33 34 #include "xe_map.h" 34 35 #include "xe_pm.h" 35 - #include "xe_tlb_inval.h" 36 36 #include "xe_trace_guc.h" 37 37 38 38 static void receive_g2h(struct xe_guc_ct *ct);
+242
drivers/gpu/drm/xe/xe_guc_tlb_inval.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2025 Intel Corporation 4 + */ 5 + 6 + #include "abi/guc_actions_abi.h" 7 + 8 + #include "xe_device.h" 9 + #include "xe_gt_stats.h" 10 + #include "xe_gt_types.h" 11 + #include "xe_guc.h" 12 + #include "xe_guc_ct.h" 13 + #include "xe_guc_tlb_inval.h" 14 + #include "xe_force_wake.h" 15 + #include "xe_mmio.h" 16 + #include "xe_tlb_inval.h" 17 + 18 + #include "regs/xe_guc_regs.h" 19 + 20 + /* 21 + * XXX: The seqno algorithm relies on TLB invalidation being processed in order 22 + * which they currently are by the GuC, if that changes the algorithm will need 23 + * to be updated. 24 + */ 25 + 26 + static int send_tlb_inval(struct xe_guc *guc, const u32 *action, int len) 27 + { 28 + struct xe_gt *gt = guc_to_gt(guc); 29 + 30 + xe_gt_assert(gt, action[1]); /* Seqno */ 31 + 32 + xe_gt_stats_incr(gt, XE_GT_STATS_ID_TLB_INVAL, 1); 33 + return xe_guc_ct_send(&guc->ct, action, len, 34 + G2H_LEN_DW_TLB_INVALIDATE, 1); 35 + } 36 + 37 + #define MAKE_INVAL_OP(type) ((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \ 38 + XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | \ 39 + XE_GUC_TLB_INVAL_FLUSH_CACHE) 40 + 41 + static int send_tlb_inval_all(struct xe_tlb_inval *tlb_inval, u32 seqno) 42 + { 43 + struct xe_guc *guc = tlb_inval->private; 44 + u32 action[] = { 45 + XE_GUC_ACTION_TLB_INVALIDATION_ALL, 46 + seqno, 47 + MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL), 48 + }; 49 + 50 + return send_tlb_inval(guc, action, ARRAY_SIZE(action)); 51 + } 52 + 53 + static int send_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval, u32 seqno) 54 + { 55 + struct xe_guc *guc = tlb_inval->private; 56 + struct xe_gt *gt = guc_to_gt(guc); 57 + struct xe_device *xe = guc_to_xe(guc); 58 + 59 + /* 60 + * Returning -ECANCELED in this function is squashed at the caller and 61 + * signals waiters. 
62 + */ 63 + 64 + if (xe_guc_ct_enabled(&guc->ct) && guc->submission_state.enabled) { 65 + u32 action[] = { 66 + XE_GUC_ACTION_TLB_INVALIDATION, 67 + seqno, 68 + MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC), 69 + }; 70 + 71 + return send_tlb_inval(guc, action, ARRAY_SIZE(action)); 72 + } else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) { 73 + struct xe_mmio *mmio = &gt->mmio; 74 + unsigned int fw_ref; 75 + 76 + if (IS_SRIOV_VF(xe)) 77 + return -ECANCELED; 78 + 79 + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 80 + if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) { 81 + xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC1, 82 + PVC_GUC_TLB_INV_DESC1_INVALIDATE); 83 + xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC0, 84 + PVC_GUC_TLB_INV_DESC0_VALID); 85 + } else { 86 + xe_mmio_write32(mmio, GUC_TLB_INV_CR, 87 + GUC_TLB_INV_CR_INVALIDATE); 88 + } 89 + xe_force_wake_put(gt_to_fw(gt), fw_ref); 90 + } 91 + 92 + return -ECANCELED; 93 + } 94 + 95 + /* 96 + * Ensure that roundup_pow_of_two(length) doesn't overflow. 97 + * Note that roundup_pow_of_two() operates on unsigned long, 98 + * not on u64. 
99 + */ 100 + #define MAX_RANGE_TLB_INVALIDATION_LENGTH (rounddown_pow_of_two(ULONG_MAX)) 101 + 102 + static int send_tlb_inval_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno, 103 + u64 start, u64 end, u32 asid) 104 + { 105 + #define MAX_TLB_INVALIDATION_LEN 7 106 + struct xe_guc *guc = tlb_inval->private; 107 + struct xe_gt *gt = guc_to_gt(guc); 108 + u32 action[MAX_TLB_INVALIDATION_LEN]; 109 + u64 length = end - start; 110 + int len = 0; 111 + 112 + if (guc_to_xe(guc)->info.force_execlist) 113 + return -ECANCELED; 114 + 115 + action[len++] = XE_GUC_ACTION_TLB_INVALIDATION; 116 + action[len++] = seqno; 117 + if (!gt_to_xe(gt)->info.has_range_tlb_inval || 118 + length > MAX_RANGE_TLB_INVALIDATION_LENGTH) { 119 + action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL); 120 + } else { 121 + u64 orig_start = start; 122 + u64 align; 123 + 124 + if (length < SZ_4K) 125 + length = SZ_4K; 126 + 127 + /* 128 + * We need to invalidate a higher granularity if start address 129 + * is not aligned to length. When start is not aligned with 130 + * length we need to find the length large enough to create an 131 + * address mask covering the required range. 
132 + */ 133 + align = roundup_pow_of_two(length); 134 + start = ALIGN_DOWN(start, align); 135 + end = ALIGN(end, align); 136 + length = align; 137 + while (start + length < end) { 138 + length <<= 1; 139 + start = ALIGN_DOWN(orig_start, length); 140 + } 141 + 142 + /* 143 + * Minimum invalidation size for a 2MB page that the hardware 144 + * expects is 16MB 145 + */ 146 + if (length >= SZ_2M) { 147 + length = max_t(u64, SZ_16M, length); 148 + start = ALIGN_DOWN(orig_start, length); 149 + } 150 + 151 + xe_gt_assert(gt, length >= SZ_4K); 152 + xe_gt_assert(gt, is_power_of_2(length)); 153 + xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1, 154 + ilog2(SZ_2M) + 1))); 155 + xe_gt_assert(gt, IS_ALIGNED(start, length)); 156 + 157 + action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE); 158 + action[len++] = asid; 159 + action[len++] = lower_32_bits(start); 160 + action[len++] = upper_32_bits(start); 161 + action[len++] = ilog2(length) - ilog2(SZ_4K); 162 + } 163 + 164 + xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN); 165 + 166 + return send_tlb_inval(guc, action, len); 167 + } 168 + 169 + static bool tlb_inval_initialized(struct xe_tlb_inval *tlb_inval) 170 + { 171 + struct xe_guc *guc = tlb_inval->private; 172 + 173 + return xe_guc_ct_initialized(&guc->ct); 174 + } 175 + 176 + static void tlb_inval_flush(struct xe_tlb_inval *tlb_inval) 177 + { 178 + struct xe_guc *guc = tlb_inval->private; 179 + 180 + LNL_FLUSH_WORK(&guc->ct.g2h_worker); 181 + } 182 + 183 + static long tlb_inval_timeout_delay(struct xe_tlb_inval *tlb_inval) 184 + { 185 + struct xe_guc *guc = tlb_inval->private; 186 + 187 + /* this reflects what HW/GuC needs to process TLB inv request */ 188 + const long hw_tlb_timeout = HZ / 4; 189 + 190 + /* this estimates actual delay caused by the CTB transport */ 191 + long delay = xe_guc_ct_queue_proc_time_jiffies(&guc->ct); 192 + 193 + return hw_tlb_timeout + 2 * delay; 194 + } 195 + 196 + static const struct xe_tlb_inval_ops guc_tlb_inval_ops = { 
197 + .all = send_tlb_inval_all, 198 + .ggtt = send_tlb_inval_ggtt, 199 + .ppgtt = send_tlb_inval_ppgtt, 200 + .initialized = tlb_inval_initialized, 201 + .flush = tlb_inval_flush, 202 + .timeout_delay = tlb_inval_timeout_delay, 203 + }; 204 + 205 + /** 206 + * xe_guc_tlb_inval_init_early() - Init GuC TLB invalidation early 207 + * @guc: GuC object 208 + * @tlb_inval: TLB invalidation client 209 + * 210 + * Initialize GuC TLB invalidation by setting back pointer in TLB invalidation 211 + * client to the GuC and setting GuC backend ops. 212 + */ 213 + void xe_guc_tlb_inval_init_early(struct xe_guc *guc, 214 + struct xe_tlb_inval *tlb_inval) 215 + { 216 + tlb_inval->private = guc; 217 + tlb_inval->ops = &guc_tlb_inval_ops; 218 + } 219 + 220 + /** 221 + * xe_guc_tlb_inval_done_handler() - TLB invalidation done handler 222 + * @guc: guc 223 + * @msg: message indicating TLB invalidation done 224 + * @len: length of message 225 + * 226 + * Parse seqno of TLB invalidation, wake any waiters for seqno, and signal any 227 + * invalidation fences for seqno. Algorithm for this depends on seqno being 228 + * received in-order and asserts this assumption. 229 + * 230 + * Return: 0 on success, -EPROTO for malformed messages. 231 + */ 232 + int xe_guc_tlb_inval_done_handler(struct xe_guc *guc, u32 *msg, u32 len) 233 + { 234 + struct xe_gt *gt = guc_to_gt(guc); 235 + 236 + if (unlikely(len != 1)) 237 + return -EPROTO; 238 + 239 + xe_tlb_inval_done_handler(&gt->tlb_inval, msg[0]); 240 + 241 + return 0; 242 + }
+19
drivers/gpu/drm/xe/xe_guc_tlb_inval.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2025 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_GUC_TLB_INVAL_H_ 7 + #define _XE_GUC_TLB_INVAL_H_ 8 + 9 + #include <linux/types.h> 10 + 11 + struct xe_guc; 12 + struct xe_tlb_inval; 13 + 14 + void xe_guc_tlb_inval_init_early(struct xe_guc *guc, 15 + struct xe_tlb_inval *tlb_inval); 16 + 17 + int xe_guc_tlb_inval_done_handler(struct xe_guc *guc, u32 *msg, u32 len); 18 + 19 + #endif
+161 -376
drivers/gpu/drm/xe/xe_tlb_inval.c
··· 12 12 #include "xe_gt_printk.h" 13 13 #include "xe_guc.h" 14 14 #include "xe_guc_ct.h" 15 + #include "xe_guc_tlb_inval.h" 15 16 #include "xe_gt_stats.h" 16 17 #include "xe_tlb_inval.h" 17 18 #include "xe_mmio.h" 18 19 #include "xe_pm.h" 19 - #include "xe_sriov.h" 20 + #include "xe_tlb_inval.h" 20 21 #include "xe_trace.h" 21 - #include "regs/xe_guc_regs.h" 22 + 23 + /** 24 + * DOC: Xe TLB invalidation 25 + * 26 + * Xe TLB invalidation is implemented in two layers. The first is the frontend 27 + * API, which provides an interface for TLB invalidations to the driver code. 28 + * The frontend handles seqno assignment, synchronization (fences), and the 29 + * timeout mechanism. The frontend is implemented via an embedded structure 30 + * xe_tlb_inval that includes a set of ops hooking into the backend. The backend 31 + * interacts with the hardware (or firmware) to perform the actual invalidation. 32 + */ 22 33 23 34 #define FENCE_STACK_BIT DMA_FENCE_FLAG_USER_BITS 24 35 25 - /* 26 - * TLB inval depends on pending commands in the CT queue and then the real 27 - * invalidation time. Double up the time to process full CT queue 28 - * just to be on the safe side. 
29 - */ 30 - static long tlb_timeout_jiffies(struct xe_gt *gt) 31 - { 32 - /* this reflects what HW/GuC needs to process TLB inv request */ 33 - const long hw_tlb_timeout = HZ / 4; 34 - 35 - /* this estimates actual delay caused by the CTB transport */ 36 - long delay = xe_guc_ct_queue_proc_time_jiffies(&gt->uc.guc.ct); 37 - 38 - return hw_tlb_timeout + 2 * delay; 39 - } 40 - 41 36 static void xe_tlb_inval_fence_fini(struct xe_tlb_inval_fence *fence) 42 37 { 43 - struct xe_gt *gt; 44 - 45 38 if (WARN_ON_ONCE(!fence->tlb_inval)) 46 39 return; 47 40 48 - gt = fence->tlb_inval->private; 49 - 50 - xe_pm_runtime_put(gt_to_xe(gt)); 41 + xe_pm_runtime_put(fence->tlb_inval->xe); 51 42 fence->tlb_inval = NULL; /* fini() should be called once */ 52 43 } 53 44 54 45 static void 55 - __inval_fence_signal(struct xe_device *xe, struct xe_tlb_inval_fence *fence) 46 + xe_tlb_inval_fence_signal(struct xe_tlb_inval_fence *fence) 56 47 { 57 48 bool stack = test_bit(FENCE_STACK_BIT, &fence->base.flags); 58 49 59 - trace_xe_tlb_inval_fence_signal(xe, fence); 50 + lockdep_assert_held(&fence->tlb_inval->pending_lock); 51 + 52 + list_del(&fence->link); 53 + trace_xe_tlb_inval_fence_signal(fence->tlb_inval->xe, fence); 60 54 xe_tlb_inval_fence_fini(fence); 61 55 dma_fence_signal(&fence->base); 62 56 if (!stack) ··· 58 64 } 59 65 60 66 static void 61 - inval_fence_signal(struct xe_device *xe, struct xe_tlb_inval_fence *fence) 67 + xe_tlb_inval_fence_signal_unlocked(struct xe_tlb_inval_fence *fence) 62 68 { 63 - lockdep_assert_held(&fence->tlb_inval->pending_lock); 69 + struct xe_tlb_inval *tlb_inval = fence->tlb_inval; 64 70 65 - list_del(&fence->link); 66 - __inval_fence_signal(xe, fence); 71 + spin_lock_irq(&tlb_inval->pending_lock); 72 + xe_tlb_inval_fence_signal(fence); 73 + spin_unlock_irq(&tlb_inval->pending_lock); 67 74 } 68 75 69 - static void 70 - inval_fence_signal_unlocked(struct xe_device *xe, 71 - struct xe_tlb_inval_fence *fence) 76 + static void 
xe_tlb_inval_fence_timeout(struct work_struct *work) 72 77 { 73 - spin_lock_irq(&fence->tlb_inval->pending_lock); 74 - inval_fence_signal(xe, fence); 75 - spin_unlock_irq(&fence->tlb_inval->pending_lock); 76 - } 77 - 78 - static void xe_gt_tlb_fence_timeout(struct work_struct *work) 79 - { 80 - struct xe_gt *gt = container_of(work, struct xe_gt, 81 - tlb_inval.fence_tdr.work); 82 - struct xe_device *xe = gt_to_xe(gt); 78 + struct xe_tlb_inval *tlb_inval = container_of(work, struct xe_tlb_inval, 79 + fence_tdr.work); 80 + struct xe_device *xe = tlb_inval->xe; 83 81 struct xe_tlb_inval_fence *fence, *next; 82 + long timeout_delay = tlb_inval->ops->timeout_delay(tlb_inval); 84 83 85 - LNL_FLUSH_WORK(&gt->uc.guc.ct.g2h_worker); 84 + tlb_inval->ops->flush(tlb_inval); 86 85 87 - spin_lock_irq(&gt->tlb_inval.pending_lock); 86 + spin_lock_irq(&tlb_inval->pending_lock); 88 87 list_for_each_entry_safe(fence, next, 89 - &gt->tlb_inval.pending_fences, link) { 88 + &tlb_inval->pending_fences, link) { 90 89 s64 since_inval_ms = ktime_ms_delta(ktime_get(), 91 90 fence->inval_time); 92 91 93 - if (msecs_to_jiffies(since_inval_ms) < tlb_timeout_jiffies(gt)) 92 + if (msecs_to_jiffies(since_inval_ms) < timeout_delay) 94 93 break; 95 94 96 95 trace_xe_tlb_inval_fence_timeout(xe, fence); 97 - xe_gt_err(gt, "TLB invalidation fence timeout, seqno=%d recv=%d", 98 - fence->seqno, gt->tlb_inval.seqno_recv); 96 + drm_err(&xe->drm, 97 + "TLB invalidation fence timeout, seqno=%d recv=%d", 98 + fence->seqno, tlb_inval->seqno_recv); 99 99 100 100 fence->base.error = -ETIME; 101 - inval_fence_signal(xe, fence); 101 + xe_tlb_inval_fence_signal(fence); 102 102 } 103 - if (!list_empty(&gt->tlb_inval.pending_fences)) 104 - queue_delayed_work(system_wq, 105 - &gt->tlb_inval.fence_tdr, 106 - tlb_timeout_jiffies(gt)); 107 - spin_unlock_irq(&gt->tlb_inval.pending_lock); 103 + if (!list_empty(&tlb_inval->pending_fences)) 104 + queue_delayed_work(system_wq, &tlb_inval->fence_tdr, 105 + timeout_delay); 106 
+ spin_unlock_irq(&tlb_inval->pending_lock); 108 107 } 109 108 110 109 /** 111 - * xe_gt_tlb_inval_init_early - Initialize GT TLB invalidation state 110 + * tlb_inval_fini - Clean up TLB invalidation state 111 + * @drm: @drm_device 112 + * @arg: pointer to struct @xe_tlb_inval 113 + * 114 + * Cancel pending fence workers and clean up any additional 115 + * TLB invalidation state. 116 + */ 117 + static void tlb_inval_fini(struct drm_device *drm, void *arg) 118 + { 119 + struct xe_tlb_inval *tlb_inval = arg; 120 + 121 + xe_tlb_inval_reset(tlb_inval); 122 + } 123 + 124 + /** 125 + * xe_gt_tlb_inval_init - Initialize TLB invalidation state 112 126 * @gt: GT structure 113 127 * 114 128 * Initialize TLB invalidation state, purely software initialization, should ··· 127 125 int xe_gt_tlb_inval_init_early(struct xe_gt *gt) 128 126 { 129 127 struct xe_device *xe = gt_to_xe(gt); 128 + struct xe_tlb_inval *tlb_inval = &gt->tlb_inval; 130 129 int err; 131 130 132 - gt->tlb_inval.private = gt; 133 - gt->tlb_inval.seqno = 1; 134 - INIT_LIST_HEAD(&gt->tlb_inval.pending_fences); 135 - spin_lock_init(&gt->tlb_inval.pending_lock); 136 - spin_lock_init(&gt->tlb_inval.lock); 137 - INIT_DELAYED_WORK(&gt->tlb_inval.fence_tdr, 138 - xe_gt_tlb_fence_timeout); 131 + tlb_inval->xe = xe; 132 + tlb_inval->seqno = 1; 133 + INIT_LIST_HEAD(&tlb_inval->pending_fences); 134 + spin_lock_init(&tlb_inval->pending_lock); 135 + spin_lock_init(&tlb_inval->lock); 136 + INIT_DELAYED_WORK(&tlb_inval->fence_tdr, xe_tlb_inval_fence_timeout); 139 137 140 - err = drmm_mutex_init(&xe->drm, &gt->tlb_inval.seqno_lock); 138 + err = drmm_mutex_init(&xe->drm, &tlb_inval->seqno_lock); 141 139 if (err) 142 140 return err; 143 141 144 - gt->tlb_inval.job_wq = 145 - drmm_alloc_ordered_workqueue(&gt_to_xe(gt)->drm, "gt-tbl-inval-job-wq", 146 - WQ_MEM_RECLAIM); 147 - if (IS_ERR(gt->tlb_inval.job_wq)) 148 - return PTR_ERR(gt->tlb_inval.job_wq); 142 + tlb_inval->job_wq = drmm_alloc_ordered_workqueue(&xe->drm, 143 + 
"gt-tbl-inval-job-wq", 144 + WQ_MEM_RECLAIM); 145 + if (IS_ERR(tlb_inval->job_wq)) 146 + return PTR_ERR(tlb_inval->job_wq); 149 147 150 - return 0; 148 + /* XXX: Blindly setting up backend to GuC */ 149 + xe_guc_tlb_inval_init_early(&gt->uc.guc, tlb_inval); 150 + 151 + return drmm_add_action_or_reset(&xe->drm, tlb_inval_fini, tlb_inval); 151 152 } 152 153 153 154 /** 154 - * xe_tlb_inval_reset - Initialize TLB invalidation reset 155 + * xe_tlb_inval_reset() - TLB invalidation reset 155 156 * @tlb_inval: TLB invalidation client 156 157 * 157 158 * Signal any pending invalidation fences, should be called during a GT reset 158 159 */ 159 160 void xe_tlb_inval_reset(struct xe_tlb_inval *tlb_inval) 160 161 { 161 - struct xe_gt *gt = tlb_inval->private; 162 162 struct xe_tlb_inval_fence *fence, *next; 163 163 int pending_seqno; 164 164 165 165 /* 166 - * we can get here before the CTs are even initialized if we're wedging 167 - * very early, in which case there are not going to be any pending 168 - * fences so we can bail immediately. 166 + * we can get here before the backends are even initialized if we're 167 + * wedging very early, in which case there are not going to be any 168 + * pendind fences so we can bail immediately. 169 169 */ 170 - if (!xe_guc_ct_initialized(&gt->uc.guc.ct)) 170 + if (!tlb_inval->ops->initialized(tlb_inval)) 171 171 return; 172 172 173 173 /* 174 - * CT channel is already disabled at this point. No new TLB requests can 174 + * Backend is already disabled at this point. No new TLB requests can 175 175 * appear. 
176 176 */ 177 177 178 - mutex_lock(&gt->tlb_inval.seqno_lock); 179 - spin_lock_irq(&gt->tlb_inval.pending_lock); 180 - cancel_delayed_work(&gt->tlb_inval.fence_tdr); 178 + mutex_lock(&tlb_inval->seqno_lock); 179 + spin_lock_irq(&tlb_inval->pending_lock); 180 + cancel_delayed_work(&tlb_inval->fence_tdr); 181 181 /* 182 182 * We might have various kworkers waiting for TLB flushes to complete 183 183 * which are not tracked with an explicit TLB fence, however at this 184 - * stage that will never happen since the CT is already disabled, so 185 - * make sure we signal them here under the assumption that we have 184 + * stage that will never happen since the backend is already disabled, 185 + * so make sure we signal them here under the assumption that we have 186 186 * completed a full GT reset. 187 187 */ 188 - if (gt->tlb_inval.seqno == 1) 188 + if (tlb_inval->seqno == 1) 189 189 pending_seqno = TLB_INVALIDATION_SEQNO_MAX - 1; 190 190 else 191 - pending_seqno = gt->tlb_inval.seqno - 1; 192 - WRITE_ONCE(gt->tlb_inval.seqno_recv, pending_seqno); 191 + pending_seqno = tlb_inval->seqno - 1; 192 + WRITE_ONCE(tlb_inval->seqno_recv, pending_seqno); 193 193 194 194 list_for_each_entry_safe(fence, next, 195 - &gt->tlb_inval.pending_fences, link) 196 - inval_fence_signal(gt_to_xe(gt), fence); 197 - spin_unlock_irq(&gt->tlb_inval.pending_lock); 198 - mutex_unlock(&gt->tlb_inval.seqno_lock); 195 + &tlb_inval->pending_fences, link) 196 + xe_tlb_inval_fence_signal(fence); 197 + spin_unlock_irq(&tlb_inval->pending_lock); 198 + mutex_unlock(&tlb_inval->seqno_lock); 199 199 } 200 200 201 - /** 202 - * 203 - * xe_gt_tlb_inval_fini - Clean up GT TLB invalidation state 204 - * 205 - * Cancel pending fence workers and clean up any additional 206 - * GT TLB invalidation state. 
207 - */ 208 - void xe_gt_tlb_inval_fini(struct xe_gt *gt) 201 + static bool xe_tlb_inval_seqno_past(struct xe_tlb_inval *tlb_inval, int seqno) 209 202 { 210 - xe_gt_tlb_inval_reset(gt); 211 - } 203 + int seqno_recv = READ_ONCE(tlb_inval->seqno_recv); 212 204 213 - static bool tlb_inval_seqno_past(struct xe_gt *gt, int seqno) 214 - { 215 - int seqno_recv = READ_ONCE(gt->tlb_inval.seqno_recv); 205 + lockdep_assert_held(&tlb_inval->pending_lock); 216 206 217 207 if (seqno - seqno_recv < -(TLB_INVALIDATION_SEQNO_MAX / 2)) 218 208 return false; ··· 215 221 return seqno_recv >= seqno; 216 222 } 217 223 218 - static int send_tlb_inval(struct xe_guc *guc, const u32 *action, int len) 219 - { 220 - struct xe_gt *gt = guc_to_gt(guc); 221 - 222 - xe_gt_assert(gt, action[1]); /* Seqno */ 223 - 224 - /* 225 - * XXX: The seqno algorithm relies on TLB invalidation being processed 226 - * in order which they currently are, if that changes the algorithm will 227 - * need to be updated. 228 - */ 229 - 230 - xe_gt_stats_incr(gt, XE_GT_STATS_ID_TLB_INVAL, 1); 231 - 232 - return xe_guc_ct_send(&guc->ct, action, len, 233 - G2H_LEN_DW_TLB_INVALIDATE, 1); 234 - } 235 - 236 224 static void xe_tlb_inval_fence_prep(struct xe_tlb_inval_fence *fence) 237 225 { 238 226 struct xe_tlb_inval *tlb_inval = fence->tlb_inval; 239 - struct xe_gt *gt = tlb_inval->private; 240 - struct xe_device *xe = gt_to_xe(gt); 241 227 242 228 fence->seqno = tlb_inval->seqno; 243 - trace_xe_tlb_inval_fence_send(xe, fence); 229 + trace_xe_tlb_inval_fence_send(tlb_inval->xe, fence); 244 230 245 231 spin_lock_irq(&tlb_inval->pending_lock); 246 232 fence->inval_time = ktime_get(); 247 233 list_add_tail(&fence->link, &tlb_inval->pending_fences); 248 234 249 235 if (list_is_singular(&tlb_inval->pending_fences)) 250 - queue_delayed_work(system_wq, 251 - &tlb_inval->fence_tdr, 252 - tlb_timeout_jiffies(gt)); 236 + queue_delayed_work(system_wq, &tlb_inval->fence_tdr, 237 + tlb_inval->ops->timeout_delay(tlb_inval)); 253 238 
spin_unlock_irq(&tlb_inval->pending_lock); 254 239 255 240 tlb_inval->seqno = (tlb_inval->seqno + 1) % ··· 237 264 tlb_inval->seqno = 1; 238 265 } 239 266 240 - #define MAKE_INVAL_OP(type) ((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \ 241 - XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | \ 242 - XE_GUC_TLB_INVAL_FLUSH_CACHE) 243 - 244 - static int send_tlb_inval_ggtt(struct xe_gt *gt, int seqno) 245 - { 246 - u32 action[] = { 247 - XE_GUC_ACTION_TLB_INVALIDATION, 248 - seqno, 249 - MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC), 250 - }; 251 - 252 - return send_tlb_inval(&gt->uc.guc, action, ARRAY_SIZE(action)); 253 - } 254 - 255 - static int send_tlb_inval_all(struct xe_tlb_inval *tlb_inval, 256 - struct xe_tlb_inval_fence *fence) 257 - { 258 - u32 action[] = { 259 - XE_GUC_ACTION_TLB_INVALIDATION_ALL, 260 - 0, /* seqno, replaced in send_tlb_inval */ 261 - MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL), 262 - }; 263 - struct xe_gt *gt = tlb_inval->private; 264 - 265 - xe_gt_assert(gt, fence); 266 - 267 - return send_tlb_inval(&gt->uc.guc, action, ARRAY_SIZE(action)); 268 - } 267 + #define xe_tlb_inval_issue(__tlb_inval, __fence, op, args...) \ 268 + ({ \ 269 + int __ret; \ 270 + \ 271 + xe_assert((__tlb_inval)->xe, (__tlb_inval)->ops); \ 272 + xe_assert((__tlb_inval)->xe, (__fence)); \ 273 + \ 274 + mutex_lock(&(__tlb_inval)->seqno_lock); \ 275 + xe_tlb_inval_fence_prep((__fence)); \ 276 + __ret = op((__tlb_inval), (__fence)->seqno, ##args); \ 277 + if (__ret < 0) \ 278 + xe_tlb_inval_fence_signal_unlocked((__fence)); \ 279 + mutex_unlock(&(__tlb_inval)->seqno_lock); \ 280 + \ 281 + __ret == -ECANCELED ? 0 : __ret; \ 282 + }) 269 283 270 284 /** 271 - * xe_gt_tlb_invalidation_all - Invalidate all TLBs across PF and all VFs. 
272 - * @gt: the &xe_gt structure 273 - * @fence: the &xe_tlb_inval_fence to be signaled on completion 285 + * xe_tlb_inval_all() - Issue a TLB invalidation for all TLBs 286 + * @tlb_inval: TLB invalidation client 287 + * @fence: invalidation fence which will be signal on TLB invalidation 288 + * completion 274 289 * 275 - * Send a request to invalidate all TLBs across PF and all VFs. 290 + * Issue a TLB invalidation for all TLBs. Completion of TLB is asynchronous and 291 + * caller can use the invalidation fence to wait for completion. 276 292 * 277 293 * Return: 0 on success, negative error code on error 278 294 */ 279 295 int xe_tlb_inval_all(struct xe_tlb_inval *tlb_inval, 280 296 struct xe_tlb_inval_fence *fence) 281 297 { 282 - struct xe_gt *gt = tlb_inval->private; 283 - int err; 284 - 285 - err = send_tlb_inval_all(tlb_inval, fence); 286 - if (err) 287 - xe_gt_err(gt, "TLB invalidation request failed (%pe)", ERR_PTR(err)); 288 - 289 - return err; 290 - } 291 - 292 - /* 293 - * Ensure that roundup_pow_of_two(length) doesn't overflow. 294 - * Note that roundup_pow_of_two() operates on unsigned long, 295 - * not on u64. 296 - */ 297 - #define MAX_RANGE_TLB_INVALIDATION_LENGTH (rounddown_pow_of_two(ULONG_MAX)) 298 - 299 - static int send_tlb_inval_ppgtt(struct xe_gt *gt, u64 start, u64 end, 300 - u32 asid, int seqno) 301 - { 302 - #define MAX_TLB_INVALIDATION_LEN 7 303 - u32 action[MAX_TLB_INVALIDATION_LEN]; 304 - u64 length = end - start; 305 - int len = 0; 306 - 307 - action[len++] = XE_GUC_ACTION_TLB_INVALIDATION; 308 - action[len++] = seqno; 309 - if (!gt_to_xe(gt)->info.has_range_tlb_inval || 310 - length > MAX_RANGE_TLB_INVALIDATION_LENGTH) { 311 - action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL); 312 - } else { 313 - u64 orig_start = start; 314 - u64 align; 315 - 316 - if (length < SZ_4K) 317 - length = SZ_4K; 318 - 319 - /* 320 - * We need to invalidate a higher granularity if start address 321 - * is not aligned to length. 
When start is not aligned with 322 - * length we need to find the length large enough to create an 323 - * address mask covering the required range. 324 - */ 325 - align = roundup_pow_of_two(length); 326 - start = ALIGN_DOWN(start, align); 327 - end = ALIGN(end, align); 328 - length = align; 329 - while (start + length < end) { 330 - length <<= 1; 331 - start = ALIGN_DOWN(orig_start, length); 332 - } 333 - 334 - /* 335 - * Minimum invalidation size for a 2MB page that the hardware 336 - * expects is 16MB 337 - */ 338 - if (length >= SZ_2M) { 339 - length = max_t(u64, SZ_16M, length); 340 - start = ALIGN_DOWN(orig_start, length); 341 - } 342 - 343 - xe_gt_assert(gt, length >= SZ_4K); 344 - xe_gt_assert(gt, is_power_of_2(length)); 345 - xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1, 346 - ilog2(SZ_2M) + 1))); 347 - xe_gt_assert(gt, IS_ALIGNED(start, length)); 348 - 349 - action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE); 350 - action[len++] = asid; 351 - action[len++] = lower_32_bits(start); 352 - action[len++] = upper_32_bits(start); 353 - action[len++] = ilog2(length) - ilog2(SZ_4K); 354 - } 355 - 356 - xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN); 357 - 358 - return send_tlb_inval(&gt->uc.guc, action, len); 359 - } 360 - 361 - static int __xe_tlb_inval_ggtt(struct xe_gt *gt, 362 - struct xe_tlb_inval_fence *fence) 363 - { 364 - int ret; 365 - 366 - mutex_lock(&gt->tlb_inval.seqno_lock); 367 - xe_tlb_inval_fence_prep(fence); 368 - 369 - ret = send_tlb_inval_ggtt(gt, fence->seqno); 370 - if (ret < 0) 371 - inval_fence_signal_unlocked(gt_to_xe(gt), fence); 372 - mutex_unlock(&gt->tlb_inval.seqno_lock); 373 - 374 - /* 375 - * -ECANCELED indicates the CT is stopped for a GT reset. TLB caches 376 - * should be nuked on a GT reset so this error can be ignored. 
377 - */ 378 - if (ret == -ECANCELED) 379 - return 0; 380 - 381 - return ret; 298 + return xe_tlb_inval_issue(tlb_inval, fence, tlb_inval->ops->all); 382 299 } 383 300 384 301 /** 385 - * xe_tlb_inval_ggtt - Issue a TLB invalidation on this GT for the GGTT 302 + * xe_tlb_inval_ggtt() - Issue a TLB invalidation for the GGTT 386 303 * @tlb_inval: TLB invalidation client 387 304 * 388 - * Issue a TLB invalidation for the GGTT. Completion of TLB invalidation is 389 - * synchronous. 305 + * Issue a TLB invalidation for the GGTT. Completion of TLB is asynchronous and 306 + * caller can use the invalidation fence to wait for completion. 390 307 * 391 308 * Return: 0 on success, negative error code on error 392 309 */ 393 310 int xe_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval) 394 311 { 395 - struct xe_gt *gt = tlb_inval->private; 396 - struct xe_device *xe = gt_to_xe(gt); 397 - unsigned int fw_ref; 312 + struct xe_tlb_inval_fence fence, *fence_ptr = &fence; 313 + int ret; 398 314 399 - if (xe_guc_ct_enabled(&gt->uc.guc.ct) && 400 - gt->uc.guc.submission_state.enabled) { 401 - struct xe_tlb_inval_fence fence; 402 - int ret; 315 + xe_tlb_inval_fence_init(tlb_inval, fence_ptr, true); 316 + ret = xe_tlb_inval_issue(tlb_inval, fence_ptr, tlb_inval->ops->ggtt); 317 + xe_tlb_inval_fence_wait(fence_ptr); 403 318 404 - xe_tlb_inval_fence_init(tlb_inval, &fence, true); 405 - ret = __xe_tlb_inval_ggtt(gt, &fence); 406 - if (ret) 407 - return ret; 408 - 409 - xe_tlb_inval_fence_wait(&fence); 410 - } else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) { 411 - struct xe_mmio *mmio = &gt->mmio; 412 - 413 - if (IS_SRIOV_VF(xe)) 414 - return 0; 415 - 416 - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 417 - if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) { 418 - xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC1, 419 - PVC_GUC_TLB_INV_DESC1_INVALIDATE); 420 - xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC0, 421 - PVC_GUC_TLB_INV_DESC0_VALID); 422 - } else { 423 - 
xe_mmio_write32(mmio, GUC_TLB_INV_CR, 424 - GUC_TLB_INV_CR_INVALIDATE); 425 - } 426 - xe_force_wake_put(gt_to_fw(gt), fw_ref); 427 - } 428 - 429 - return 0; 319 + return ret; 430 320 } 431 321 432 322 /** 433 - * xe_tlb_inval_range - Issue a TLB invalidation on this GT for an address range 323 + * xe_tlb_inval_range() - Issue a TLB invalidation for an address range 434 324 * @tlb_inval: TLB invalidation client 435 325 * @fence: invalidation fence which will be signal on TLB invalidation 436 326 * completion ··· 311 475 struct xe_tlb_inval_fence *fence, u64 start, u64 end, 312 476 u32 asid) 313 477 { 314 - struct xe_gt *gt = tlb_inval->private; 315 - struct xe_device *xe = gt_to_xe(gt); 316 - int ret; 317 - 318 - xe_gt_assert(gt, fence); 319 - 320 - /* Execlists not supported */ 321 - if (xe->info.force_execlist) { 322 - __inval_fence_signal(xe, fence); 323 - return 0; 324 - } 325 - 326 - mutex_lock(&gt->tlb_inval.seqno_lock); 327 - xe_tlb_inval_fence_prep(fence); 328 - 329 - ret = send_tlb_inval_ppgtt(gt, start, end, asid, fence->seqno); 330 - if (ret < 0) 331 - inval_fence_signal_unlocked(xe, fence); 332 - mutex_unlock(&gt->tlb_inval.seqno_lock); 333 - 334 - return ret; 478 + return xe_tlb_inval_issue(tlb_inval, fence, tlb_inval->ops->ppgtt, 479 + start, end, asid); 335 480 } 336 481 337 482 /** 338 - * xe_tlb_inval_vm - Issue a TLB invalidation on this GT for a VM 483 + * xe_tlb_inval_vm() - Issue a TLB invalidation for a VM 339 484 * @tlb_inval: TLB invalidation client 340 485 * @vm: VM to invalidate 341 486 * ··· 326 509 { 327 510 struct xe_tlb_inval_fence fence; 328 511 u64 range = 1ull << vm->xe->info.va_bits; 329 - int ret; 330 512 331 513 xe_tlb_inval_fence_init(tlb_inval, &fence, true); 332 - 333 - ret = xe_tlb_inval_range(tlb_inval, &fence, 0, range, vm->usm.asid); 334 - if (ret < 0) 335 - return; 336 - 514 + xe_tlb_inval_range(tlb_inval, &fence, 0, range, vm->usm.asid); 337 515 xe_tlb_inval_fence_wait(&fence); 338 516 } 339 517 340 518 /** 341 - * 
xe_tlb_inval_done_handler - TLB invalidation done handler 342 - * @gt: gt 519 + * xe_tlb_inval_done_handler() - TLB invalidation done handler 520 + * @tlb_inval: TLB invalidation client 343 521 * @seqno: seqno of invalidation that is done 344 522 * 345 523 * Update recv seqno, signal any TLB invalidation fences, and restart TDR 346 524 */ 347 - static void xe_tlb_inval_done_handler(struct xe_gt *gt, int seqno) 525 + void xe_tlb_inval_done_handler(struct xe_tlb_inval *tlb_inval, int seqno) 348 526 { 349 - struct xe_device *xe = gt_to_xe(gt); 527 + struct xe_device *xe = tlb_inval->xe; 350 528 struct xe_tlb_inval_fence *fence, *next; 351 529 unsigned long flags; 352 530 ··· 360 548 * officially process the CT message like if racing against 361 549 * process_g2h_msg(). 362 550 */ 363 - spin_lock_irqsave(&gt->tlb_inval.pending_lock, flags); 364 - if (tlb_inval_seqno_past(gt, seqno)) { 365 - spin_unlock_irqrestore(&gt->tlb_inval.pending_lock, flags); 551 + spin_lock_irqsave(&tlb_inval->pending_lock, flags); 552 + if (xe_tlb_inval_seqno_past(tlb_inval, seqno)) { 553 + spin_unlock_irqrestore(&tlb_inval->pending_lock, flags); 366 554 return; 367 555 } 368 556 369 - WRITE_ONCE(gt->tlb_inval.seqno_recv, seqno); 557 + WRITE_ONCE(tlb_inval->seqno_recv, seqno); 370 558 371 559 list_for_each_entry_safe(fence, next, 372 - &gt->tlb_inval.pending_fences, link) { 560 + &tlb_inval->pending_fences, link) { 373 561 trace_xe_tlb_inval_fence_recv(xe, fence); 374 562 375 - if (!tlb_inval_seqno_past(gt, fence->seqno)) 563 + if (!xe_tlb_inval_seqno_past(tlb_inval, fence->seqno)) 376 564 break; 377 565 378 - inval_fence_signal(xe, fence); 566 + xe_tlb_inval_fence_signal(fence); 379 567 } 380 568 381 - if (!list_empty(&gt->tlb_inval.pending_fences)) 569 + if (!list_empty(&tlb_inval->pending_fences)) 382 570 mod_delayed_work(system_wq, 383 - &gt->tlb_inval.fence_tdr, 384 - tlb_timeout_jiffies(gt)); 571 + &tlb_inval->fence_tdr, 572 + tlb_inval->ops->timeout_delay(tlb_inval)); 385 573 else 386 - 
cancel_delayed_work(&gt->tlb_inval.fence_tdr); 574 + cancel_delayed_work(&tlb_inval->fence_tdr); 387 575 388 - spin_unlock_irqrestore(&gt->tlb_inval.pending_lock, flags); 389 - } 390 - 391 - /** 392 - * xe_guc_tlb_inval_done_handler - TLB invalidation done handler 393 - * @guc: guc 394 - * @msg: message indicating TLB invalidation done 395 - * @len: length of message 396 - * 397 - * Parse seqno of TLB invalidation, wake any waiters for seqno, and signal any 398 - * invalidation fences for seqno. Algorithm for this depends on seqno being 399 - * received in-order and asserts this assumption. 400 - * 401 - * Return: 0 on success, -EPROTO for malformed messages. 402 - */ 403 - int xe_guc_tlb_inval_done_handler(struct xe_guc *guc, u32 *msg, u32 len) 404 - { 405 - struct xe_gt *gt = guc_to_gt(guc); 406 - 407 - if (unlikely(len != 1)) 408 - return -EPROTO; 409 - 410 - xe_tlb_inval_done_handler(gt, msg[0]); 411 - 412 - return 0; 576 + spin_unlock_irqrestore(&tlb_inval->pending_lock, flags); 413 577 } 414 578 415 579 static const char * 416 - inval_fence_get_driver_name(struct dma_fence *dma_fence) 580 + xe_inval_fence_get_driver_name(struct dma_fence *dma_fence) 417 581 { 418 582 return "xe"; 419 583 } 420 584 421 585 static const char * 422 - inval_fence_get_timeline_name(struct dma_fence *dma_fence) 586 + xe_inval_fence_get_timeline_name(struct dma_fence *dma_fence) 423 587 { 424 - return "inval_fence"; 588 + return "tlb_inval_fence"; 425 589 } 426 590 427 591 static const struct dma_fence_ops inval_fence_ops = { 428 - .get_driver_name = inval_fence_get_driver_name, 429 - .get_timeline_name = inval_fence_get_timeline_name, 592 + .get_driver_name = xe_inval_fence_get_driver_name, 593 + .get_timeline_name = xe_inval_fence_get_timeline_name, 430 594 }; 431 595 432 596 /** 433 - * xe_tlb_inval_fence_init - Initialize TLB invalidation fence 597 + * xe_tlb_inval_fence_init() - Initialize TLB invalidation fence 434 598 * @tlb_inval: TLB invalidation client 435 599 * @fence: 
TLB invalidation fence to initialize 436 600 * @stack: fence is stack variable ··· 419 631 struct xe_tlb_inval_fence *fence, 420 632 bool stack) 421 633 { 422 - struct xe_gt *gt = tlb_inval->private; 634 + xe_pm_runtime_get_noresume(tlb_inval->xe); 423 635 424 - xe_pm_runtime_get_noresume(gt_to_xe(gt)); 425 - 426 - spin_lock_irq(&gt->tlb_inval.lock); 427 - dma_fence_init(&fence->base, &inval_fence_ops, 428 - &gt->tlb_inval.lock, 636 + spin_lock_irq(&tlb_inval->lock); 637 + dma_fence_init(&fence->base, &inval_fence_ops, &tlb_inval->lock, 429 638 dma_fence_context_alloc(1), 1); 430 - spin_unlock_irq(&gt->tlb_inval.lock); 639 + spin_unlock_irq(&tlb_inval->lock); 431 640 INIT_LIST_HEAD(&fence->link); 432 641 if (stack) 433 642 set_bit(FENCE_STACK_BIT, &fence->base.flags);
+10 -5
drivers/gpu/drm/xe/xe_tlb_inval.h
··· 15 15 struct xe_vm; 16 16 17 17 int xe_gt_tlb_inval_init_early(struct xe_gt *gt); 18 - void xe_gt_tlb_inval_fini(struct xe_gt *gt); 19 18 20 19 void xe_tlb_inval_reset(struct xe_tlb_inval *tlb_inval); 21 - int xe_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval); 22 - void xe_tlb_inval_vm(struct xe_tlb_inval *tlb_inval, struct xe_vm *vm); 23 20 int xe_tlb_inval_all(struct xe_tlb_inval *tlb_inval, 24 21 struct xe_tlb_inval_fence *fence); 22 + int xe_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval); 23 + void xe_tlb_inval_vm(struct xe_tlb_inval *tlb_inval, struct xe_vm *vm); 25 24 int xe_tlb_inval_range(struct xe_tlb_inval *tlb_inval, 26 25 struct xe_tlb_inval_fence *fence, 27 26 u64 start, u64 end, u32 asid); 28 - int xe_guc_tlb_inval_done_handler(struct xe_guc *guc, u32 *msg, u32 len); 29 27 30 28 void xe_tlb_inval_fence_init(struct xe_tlb_inval *tlb_inval, 31 29 struct xe_tlb_inval_fence *fence, 32 30 bool stack); 33 - void xe_tlb_inval_fence_signal(struct xe_tlb_inval_fence *fence); 34 31 32 + /** 33 + * xe_tlb_inval_fence_wait() - TLB invalidation fence wait 34 + * @fence: TLB invalidation fence to wait on 35 + * 36 + * Wait on a TLB invalidation fence until it signals, non-interruptible 37 + */ 35 38 static inline void 36 39 xe_tlb_inval_fence_wait(struct xe_tlb_inval_fence *fence) 37 40 { 38 41 dma_fence_wait(&fence->base, false); 39 42 } 43 + 44 + void xe_tlb_inval_done_handler(struct xe_tlb_inval *tlb_inval, int seqno); 40 45 41 46 #endif /* _XE_TLB_INVAL_ */
+66 -1
drivers/gpu/drm/xe/xe_tlb_inval_types.h
··· 9 9 #include <linux/workqueue.h> 10 10 #include <linux/dma-fence.h> 11 11 12 - /** struct xe_tlb_inval - TLB invalidation client */ 12 + struct xe_tlb_inval; 13 + 14 + /** struct xe_tlb_inval_ops - TLB invalidation ops (backend) */ 15 + struct xe_tlb_inval_ops { 16 + /** 17 + * @all: Invalidate all TLBs 18 + * @tlb_inval: TLB invalidation client 19 + * @seqno: Seqno of TLB invalidation 20 + * 21 + * Return 0 on success, -ECANCELED if backend is mid-reset, error on 22 + * failure 23 + */ 24 + int (*all)(struct xe_tlb_inval *tlb_inval, u32 seqno); 25 + 26 + /** 27 + * @ggtt: Invalidate global translation TLBs 28 + * @tlb_inval: TLB invalidation client 29 + * @seqno: Seqno of TLB invalidation 30 + * 31 + * Return 0 on success, -ECANCELED if backend is mid-reset, error on 32 + * failure 33 + */ 34 + int (*ggtt)(struct xe_tlb_inval *tlb_inval, u32 seqno); 35 + 36 + /** 37 + * @ppgtt: Invalidate per-process translation TLBs 38 + * @tlb_inval: TLB invalidation client 39 + * @seqno: Seqno of TLB invalidation 40 + * @start: Start address 41 + * @end: End address 42 + * @asid: Address space ID 43 + * 44 + * Return 0 on success, -ECANCELED if backend is mid-reset, error on 45 + * failure 46 + */ 47 + int (*ppgtt)(struct xe_tlb_inval *tlb_inval, u32 seqno, u64 start, 48 + u64 end, u32 asid); 49 + 50 + /** 51 + * @initialized: Backend is initialized 52 + * @tlb_inval: TLB invalidation client 53 + * 54 + * Return: True if backend is initialized, False otherwise 55 + */ 56 + bool (*initialized)(struct xe_tlb_inval *tlb_inval); 57 + 58 + /** 59 + * @flush: Flush pending TLB invalidations 60 + * @tlb_inval: TLB invalidation client 61 + */ 62 + void (*flush)(struct xe_tlb_inval *tlb_inval); 63 + 64 + /** 65 + * @timeout_delay: Timeout delay for TLB invalidation 66 + * @tlb_inval: TLB invalidation client 67 + * 68 + * Return: Timeout delay for TLB invalidation in jiffies 69 + */ 70 + long (*timeout_delay)(struct xe_tlb_inval *tlb_inval); 71 + }; 72 + 73 + /** struct xe_tlb_inval - 
TLB invalidation client (frontend) */ 13 74 struct xe_tlb_inval { 14 75 /** @private: Backend private pointer */ 15 76 void *private; 77 + /** @xe: Pointer to Xe device */ 78 + struct xe_device *xe; 79 + /** @ops: TLB invalidation ops */ 80 + const struct xe_tlb_inval_ops *ops; 16 81 /** @tlb_inval.seqno: TLB invalidation seqno, protected by CT lock */ 17 82 #define TLB_INVALIDATION_SEQNO_MAX 0x100000 18 83 int seqno;