Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/xe: Drop xe_gt_tlb_invalidation_wait

Having two methods to wait on GT TLB invalidations is not ideal. Remove
xe_gt_tlb_invalidation_wait and only use GT TLB invalidation fences.

In addition to two methods being less than ideal, once GT TLB
invalidations are coalesced the seqno cannot be assigned during
xe_gt_tlb_invalidation_ggtt/range. Thus xe_gt_tlb_invalidation_wait
would not have a seqno to wait on. A fence however can be armed and
later signaled.

v3:
- Add explanation about coalescing to commit message
v4:
- Don't put dma fence if defined on stack (CI)
v5:
- Initialize ret to zero (CI)
v6:
- Use invalidation_fence_signal helper in tlb timeout (Matthew Auld)

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Nirmoy Das <nirmoy.das@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240719172905.1527927-3-matthew.brost@intel.com
(cherry picked from commit 61ac035361ae555ee5a17a7667fe96afdde3d59a)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>

authored by

Matthew Brost and committed by
Rodrigo Vivi
58bfe667 90be4cc6

+80 -110
+55 -93
drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
··· 17 17 #include "xe_trace.h" 18 18 #include "regs/xe_guc_regs.h" 19 19 20 + #define FENCE_STACK_BIT DMA_FENCE_FLAG_USER_BITS 21 + 20 22 /* 21 23 * TLB inval depends on pending commands in the CT queue and then the real 22 24 * invalidation time. Double up the time to process full CT queue ··· 35 33 return hw_tlb_timeout + 2 * delay; 36 34 } 37 35 36 + static void 37 + __invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) 38 + { 39 + bool stack = test_bit(FENCE_STACK_BIT, &fence->base.flags); 40 + 41 + trace_xe_gt_tlb_invalidation_fence_signal(xe, fence); 42 + dma_fence_signal(&fence->base); 43 + if (!stack) 44 + dma_fence_put(&fence->base); 45 + } 46 + 47 + static void 48 + invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) 49 + { 50 + list_del(&fence->link); 51 + __invalidation_fence_signal(xe, fence); 52 + } 38 53 39 54 static void xe_gt_tlb_fence_timeout(struct work_struct *work) 40 55 { ··· 73 54 xe_gt_err(gt, "TLB invalidation fence timeout, seqno=%d recv=%d", 74 55 fence->seqno, gt->tlb_invalidation.seqno_recv); 75 56 76 - list_del(&fence->link); 77 57 fence->base.error = -ETIME; 78 - dma_fence_signal(&fence->base); 79 - dma_fence_put(&fence->base); 58 + invalidation_fence_signal(xe, fence); 80 59 } 81 60 if (!list_empty(&gt->tlb_invalidation.pending_fences)) 82 61 queue_delayed_work(system_wq, ··· 104 87 return 0; 105 88 } 106 89 107 - static void 108 - __invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) 109 - { 110 - trace_xe_gt_tlb_invalidation_fence_signal(xe, fence); 111 - dma_fence_signal(&fence->base); 112 - dma_fence_put(&fence->base); 113 - } 114 - 115 - static void 116 - invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) 117 - { 118 - list_del(&fence->link); 119 - __invalidation_fence_signal(xe, fence); 120 - } 121 - 122 90 /** 123 91 * xe_gt_tlb_invalidation_reset - Initialize GT TLB 
invalidation reset 124 92 * @gt: graphics tile ··· 113 111 void xe_gt_tlb_invalidation_reset(struct xe_gt *gt) 114 112 { 115 113 struct xe_gt_tlb_invalidation_fence *fence, *next; 116 - struct xe_guc *guc = &gt->uc.guc; 117 114 int pending_seqno; 118 115 119 116 /* ··· 135 134 else 136 135 pending_seqno = gt->tlb_invalidation.seqno - 1; 137 136 WRITE_ONCE(gt->tlb_invalidation.seqno_recv, pending_seqno); 138 - wake_up_all(&guc->ct.wq); 139 137 140 138 list_for_each_entry_safe(fence, next, 141 139 &gt->tlb_invalidation.pending_fences, link) ··· 165 165 int seqno; 166 166 int ret; 167 167 168 + xe_gt_assert(gt, fence); 169 + 168 170 /* 169 171 * XXX: The seqno algorithm relies on TLB invalidation being processed 170 172 * in order which they currently are, if that changes the algorithm will ··· 175 173 176 174 mutex_lock(&guc->ct.lock); 177 175 seqno = gt->tlb_invalidation.seqno; 178 - if (fence) { 179 - fence->seqno = seqno; 180 - trace_xe_gt_tlb_invalidation_fence_send(xe, fence); 181 - } 176 + fence->seqno = seqno; 177 + trace_xe_gt_tlb_invalidation_fence_send(xe, fence); 182 178 action[1] = seqno; 183 179 ret = xe_guc_ct_send_locked(&guc->ct, action, len, 184 180 G2H_LEN_DW_TLB_INVALIDATE, 1); ··· 209 209 TLB_INVALIDATION_SEQNO_MAX; 210 210 if (!gt->tlb_invalidation.seqno) 211 211 gt->tlb_invalidation.seqno = 1; 212 - ret = seqno; 213 212 } 214 213 mutex_unlock(&guc->ct.lock); 215 214 ··· 222 223 /** 223 224 * xe_gt_tlb_invalidation_guc - Issue a TLB invalidation on this GT for the GuC 224 225 * @gt: graphics tile 226 + * @fence: invalidation fence which will be signal on TLB invalidation 227 + * completion 225 228 * 226 229 * Issue a TLB invalidation for the GuC. Completion of TLB is asynchronous and 227 - * caller can use seqno + xe_gt_tlb_invalidation_wait to wait for completion. 230 + * caller can use the invalidation fence to wait for completion. 
228 231 * 229 - * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success, 230 - * negative error code on error. 232 + * Return: 0 on success, negative error code on error 231 233 */ 232 - static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt) 234 + static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt, 235 + struct xe_gt_tlb_invalidation_fence *fence) 233 236 { 234 237 u32 action[] = { 235 238 XE_GUC_ACTION_TLB_INVALIDATION, ··· 239 238 MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC), 240 239 }; 241 240 242 - return send_tlb_invalidation(&gt->uc.guc, NULL, action, 241 + return send_tlb_invalidation(&gt->uc.guc, fence, action, 243 242 ARRAY_SIZE(action)); 244 243 } 245 244 ··· 258 257 259 258 if (xe_guc_ct_enabled(&gt->uc.guc.ct) && 260 259 gt->uc.guc.submission_state.enabled) { 261 - int seqno; 260 + struct xe_gt_tlb_invalidation_fence fence; 261 + int ret; 262 262 263 - seqno = xe_gt_tlb_invalidation_guc(gt); 264 - if (seqno <= 0) 265 - return seqno; 263 + xe_gt_tlb_invalidation_fence_init(gt, &fence, true); 264 + ret = xe_gt_tlb_invalidation_guc(gt, &fence); 265 + if (ret < 0) 266 + return ret; 266 267 267 - xe_gt_tlb_invalidation_wait(gt, seqno); 268 + xe_gt_tlb_invalidation_fence_wait(&fence); 268 269 } else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) { 269 270 if (IS_SRIOV_VF(xe)) 270 271 return 0; ··· 293 290 * 294 291 * @gt: graphics tile 295 292 * @fence: invalidation fence which will be signal on TLB invalidation 296 - * completion, can be NULL 293 + * completion 297 294 * @start: start address 298 295 * @end: end address 299 296 * @asid: address space id 300 297 * 301 298 * Issue a range based TLB invalidation if supported, if not fallback to a full 302 - * TLB invalidation. Completion of TLB is asynchronous and caller can either use 303 - * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for 304 - * completion. 299 + * TLB invalidation. 
Completion of TLB is asynchronous and caller can use 300 + * the invalidation fence to wait for completion. 305 301 * 306 - * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success, 307 - * negative error code on error. 302 + * Return: Negative error code on error, 0 on success 308 303 */ 309 304 int xe_gt_tlb_invalidation_range(struct xe_gt *gt, 310 305 struct xe_gt_tlb_invalidation_fence *fence, ··· 313 312 u32 action[MAX_TLB_INVALIDATION_LEN]; 314 313 int len = 0; 315 314 315 + xe_gt_assert(gt, fence); 316 + 316 317 /* Execlists not supported */ 317 318 if (gt_to_xe(gt)->info.force_execlist) { 318 - if (fence) 319 - __invalidation_fence_signal(xe, fence); 320 - 319 + __invalidation_fence_signal(xe, fence); 321 320 return 0; 322 321 } 323 322 ··· 383 382 * @vma: VMA to invalidate 384 383 * 385 384 * Issue a range based TLB invalidation if supported, if not fallback to a full 386 - * TLB invalidation. Completion of TLB is asynchronous and caller can either use 387 - * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for 388 - * completion. 385 + * TLB invalidation. Completion of TLB is asynchronous and caller can use 386 + * the invalidation fence to wait for completion. 389 387 * 390 - * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success, 391 - * negative error code on error. 388 + * Return: Negative error code on error, 0 on success 392 389 */ 393 390 int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, 394 391 struct xe_gt_tlb_invalidation_fence *fence, ··· 397 398 return xe_gt_tlb_invalidation_range(gt, fence, xe_vma_start(vma), 398 399 xe_vma_end(vma), 399 400 xe_vma_vm(vma)->usm.asid); 400 - } 401 - 402 - /** 403 - * xe_gt_tlb_invalidation_wait - Wait for TLB to complete 404 - * @gt: graphics tile 405 - * @seqno: seqno to wait which was returned from xe_gt_tlb_invalidation 406 - * 407 - * Wait for tlb_timeout_jiffies() for a TLB invalidation to complete. 
408 - * 409 - * Return: 0 on success, -ETIME on TLB invalidation timeout 410 - */ 411 - int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno) 412 - { 413 - struct xe_guc *guc = &gt->uc.guc; 414 - int ret; 415 - 416 - /* Execlists not supported */ 417 - if (gt_to_xe(gt)->info.force_execlist) 418 - return 0; 419 - 420 - /* 421 - * XXX: See above, this algorithm only works if seqno are always in 422 - * order 423 - */ 424 - ret = wait_event_timeout(guc->ct.wq, 425 - tlb_invalidation_seqno_past(gt, seqno), 426 - tlb_timeout_jiffies(gt)); 427 - if (!ret) { 428 - struct drm_printer p = xe_gt_err_printer(gt); 429 - 430 - xe_gt_err(gt, "TLB invalidation time'd out, seqno=%d, recv=%d\n", 431 - seqno, gt->tlb_invalidation.seqno_recv); 432 - xe_guc_ct_print(&guc->ct, &p, true); 433 - return -ETIME; 434 - } 435 - 436 - return 0; 437 401 } 438 402 439 403 /** ··· 442 480 return 0; 443 481 } 444 482 445 - /* 446 - * wake_up_all() and wait_event_timeout() already have the correct 447 - * barriers. 
448 - */ 449 483 WRITE_ONCE(gt->tlb_invalidation.seqno_recv, msg[0]); 450 - wake_up_all(&guc->ct.wq); 451 484 452 485 list_for_each_entry_safe(fence, next, 453 486 &gt->tlb_invalidation.pending_fences, link) { ··· 487 530 * xe_gt_tlb_invalidation_fence_init - Initialize TLB invalidation fence 488 531 * @gt: GT 489 532 * @fence: TLB invalidation fence to initialize 533 + * @stack: fence is stack variable 490 534 * 491 535 * Initialize TLB invalidation fence for use 492 536 */ 493 537 void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, 494 - struct xe_gt_tlb_invalidation_fence *fence) 538 + struct xe_gt_tlb_invalidation_fence *fence, 539 + bool stack) 495 540 { 496 541 spin_lock_irq(&gt->tlb_invalidation.lock); 497 542 dma_fence_init(&fence->base, &invalidation_fence_ops, ··· 501 542 dma_fence_context_alloc(1), 1); 502 543 spin_unlock_irq(&gt->tlb_invalidation.lock); 503 544 INIT_LIST_HEAD(&fence->link); 504 - dma_fence_get(&fence->base); 545 + if (stack) 546 + set_bit(FENCE_STACK_BIT, &fence->base.flags); 547 + else 548 + dma_fence_get(&fence->base); 505 549 }
+8 -2
drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
··· 23 23 int xe_gt_tlb_invalidation_range(struct xe_gt *gt, 24 24 struct xe_gt_tlb_invalidation_fence *fence, 25 25 u64 start, u64 end, u32 asid); 26 - int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno); 27 26 int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len); 28 27 29 28 void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, 30 - struct xe_gt_tlb_invalidation_fence *fence); 29 + struct xe_gt_tlb_invalidation_fence *fence, 30 + bool stack); 31 + 32 + static inline void 33 + xe_gt_tlb_invalidation_fence_wait(struct xe_gt_tlb_invalidation_fence *fence) 34 + { 35 + dma_fence_wait(&fence->base, false); 36 + } 31 37 32 38 #endif /* _XE_GT_TLB_INVALIDATION_ */
+1 -1
drivers/gpu/drm/xe/xe_pt.c
··· 1153 1153 1154 1154 trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt), &ifence->base); 1155 1155 1156 - xe_gt_tlb_invalidation_fence_init(gt, &ifence->base); 1156 + xe_gt_tlb_invalidation_fence_init(gt, &ifence->base, false); 1157 1157 1158 1158 ifence->fence = fence; 1159 1159 ifence->gt = gt;
+16 -14
drivers/gpu/drm/xe/xe_vm.c
··· 3341 3341 { 3342 3342 struct xe_device *xe = xe_vma_vm(vma)->xe; 3343 3343 struct xe_tile *tile; 3344 + struct xe_gt_tlb_invalidation_fence fence[XE_MAX_TILES_PER_DEVICE]; 3344 3345 u32 tile_needs_invalidate = 0; 3345 - int seqno[XE_MAX_TILES_PER_DEVICE]; 3346 3346 u8 id; 3347 - int ret; 3347 + int ret = 0; 3348 3348 3349 3349 xe_assert(xe, !xe_vma_is_null(vma)); 3350 3350 trace_xe_vma_invalidate(vma); ··· 3369 3369 3370 3370 for_each_tile(tile, xe, id) { 3371 3371 if (xe_pt_zap_ptes(tile, vma)) { 3372 - tile_needs_invalidate |= BIT(id); 3373 3372 xe_device_wmb(xe); 3373 + xe_gt_tlb_invalidation_fence_init(tile->primary_gt, 3374 + &fence[id], true); 3375 + 3374 3376 /* 3375 3377 * FIXME: We potentially need to invalidate multiple 3376 3378 * GTs within the tile 3377 3379 */ 3378 - seqno[id] = xe_gt_tlb_invalidation_vma(tile->primary_gt, NULL, vma); 3379 - if (seqno[id] < 0) 3380 - return seqno[id]; 3380 + ret = xe_gt_tlb_invalidation_vma(tile->primary_gt, 3381 + &fence[id], vma); 3382 + if (ret < 0) 3383 + goto wait; 3384 + 3385 + tile_needs_invalidate |= BIT(id); 3381 3386 } 3382 3387 } 3383 3388 3384 - for_each_tile(tile, xe, id) { 3385 - if (tile_needs_invalidate & BIT(id)) { 3386 - ret = xe_gt_tlb_invalidation_wait(tile->primary_gt, seqno[id]); 3387 - if (ret < 0) 3388 - return ret; 3389 - } 3390 - } 3389 + wait: 3390 + for_each_tile(tile, xe, id) 3391 + if (tile_needs_invalidate & BIT(id)) 3392 + xe_gt_tlb_invalidation_fence_wait(&fence[id]); 3391 3393 3392 3394 vma->tile_invalidated = vma->tile_mask; 3393 3395 3394 - return 0; 3396 + return ret; 3395 3397 } 3396 3398 3397 3399 struct xe_vm_snapshot {