Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/xe: Decouple job seqno and lrc seqno

Tightly coupling these seqno presents problems if alternative fences for
jobs are used. Decouple these for correctness.

v2:
- Slightly reword commit message (Thomas)
- Make sure the lrc fence ops are used in comparison (Thomas)
- Assume seqno is unsigned rather than signed in format string (Thomas)

Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240527135912.152156-2-thomas.hellstrom@linux.intel.com

Authored by Matthew Brost; committed by Thomas Hellström.
08f72008 d79e8cab

+30 -19
+1 -1
drivers/gpu/drm/xe/xe_exec_queue.c
@@ -98,7 +98,7 @@
 
 	if (xe_exec_queue_is_parallel(q)) {
 		q->parallel.composite_fence_ctx = dma_fence_context_alloc(1);
-		q->parallel.composite_fence_seqno = XE_FENCE_INITIAL_SEQNO;
+		q->parallel.composite_fence_seqno = 0;
 	}
 
 	return q;
+3 -2
drivers/gpu/drm/xe/xe_guc_submit.c
@@ -940,8 +940,9 @@
 		return DRM_GPU_SCHED_STAT_NOMINAL;
 	}
 
-	drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx",
-		   xe_sched_job_seqno(job), q->guc->id, q->flags);
+	drm_notice(&xe->drm, "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx",
+		   xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
+		   q->guc->id, q->flags);
 	xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL,
 		   "Kernel-submitted job timed out\n");
 	xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q),
+6 -6
drivers/gpu/drm/xe/xe_ring_ops.c
@@ -398,7 +398,7 @@
 
 	__emit_job_gen12_simple(job, job->q->lrc,
 				job->batch_addr[0],
-				xe_sched_job_seqno(job));
+				xe_sched_job_lrc_seqno(job));
 }
 
 static void emit_job_gen12_copy(struct xe_sched_job *job)
@@ -407,14 +407,14 @@
 
 	if (xe_sched_job_is_migration(job->q)) {
 		emit_migration_job_gen12(job, job->q->lrc,
-					 xe_sched_job_seqno(job));
+					 xe_sched_job_lrc_seqno(job));
 		return;
 	}
 
 	for (i = 0; i < job->q->width; ++i)
 		__emit_job_gen12_simple(job, job->q->lrc + i,
-					job->batch_addr[i],
-					xe_sched_job_seqno(job));
+					job->batch_addr[i],
+					xe_sched_job_lrc_seqno(job));
 }
 
 static void emit_job_gen12_video(struct xe_sched_job *job)
@@ -425,7 +425,7 @@
 	for (i = 0; i < job->q->width; ++i)
 		__emit_job_gen12_video(job, job->q->lrc + i,
 				       job->batch_addr[i],
-				       xe_sched_job_seqno(job));
+				       xe_sched_job_lrc_seqno(job));
 }
 
 static void emit_job_gen12_render_compute(struct xe_sched_job *job)
@@ -435,7 +435,7 @@
 	for (i = 0; i < job->q->width; ++i)
 		__emit_job_gen12_render_compute(job, job->q->lrc + i,
 						job->batch_addr[i],
-						xe_sched_job_seqno(job));
+						xe_sched_job_lrc_seqno(job));
 }
 
 static const struct xe_ring_ops ring_ops_gen12_gsc = {
+8 -8
drivers/gpu/drm/xe/xe_sched_job.c
@@ -117,6 +117,7 @@
 			err = PTR_ERR(job->fence);
 			goto err_sched_job;
 		}
+		job->lrc_seqno = job->fence->seqno;
 	} else {
 		struct dma_fence_array *cf;
 
@@ -133,6 +132,8 @@
 				err = PTR_ERR(fences[j]);
 				goto err_fences;
 			}
+			if (!j)
+				job->lrc_seqno = fences[0]->seqno;
 		}
 
 		cf = dma_fence_array_create(q->width, fences,
@@ -146,10 +143,6 @@
 			err = -ENOMEM;
 			goto err_fences;
 		}
-
-		/* Sanity check */
-		for (j = 0; j < q->width; ++j)
-			xe_assert(job_to_xe(job), cf->base.seqno == fences[j]->seqno);
 
 		job->fence = &cf->base;
 	}
@@ -228,9 +229,9 @@
 {
 	struct xe_lrc *lrc = job->q->lrc;
 
-	return !__dma_fence_is_later(xe_sched_job_seqno(job),
+	return !__dma_fence_is_later(xe_sched_job_lrc_seqno(job),
 				     xe_lrc_start_seqno(lrc),
-				     job->fence->ops);
+				     dma_fence_array_first(job->fence)->ops);
 }
 
 bool xe_sched_job_completed(struct xe_sched_job *job)
@@ -242,8 +243,9 @@
 	 * parallel handshake is done.
 	 */
 
-	return !__dma_fence_is_later(xe_sched_job_seqno(job), xe_lrc_seqno(lrc),
-				     job->fence->ops);
+	return !__dma_fence_is_later(xe_sched_job_lrc_seqno(job),
+				     xe_lrc_seqno(lrc),
+				     dma_fence_array_first(job->fence)->ops);
 }
 
 void xe_sched_job_arm(struct xe_sched_job *job)
+5
drivers/gpu/drm/xe/xe_sched_job.h
@@ -73,6 +73,11 @@
 	return job->fence->seqno;
 }
 
+static inline u32 xe_sched_job_lrc_seqno(struct xe_sched_job *job)
+{
+	return job->lrc_seqno;
+}
+
 static inline void
 xe_sched_job_add_migrate_flush(struct xe_sched_job *job, u32 flags)
 {
+2
drivers/gpu/drm/xe/xe_sched_job_types.h
@@ -37,6 +37,8 @@
 		/** @user_fence.value: write back value */
 		u64 value;
 	} user_fence;
+	/** @lrc_seqno: LRC seqno */
+	u32 lrc_seqno;
 	/** @migrate_flush_flags: Additional flush flags for migration jobs */
 	u32 migrate_flush_flags;
 	/** @ring_ops_flush_tlb: The ring ops need to flush TLB before payload. */
+5 -2
drivers/gpu/drm/xe/xe_trace.h
@@ -254,6 +254,7 @@
 
 	    TP_STRUCT__entry(
 		     __field(u32, seqno)
+		     __field(u32, lrc_seqno)
 		     __field(u16, guc_id)
 		     __field(u32, guc_state)
 		     __field(u32, flags)
@@ -265,6 +264,7 @@
 
 	    TP_fast_assign(
 		   __entry->seqno = xe_sched_job_seqno(job);
+		   __entry->lrc_seqno = xe_sched_job_lrc_seqno(job);
 		   __entry->guc_id = job->q->guc->id;
 		   __entry->guc_state =
 		   atomic_read(&job->q->guc->state);
@@ -275,8 +273,9 @@
 		   __entry->batch_addr = (u64)job->batch_addr[0];
 	    ),
 
-	    TP_printk("fence=%p, seqno=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d",
-		      __entry->fence, __entry->seqno, __entry->guc_id,
+	    TP_printk("fence=%p, seqno=%u, lrc_seqno=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d",
+		      __entry->fence, __entry->seqno,
+		      __entry->lrc_seqno, __entry->guc_id,
 		      __entry->batch_addr, __entry->guc_state,
 		      __entry->flags, __entry->error)
);