Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/xe: standardize vm-less kernel submissions

Currently, the only submission in the driver that doesn't use a vm is the
WA setup. We still pass a vm structure (the migration one), but we don't
actually use it at submission time and we instead have a hack to use
GGTT for this particular engine.
Instead of special-casing the WA engine, we can skip providing a VM and
use that as a selector for whether to use GGTT or PPGTT. As part of this
change, we can drop the special engine flag for the WA engine and switch
the WA submission to use the standard job functions instead of dedicated
ones.

v2: rebased on s/engine/exec_queue

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20230822173334.1664332-4-daniele.ceraolospurio@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>

authored by

Daniele Ceraolo Spurio and committed by
Rodrigo Vivi
9e952635 923e4238

+12 -37
-10
drivers/gpu/drm/xe/xe_bb.c
··· 73 73 return xe_sched_job_create(q, addr); 74 74 } 75 75 76 - struct xe_sched_job *xe_bb_create_wa_job(struct xe_exec_queue *q, 77 - struct xe_bb *bb, u64 batch_base_ofs) 78 - { 79 - u64 addr = batch_base_ofs + drm_suballoc_soffset(bb->bo); 80 - 81 - XE_WARN_ON(!(q->vm->flags & XE_VM_FLAG_MIGRATION)); 82 - 83 - return __xe_bb_create_job(q, bb, &addr); 84 - } 85 - 86 76 struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q, 87 77 struct xe_bb *bb, 88 78 u64 batch_base_ofs,
-2
drivers/gpu/drm/xe/xe_bb.h
··· 20 20 struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q, 21 21 struct xe_bb *bb, u64 batch_ofs, 22 22 u32 second_idx); 23 - struct xe_sched_job *xe_bb_create_wa_job(struct xe_exec_queue *q, 24 - struct xe_bb *bb, u64 batch_ofs); 25 23 void xe_bb_free(struct xe_bb *bb, struct dma_fence *fence); 26 24 27 25 #endif
+2 -2
drivers/gpu/drm/xe/xe_exec_queue.c
··· 95 95 * can perform GuC CT actions when needed. Caller is expected to 96 96 * have already grabbed the rpm ref outside any sensitive locks. 97 97 */ 98 - if (q->flags & EXEC_QUEUE_FLAG_VM) 98 + if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM)) 99 99 drm_WARN_ON(&xe->drm, !xe_device_mem_access_get_if_ongoing(xe)); 100 100 101 101 return q; ··· 174 174 xe_lrc_finish(q->lrc + i); 175 175 if (q->vm) 176 176 xe_vm_put(q->vm); 177 - if (q->flags & EXEC_QUEUE_FLAG_VM) 177 + if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM)) 178 178 xe_device_mem_access_put(gt_to_xe(q->gt)); 179 179 180 180 kfree(q);
-2
drivers/gpu/drm/xe/xe_exec_queue_types.h
··· 79 79 #define EXEC_QUEUE_FLAG_VM BIT(5) 80 80 /* child of VM queue for multi-tile VM jobs */ 81 81 #define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD BIT(6) 82 - /* queue used for WA setup */ 83 - #define EXEC_QUEUE_FLAG_WA BIT(7) 84 82 85 83 /** 86 84 * @flags: flags for this exec queue, should statically setup aside from ban
+7 -16
drivers/gpu/drm/xe/xe_gt.c
··· 87 87 struct xe_sched_job *job; 88 88 struct xe_bb *bb; 89 89 struct dma_fence *fence; 90 - u64 batch_ofs; 91 90 long timeout; 92 91 93 92 bb = xe_bb_new(gt, 4, false); 94 93 if (IS_ERR(bb)) 95 94 return PTR_ERR(bb); 96 95 97 - batch_ofs = xe_bo_ggtt_addr(gt_to_tile(gt)->mem.kernel_bb_pool->bo); 98 - job = xe_bb_create_wa_job(q, bb, batch_ofs); 96 + job = xe_bb_create_job(q, bb); 99 97 if (IS_ERR(job)) { 100 98 xe_bb_free(bb, NULL); 101 99 return PTR_ERR(job); ··· 122 124 struct xe_sched_job *job; 123 125 struct xe_bb *bb; 124 126 struct dma_fence *fence; 125 - u64 batch_ofs; 126 127 long timeout; 127 128 int count = 0; 128 129 ··· 140 143 } 141 144 } 142 145 143 - batch_ofs = xe_bo_ggtt_addr(gt_to_tile(gt)->mem.kernel_bb_pool->bo); 144 - job = xe_bb_create_wa_job(q, bb, batch_ofs); 146 + job = xe_bb_create_job(q, bb); 145 147 if (IS_ERR(job)) { 146 148 xe_bb_free(bb, NULL); 147 149 return PTR_ERR(job); ··· 164 168 int xe_gt_record_default_lrcs(struct xe_gt *gt) 165 169 { 166 170 struct xe_device *xe = gt_to_xe(gt); 167 - struct xe_tile *tile = gt_to_tile(gt); 168 171 struct xe_hw_engine *hwe; 169 172 enum xe_hw_engine_id id; 170 173 int err = 0; 171 174 172 175 for_each_hw_engine(hwe, gt, id) { 173 176 struct xe_exec_queue *q, *nop_q; 174 - struct xe_vm *vm; 175 177 void *default_lrc; 176 178 177 179 if (gt->default_lrc[hwe->class]) ··· 186 192 if (!default_lrc) 187 193 return -ENOMEM; 188 194 189 - vm = xe_migrate_get_vm(tile->migrate); 190 - q = xe_exec_queue_create(xe, vm, BIT(hwe->logical_instance), 1, 191 - hwe, EXEC_QUEUE_FLAG_WA); 195 + q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance), 1, 196 + hwe, EXEC_QUEUE_FLAG_KERNEL); 192 197 if (IS_ERR(q)) { 193 198 err = PTR_ERR(q); 194 199 xe_gt_err(gt, "hwe %s: xe_exec_queue_create failed (%pe)\n", 195 200 hwe->name, q); 196 - goto put_vm; 201 + return err; 197 202 } 198 203 199 204 /* Prime golden LRC with known good state */ ··· 203 210 goto put_exec_queue; 204 211 } 205 212 206 - nop_q = 
xe_exec_queue_create(xe, vm, BIT(hwe->logical_instance), 207 - 1, hwe, EXEC_QUEUE_FLAG_WA); 213 + nop_q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance), 214 + 1, hwe, EXEC_QUEUE_FLAG_KERNEL); 208 215 if (IS_ERR(nop_q)) { 209 216 err = PTR_ERR(nop_q); 210 217 xe_gt_err(gt, "hwe %s: nop xe_exec_queue_create failed (%pe)\n", ··· 238 245 xe_exec_queue_put(nop_q); 239 246 put_exec_queue: 240 247 xe_exec_queue_put(q); 241 - put_vm: 242 - xe_vm_put(vm); 243 248 if (err) 244 249 break; 245 250 }
+1 -1
drivers/gpu/drm/xe/xe_ring_ops.c
··· 202 202 203 203 static u32 get_ppgtt_flag(struct xe_sched_job *job) 204 204 { 205 - return !(job->q->flags & EXEC_QUEUE_FLAG_WA) ? BIT(8) : 0; 205 + return job->q->vm ? BIT(8) : 0; 206 206 } 207 207 208 208 /* for engines that don't require any special HW handling (no EUs, no aux inval, etc) */
+2 -4
drivers/gpu/drm/xe/xe_sched_job.c
··· 59 59 60 60 bool xe_sched_job_is_migration(struct xe_exec_queue *q) 61 61 { 62 - return q->vm && (q->vm->flags & XE_VM_FLAG_MIGRATION) && 63 - !(q->flags & EXEC_QUEUE_FLAG_WA); 62 + return q->vm && (q->vm->flags & XE_VM_FLAG_MIGRATION); 64 63 } 65 64 66 65 static void job_free(struct xe_sched_job *job) ··· 90 91 XE_WARN_ON(!q->vm && !(q->flags & EXEC_QUEUE_FLAG_KERNEL)); 91 92 92 93 /* Migration and kernel engines have their own locking */ 93 - if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM | 94 - EXEC_QUEUE_FLAG_WA))) { 94 + if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) { 95 95 lockdep_assert_held(&q->vm->lock); 96 96 if (!xe_vm_no_dma_fences(q->vm)) 97 97 xe_vm_assert_held(q->vm);