Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: MIT
2/*
3 * Copyright © 2021 Intel Corporation
4 */
5
6#include "xe_sched_job.h"
7
8#include <uapi/drm/xe_drm.h>
9#include <linux/dma-fence-chain.h>
10#include <linux/slab.h>
11
12#include "xe_device.h"
13#include "xe_exec_queue.h"
14#include "xe_gt.h"
15#include "xe_hw_engine_types.h"
16#include "xe_hw_fence.h"
17#include "xe_lrc.h"
18#include "xe_macros.h"
19#include "xe_pm.h"
20#include "xe_sync_types.h"
21#include "xe_trace.h"
22#include "xe_vm.h"
23
/* Cache for jobs sized for a single struct xe_job_ptrs entry. */
static struct kmem_cache *xe_sched_job_slab;
/* Cache for jobs sized for XE_HW_ENGINE_MAX_INSTANCE struct xe_job_ptrs entries. */
static struct kmem_cache *xe_sched_job_parallel_slab;
26
27int __init xe_sched_job_module_init(void)
28{
29 xe_sched_job_slab =
30 kmem_cache_create("xe_sched_job",
31 sizeof(struct xe_sched_job) +
32 sizeof(struct xe_job_ptrs), 0,
33 SLAB_HWCACHE_ALIGN, NULL);
34 if (!xe_sched_job_slab)
35 return -ENOMEM;
36
37 xe_sched_job_parallel_slab =
38 kmem_cache_create("xe_sched_job_parallel",
39 sizeof(struct xe_sched_job) +
40 sizeof(struct xe_job_ptrs) *
41 XE_HW_ENGINE_MAX_INSTANCE, 0,
42 SLAB_HWCACHE_ALIGN, NULL);
43 if (!xe_sched_job_parallel_slab) {
44 kmem_cache_destroy(xe_sched_job_slab);
45 return -ENOMEM;
46 }
47
48 return 0;
49}
50
51void xe_sched_job_module_exit(void)
52{
53 kmem_cache_destroy(xe_sched_job_slab);
54 kmem_cache_destroy(xe_sched_job_parallel_slab);
55}
56
57static struct xe_sched_job *job_alloc(bool parallel)
58{
59 return kmem_cache_zalloc(parallel ? xe_sched_job_parallel_slab :
60 xe_sched_job_slab, GFP_KERNEL);
61}
62
63bool xe_sched_job_is_migration(struct xe_exec_queue *q)
64{
65 return q->vm && (q->vm->flags & XE_VM_FLAG_MIGRATION);
66}
67
68static void job_free(struct xe_sched_job *job)
69{
70 struct xe_exec_queue *q = job->q;
71 bool is_migration = xe_sched_job_is_migration(q);
72
73 kmem_cache_free(xe_exec_queue_is_parallel(job->q) || is_migration ?
74 xe_sched_job_parallel_slab : xe_sched_job_slab, job);
75}
76
77static struct xe_device *job_to_xe(struct xe_sched_job *job)
78{
79 return gt_to_xe(job->q->gt);
80}
81
82/* Free unused pre-allocated fences */
83static void xe_sched_job_free_fences(struct xe_sched_job *job)
84{
85 int i;
86
87 for (i = 0; i < job->q->width; ++i) {
88 struct xe_job_ptrs *ptrs = &job->ptrs[i];
89
90 if (ptrs->lrc_fence)
91 xe_lrc_free_seqno_fence(ptrs->lrc_fence);
92 dma_fence_chain_free(ptrs->chain_fence);
93 }
94}
95
96struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
97 u64 *batch_addr)
98{
99 bool is_migration = xe_sched_job_is_migration(q);
100 struct xe_sched_job *job;
101 int err;
102 int i;
103 u32 width;
104
105 /* only a kernel context can submit a vm-less job */
106 XE_WARN_ON(!q->vm && !(q->flags & EXEC_QUEUE_FLAG_KERNEL));
107
108 job = job_alloc(xe_exec_queue_is_parallel(q) || is_migration);
109 if (!job)
110 return ERR_PTR(-ENOMEM);
111
112 job->q = q;
113 kref_init(&job->refcount);
114 xe_exec_queue_get(job->q);
115
116 err = drm_sched_job_init(&job->drm, q->entity, 1, NULL,
117 q->xef ? q->xef->drm->client_id : 0);
118 if (err)
119 goto err_free;
120
121 for (i = 0; i < q->width; ++i) {
122 struct dma_fence *fence = xe_lrc_alloc_seqno_fence();
123 struct dma_fence_chain *chain;
124
125 if (IS_ERR(fence)) {
126 err = PTR_ERR(fence);
127 goto err_sched_job;
128 }
129 job->ptrs[i].lrc_fence = fence;
130
131 if (i + 1 == q->width)
132 continue;
133
134 chain = dma_fence_chain_alloc();
135 if (!chain) {
136 err = -ENOMEM;
137 goto err_sched_job;
138 }
139 job->ptrs[i].chain_fence = chain;
140 }
141
142 width = q->width;
143 if (is_migration)
144 width = 2;
145
146 for (i = 0; i < width; ++i)
147 job->ptrs[i].batch_addr = batch_addr[i];
148
149 atomic_inc(&q->job_cnt);
150 xe_pm_runtime_get_noresume(job_to_xe(job));
151 trace_xe_sched_job_create(job);
152 return job;
153
154err_sched_job:
155 xe_sched_job_free_fences(job);
156 drm_sched_job_cleanup(&job->drm);
157err_free:
158 xe_exec_queue_put(q);
159 job_free(job);
160 return ERR_PTR(err);
161}
162
163/**
164 * xe_sched_job_destroy - Destroy Xe schedule job
165 * @ref: reference to Xe schedule job
166 *
167 * Called when ref == 0, drop a reference to job's xe_engine + fence, cleanup
168 * base DRM schedule job, and free memory for Xe schedule job.
169 */
170void xe_sched_job_destroy(struct kref *ref)
171{
172 struct xe_sched_job *job =
173 container_of(ref, struct xe_sched_job, refcount);
174 struct xe_device *xe = job_to_xe(job);
175 struct xe_exec_queue *q = job->q;
176
177 xe_sched_job_free_fences(job);
178 dma_fence_put(job->fence);
179 drm_sched_job_cleanup(&job->drm);
180 job_free(job);
181 atomic_dec(&q->job_cnt);
182 xe_exec_queue_put(q);
183 xe_pm_runtime_put(xe);
184}
185
186/* Set the error status under the fence to avoid racing with signaling */
187static bool xe_fence_set_error(struct dma_fence *fence, int error)
188{
189 unsigned long irq_flags;
190 bool signaled;
191
192 spin_lock_irqsave(fence->lock, irq_flags);
193 signaled = test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags);
194 if (!signaled)
195 dma_fence_set_error(fence, error);
196 spin_unlock_irqrestore(fence->lock, irq_flags);
197
198 return signaled;
199}
200
201void xe_sched_job_set_error(struct xe_sched_job *job, int error)
202{
203 if (xe_fence_set_error(job->fence, error))
204 return;
205
206 if (dma_fence_is_chain(job->fence)) {
207 struct dma_fence *iter;
208
209 dma_fence_chain_for_each(iter, job->fence)
210 xe_fence_set_error(dma_fence_chain_contained(iter),
211 error);
212 }
213
214 trace_xe_sched_job_set_error(job);
215
216 dma_fence_enable_sw_signaling(job->fence);
217 xe_hw_fence_irq_run(job->q->fence_irq);
218}
219
220bool xe_sched_job_started(struct xe_sched_job *job)
221{
222 struct dma_fence *fence = dma_fence_chain_contained(job->fence);
223 struct xe_lrc *lrc = job->q->lrc[0];
224
225 return !__dma_fence_is_later(fence,
226 xe_sched_job_lrc_seqno(job),
227 xe_lrc_start_seqno(lrc));
228}
229
230bool xe_sched_job_completed(struct xe_sched_job *job)
231{
232 struct dma_fence *fence = dma_fence_chain_contained(job->fence);
233 struct xe_lrc *lrc = job->q->lrc[0];
234
235 /*
236 * Can safely check just LRC[0] seqno as that is last seqno written when
237 * parallel handshake is done.
238 */
239
240 return !__dma_fence_is_later(fence,
241 xe_sched_job_lrc_seqno(job),
242 xe_lrc_seqno(lrc));
243}
244
245void xe_sched_job_arm(struct xe_sched_job *job)
246{
247 struct xe_exec_queue *q = job->q;
248 struct dma_fence *fence, *prev;
249 struct xe_vm *vm = q->vm;
250 u64 seqno = 0;
251 int i;
252
253 /* Migration and kernel engines have their own locking */
254 if (IS_ENABLED(CONFIG_LOCKDEP) &&
255 !(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) {
256 lockdep_assert_held(&q->vm->lock);
257 if (!xe_vm_in_lr_mode(q->vm))
258 xe_vm_assert_held(q->vm);
259 }
260
261 if (vm && !xe_sched_job_is_migration(q) && !xe_vm_in_lr_mode(vm) &&
262 (vm->batch_invalidate_tlb || vm->tlb_flush_seqno != q->tlb_flush_seqno)) {
263 xe_vm_assert_held(vm);
264 q->tlb_flush_seqno = vm->tlb_flush_seqno;
265 job->ring_ops_flush_tlb = true;
266 }
267
268 /* Arm the pre-allocated fences */
269 for (i = 0; i < q->width; prev = fence, ++i) {
270 struct dma_fence_chain *chain;
271
272 fence = job->ptrs[i].lrc_fence;
273 xe_lrc_init_seqno_fence(q->lrc[i], fence);
274 job->ptrs[i].lrc_fence = NULL;
275 if (!i) {
276 job->lrc_seqno = fence->seqno;
277 continue;
278 } else {
279 xe_assert(gt_to_xe(q->gt), job->lrc_seqno == fence->seqno);
280 }
281
282 chain = job->ptrs[i - 1].chain_fence;
283 dma_fence_chain_init(chain, prev, fence, seqno++);
284 job->ptrs[i - 1].chain_fence = NULL;
285 fence = &chain->base;
286 }
287
288 job->fence = dma_fence_get(fence); /* Pairs with put in scheduler */
289 drm_sched_job_arm(&job->drm);
290}
291
292void xe_sched_job_push(struct xe_sched_job *job)
293{
294 xe_sched_job_get(job);
295 trace_xe_sched_job_exec(job);
296 drm_sched_entity_push_job(&job->drm);
297 xe_sched_job_put(job);
298}
299
300/**
301 * xe_sched_job_init_user_fence - Initialize user_fence for the job
302 * @job: job whose user_fence needs an init
303 * @sync: sync to be use to init user_fence
304 */
305void xe_sched_job_init_user_fence(struct xe_sched_job *job,
306 struct xe_sync_entry *sync)
307{
308 if (sync->type != DRM_XE_SYNC_TYPE_USER_FENCE)
309 return;
310
311 job->user_fence.used = true;
312 job->user_fence.addr = sync->addr;
313 job->user_fence.value = sync->timeline_value;
314}
315
316struct xe_sched_job_snapshot *
317xe_sched_job_snapshot_capture(struct xe_sched_job *job)
318{
319 struct xe_exec_queue *q = job->q;
320 struct xe_device *xe = q->gt->tile->xe;
321 struct xe_sched_job_snapshot *snapshot;
322 size_t len = sizeof(*snapshot) + (sizeof(u64) * q->width);
323 u16 i;
324
325 snapshot = kzalloc(len, GFP_ATOMIC);
326 if (!snapshot)
327 return NULL;
328
329 snapshot->batch_addr_len = q->width;
330 for (i = 0; i < q->width; i++)
331 snapshot->batch_addr[i] =
332 xe_device_uncanonicalize_addr(xe, job->ptrs[i].batch_addr);
333
334 return snapshot;
335}
336
/**
 * xe_sched_job_snapshot_free - Free a job snapshot
 * @snapshot: snapshot returned by xe_sched_job_snapshot_capture()
 *
 * Releases the snapshot memory with kfree().
 */
void xe_sched_job_snapshot_free(struct xe_sched_job_snapshot *snapshot)
{
	kfree(snapshot);
}
341
342void
343xe_sched_job_snapshot_print(struct xe_sched_job_snapshot *snapshot,
344 struct drm_printer *p)
345{
346 u16 i;
347
348 if (!snapshot)
349 return;
350
351 for (i = 0; i < snapshot->batch_addr_len; i++)
352 drm_printf(p, "batch_addr[%u]: 0x%016llx\n", i, snapshot->batch_addr[i]);
353}
354
355int xe_sched_job_add_deps(struct xe_sched_job *job, struct dma_resv *resv,
356 enum dma_resv_usage usage)
357{
358 return drm_sched_job_add_resv_dependencies(&job->drm, resv, usage);
359}