Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: MIT
2/*
3 * Copyright © 2021 Intel Corporation
4 */
5
6#include "xe_execlist.h"
7
8#include <drm/drm_managed.h>
9
10#include "instructions/xe_mi_commands.h"
11#include "regs/xe_engine_regs.h"
12#include "regs/xe_gt_regs.h"
13#include "regs/xe_lrc_layout.h"
14#include "xe_assert.h"
15#include "xe_bo.h"
16#include "xe_device.h"
17#include "xe_exec_queue.h"
18#include "xe_gt.h"
19#include "xe_hw_fence.h"
20#include "xe_lrc.h"
21#include "xe_macros.h"
22#include "xe_mmio.h"
23#include "xe_mocs.h"
24#include "xe_ring_ops_types.h"
25#include "xe_sched_job.h"
26
/* NOTE(review): not referenced in the visible part of this file. */
#define XE_EXECLIST_HANG_LIMIT 1

/*
 * Shift and width of the software context ID field within the 64-bit
 * context descriptor. The XEHP_ layout is selected when
 * GRAPHICS_VERx100() >= 1250, the plain layout otherwise.
 */
#define SW_CTX_ID_SHIFT 37
#define SW_CTX_ID_WIDTH 11
#define XEHP_SW_CTX_ID_SHIFT 39
#define XEHP_SW_CTX_ID_WIDTH 16

/* Bit masks covering the fields described above. */
#define SW_CTX_ID \
	GENMASK_ULL(SW_CTX_ID_SHIFT + SW_CTX_ID_WIDTH - 1, SW_CTX_ID_SHIFT)

#define XEHP_SW_CTX_ID \
	GENMASK_ULL(XEHP_SW_CTX_ID_SHIFT + XEHP_SW_CTX_ID_WIDTH - 1, \
		    XEHP_SW_CTX_ID_SHIFT)
41
42
43static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
44 u32 ctx_id)
45{
46 struct xe_gt *gt = hwe->gt;
47 struct xe_device *xe = gt_to_xe(gt);
48 u64 lrc_desc;
49
50 lrc_desc = xe_lrc_descriptor(lrc);
51
52 if (GRAPHICS_VERx100(xe) >= 1250) {
53 xe_gt_assert(hwe->gt, FIELD_FIT(XEHP_SW_CTX_ID, ctx_id));
54 lrc_desc |= FIELD_PREP(XEHP_SW_CTX_ID, ctx_id);
55 } else {
56 xe_gt_assert(hwe->gt, FIELD_FIT(SW_CTX_ID, ctx_id));
57 lrc_desc |= FIELD_PREP(SW_CTX_ID, ctx_id);
58 }
59
60 if (hwe->class == XE_ENGINE_CLASS_COMPUTE)
61 xe_mmio_write32(hwe->gt, RCU_MODE,
62 _MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));
63
64 xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
65 lrc->ring.old_tail = lrc->ring.tail;
66
67 /*
68 * Make sure the context image is complete before we submit it to HW.
69 *
70 * Ostensibly, writes (including the WCB) should be flushed prior to
71 * an uncached write such as our mmio register access, the empirical
72 * evidence (esp. on Braswell) suggests that the WC write into memory
73 * may not be visible to the HW prior to the completion of the UC
74 * register write and that we may begin execution from the context
75 * before its image is complete leading to invalid PD chasing.
76 */
77 wmb();
78
79 xe_mmio_write32(gt, RING_HWS_PGA(hwe->mmio_base),
80 xe_bo_ggtt_addr(hwe->hwsp));
81 xe_mmio_read32(gt, RING_HWS_PGA(hwe->mmio_base));
82 xe_mmio_write32(gt, RING_MODE(hwe->mmio_base),
83 _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));
84
85 xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base),
86 lower_32_bits(lrc_desc));
87 xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS_HI(hwe->mmio_base),
88 upper_32_bits(lrc_desc));
89 xe_mmio_write32(gt, RING_EXECLIST_CONTROL(hwe->mmio_base),
90 EL_CTRL_LOAD);
91}
92
93static void __xe_execlist_port_start(struct xe_execlist_port *port,
94 struct xe_execlist_exec_queue *exl)
95{
96 struct xe_device *xe = gt_to_xe(port->hwe->gt);
97 int max_ctx = FIELD_MAX(SW_CTX_ID);
98
99 if (GRAPHICS_VERx100(xe) >= 1250)
100 max_ctx = FIELD_MAX(XEHP_SW_CTX_ID);
101
102 xe_execlist_port_assert_held(port);
103
104 if (port->running_exl != exl || !exl->has_run) {
105 port->last_ctx_id++;
106
107 /* 0 is reserved for the kernel context */
108 if (port->last_ctx_id > max_ctx)
109 port->last_ctx_id = 1;
110 }
111
112 __start_lrc(port->hwe, exl->q->lrc[0], port->last_ctx_id);
113 port->running_exl = exl;
114 exl->has_run = true;
115}
116
117static void __xe_execlist_port_idle(struct xe_execlist_port *port)
118{
119 u32 noop[2] = { MI_NOOP, MI_NOOP };
120
121 xe_execlist_port_assert_held(port);
122
123 if (!port->running_exl)
124 return;
125
126 xe_lrc_write_ring(port->lrc, noop, sizeof(noop));
127 __start_lrc(port->hwe, port->lrc, 0);
128 port->running_exl = NULL;
129}
130
131static bool xe_execlist_is_idle(struct xe_execlist_exec_queue *exl)
132{
133 struct xe_lrc *lrc = exl->q->lrc[0];
134
135 return lrc->ring.tail == lrc->ring.old_tail;
136}
137
138static void __xe_execlist_port_start_next_active(struct xe_execlist_port *port)
139{
140 struct xe_execlist_exec_queue *exl = NULL;
141 int i;
142
143 xe_execlist_port_assert_held(port);
144
145 for (i = ARRAY_SIZE(port->active) - 1; i >= 0; i--) {
146 while (!list_empty(&port->active[i])) {
147 exl = list_first_entry(&port->active[i],
148 struct xe_execlist_exec_queue,
149 active_link);
150 list_del(&exl->active_link);
151
152 if (xe_execlist_is_idle(exl)) {
153 exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
154 continue;
155 }
156
157 list_add_tail(&exl->active_link, &port->active[i]);
158 __xe_execlist_port_start(port, exl);
159 return;
160 }
161 }
162
163 __xe_execlist_port_idle(port);
164}
165
166static u64 read_execlist_status(struct xe_hw_engine *hwe)
167{
168 struct xe_gt *gt = hwe->gt;
169 u32 hi, lo;
170
171 lo = xe_mmio_read32(gt, RING_EXECLIST_STATUS_LO(hwe->mmio_base));
172 hi = xe_mmio_read32(gt, RING_EXECLIST_STATUS_HI(hwe->mmio_base));
173
174 return lo | (u64)hi << 32;
175}
176
177static void xe_execlist_port_irq_handler_locked(struct xe_execlist_port *port)
178{
179 u64 status;
180
181 xe_execlist_port_assert_held(port);
182
183 status = read_execlist_status(port->hwe);
184 if (status & BIT(7))
185 return;
186
187 __xe_execlist_port_start_next_active(port);
188}
189
190static void xe_execlist_port_irq_handler(struct xe_hw_engine *hwe,
191 u16 intr_vec)
192{
193 struct xe_execlist_port *port = hwe->exl_port;
194
195 spin_lock(&port->lock);
196 xe_execlist_port_irq_handler_locked(port);
197 spin_unlock(&port->lock);
198}
199
200static void xe_execlist_port_wake_locked(struct xe_execlist_port *port,
201 enum xe_exec_queue_priority priority)
202{
203 xe_execlist_port_assert_held(port);
204
205 if (port->running_exl && port->running_exl->active_priority >= priority)
206 return;
207
208 __xe_execlist_port_start_next_active(port);
209}
210
211static void xe_execlist_make_active(struct xe_execlist_exec_queue *exl)
212{
213 struct xe_execlist_port *port = exl->port;
214 enum xe_exec_queue_priority priority = exl->q->sched_props.priority;
215
216 XE_WARN_ON(priority == XE_EXEC_QUEUE_PRIORITY_UNSET);
217 XE_WARN_ON(priority < 0);
218 XE_WARN_ON(priority >= ARRAY_SIZE(exl->port->active));
219
220 spin_lock_irq(&port->lock);
221
222 if (exl->active_priority != priority &&
223 exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET) {
224 /* Priority changed, move it to the right list */
225 list_del(&exl->active_link);
226 exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
227 }
228
229 if (exl->active_priority == XE_EXEC_QUEUE_PRIORITY_UNSET) {
230 exl->active_priority = priority;
231 list_add_tail(&exl->active_link, &port->active[priority]);
232 }
233
234 xe_execlist_port_wake_locked(exl->port, priority);
235
236 spin_unlock_irq(&port->lock);
237}
238
239static void xe_execlist_port_irq_fail_timer(struct timer_list *timer)
240{
241 struct xe_execlist_port *port =
242 container_of(timer, struct xe_execlist_port, irq_fail);
243
244 spin_lock_irq(&port->lock);
245 xe_execlist_port_irq_handler_locked(port);
246 spin_unlock_irq(&port->lock);
247
248 port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
249 add_timer(&port->irq_fail);
250}
251
252struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe,
253 struct xe_hw_engine *hwe)
254{
255 struct drm_device *drm = &xe->drm;
256 struct xe_execlist_port *port;
257 int i, err;
258
259 port = drmm_kzalloc(drm, sizeof(*port), GFP_KERNEL);
260 if (!port) {
261 err = -ENOMEM;
262 goto err;
263 }
264
265 port->hwe = hwe;
266
267 port->lrc = xe_lrc_create(hwe, NULL, SZ_16K);
268 if (IS_ERR(port->lrc)) {
269 err = PTR_ERR(port->lrc);
270 goto err;
271 }
272
273 spin_lock_init(&port->lock);
274 for (i = 0; i < ARRAY_SIZE(port->active); i++)
275 INIT_LIST_HEAD(&port->active[i]);
276
277 port->last_ctx_id = 1;
278 port->running_exl = NULL;
279
280 hwe->irq_handler = xe_execlist_port_irq_handler;
281
282 /* TODO: Fix the interrupt code so it doesn't race like mad */
283 timer_setup(&port->irq_fail, xe_execlist_port_irq_fail_timer, 0);
284 port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
285 add_timer(&port->irq_fail);
286
287 return port;
288
289err:
290 return ERR_PTR(err);
291}
292
293void xe_execlist_port_destroy(struct xe_execlist_port *port)
294{
295 del_timer(&port->irq_fail);
296
297 /* Prevent an interrupt while we're destroying */
298 spin_lock_irq(>_to_xe(port->hwe->gt)->irq.lock);
299 port->hwe->irq_handler = NULL;
300 spin_unlock_irq(>_to_xe(port->hwe->gt)->irq.lock);
301
302 xe_lrc_put(port->lrc);
303}
304
305static struct dma_fence *
306execlist_run_job(struct drm_sched_job *drm_job)
307{
308 struct xe_sched_job *job = to_xe_sched_job(drm_job);
309 struct xe_exec_queue *q = job->q;
310 struct xe_execlist_exec_queue *exl = job->q->execlist;
311
312 q->ring_ops->emit_job(job);
313 xe_execlist_make_active(exl);
314
315 return dma_fence_get(job->fence);
316}
317
318static void execlist_job_free(struct drm_sched_job *drm_job)
319{
320 struct xe_sched_job *job = to_xe_sched_job(drm_job);
321
322 xe_exec_queue_update_run_ticks(job->q);
323 xe_sched_job_put(job);
324}
325
326static const struct drm_sched_backend_ops drm_sched_ops = {
327 .run_job = execlist_run_job,
328 .free_job = execlist_job_free,
329};
330
331static int execlist_exec_queue_init(struct xe_exec_queue *q)
332{
333 struct drm_gpu_scheduler *sched;
334 struct xe_execlist_exec_queue *exl;
335 struct xe_device *xe = gt_to_xe(q->gt);
336 int err;
337
338 xe_assert(xe, !xe_device_uc_enabled(xe));
339
340 drm_info(&xe->drm, "Enabling execlist submission (GuC submission disabled)\n");
341
342 exl = kzalloc(sizeof(*exl), GFP_KERNEL);
343 if (!exl)
344 return -ENOMEM;
345
346 exl->q = q;
347
348 err = drm_sched_init(&exl->sched, &drm_sched_ops, NULL, 1,
349 q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES,
350 XE_SCHED_HANG_LIMIT, XE_SCHED_JOB_TIMEOUT,
351 NULL, NULL, q->hwe->name,
352 gt_to_xe(q->gt)->drm.dev);
353 if (err)
354 goto err_free;
355
356 sched = &exl->sched;
357 err = drm_sched_entity_init(&exl->entity, 0, &sched, 1, NULL);
358 if (err)
359 goto err_sched;
360
361 exl->port = q->hwe->exl_port;
362 exl->has_run = false;
363 exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
364 q->execlist = exl;
365 q->entity = &exl->entity;
366
367 xe_exec_queue_assign_name(q, ffs(q->logical_mask) - 1);
368
369 return 0;
370
371err_sched:
372 drm_sched_fini(&exl->sched);
373err_free:
374 kfree(exl);
375 return err;
376}
377
378static void execlist_exec_queue_fini_async(struct work_struct *w)
379{
380 struct xe_execlist_exec_queue *ee =
381 container_of(w, struct xe_execlist_exec_queue, fini_async);
382 struct xe_exec_queue *q = ee->q;
383 struct xe_execlist_exec_queue *exl = q->execlist;
384 struct xe_device *xe = gt_to_xe(q->gt);
385 unsigned long flags;
386
387 xe_assert(xe, !xe_device_uc_enabled(xe));
388
389 spin_lock_irqsave(&exl->port->lock, flags);
390 if (WARN_ON(exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET))
391 list_del(&exl->active_link);
392 spin_unlock_irqrestore(&exl->port->lock, flags);
393
394 drm_sched_entity_fini(&exl->entity);
395 drm_sched_fini(&exl->sched);
396 kfree(exl);
397
398 xe_exec_queue_fini(q);
399}
400
/* Exec queue kill hook. Not implemented yet: currently a no-op. */
static void execlist_exec_queue_kill(struct xe_exec_queue *q)
{
	/* NIY */
}
405
406static void execlist_exec_queue_fini(struct xe_exec_queue *q)
407{
408 INIT_WORK(&q->execlist->fini_async, execlist_exec_queue_fini_async);
409 queue_work(system_unbound_wq, &q->execlist->fini_async);
410}
411
412static int execlist_exec_queue_set_priority(struct xe_exec_queue *q,
413 enum xe_exec_queue_priority priority)
414{
415 /* NIY */
416 return 0;
417}
418
419static int execlist_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us)
420{
421 /* NIY */
422 return 0;
423}
424
425static int execlist_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
426 u32 preempt_timeout_us)
427{
428 /* NIY */
429 return 0;
430}
431
/* Suspend hook. Not implemented yet: always returns success (0). */
static int execlist_exec_queue_suspend(struct xe_exec_queue *q)
{
	/* NIY */
	return 0;
}
437
/* Suspend-wait hook. Not implemented yet: always returns success (0). */
static int execlist_exec_queue_suspend_wait(struct xe_exec_queue *q)
{
	/* NIY */
	return 0;
}
444
/* Resume hook. Not implemented yet: currently a no-op. */
static void execlist_exec_queue_resume(struct xe_exec_queue *q)
{
	/* NIY */
}
449
450static bool execlist_exec_queue_reset_status(struct xe_exec_queue *q)
451{
452 /* NIY */
453 return false;
454}
455
456static const struct xe_exec_queue_ops execlist_exec_queue_ops = {
457 .init = execlist_exec_queue_init,
458 .kill = execlist_exec_queue_kill,
459 .fini = execlist_exec_queue_fini,
460 .set_priority = execlist_exec_queue_set_priority,
461 .set_timeslice = execlist_exec_queue_set_timeslice,
462 .set_preempt_timeout = execlist_exec_queue_set_preempt_timeout,
463 .suspend = execlist_exec_queue_suspend,
464 .suspend_wait = execlist_exec_queue_suspend_wait,
465 .resume = execlist_exec_queue_resume,
466 .reset_status = execlist_exec_queue_reset_status,
467};
468
469int xe_execlist_init(struct xe_gt *gt)
470{
471 /* GuC submission enabled, nothing to do */
472 if (xe_device_uc_enabled(gt_to_xe(gt)))
473 return 0;
474
475 gt->exec_queue_ops = &execlist_exec_queue_ops;
476
477 return 0;
478}