Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
at v6.17-rc5 (488 lines, 12 kB)

// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_execlist.h"

#include <drm/drm_managed.h>

#include "instructions/xe_mi_commands.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_lrc_layout.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_gt.h"
#include "xe_hw_fence.h"
#include "xe_irq.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_mmio.h"
#include "xe_mocs.h"
#include "xe_ring_ops_types.h"
#include "xe_sched_job.h"

#define XE_EXECLIST_HANG_LIMIT 1

#define SW_CTX_ID_SHIFT 37
#define SW_CTX_ID_WIDTH 11
#define XEHP_SW_CTX_ID_SHIFT 39
#define XEHP_SW_CTX_ID_WIDTH 16

#define SW_CTX_ID \
	GENMASK_ULL(SW_CTX_ID_WIDTH + SW_CTX_ID_SHIFT - 1, \
		    SW_CTX_ID_SHIFT)

#define XEHP_SW_CTX_ID \
	GENMASK_ULL(XEHP_SW_CTX_ID_WIDTH + XEHP_SW_CTX_ID_SHIFT - 1, \
		    XEHP_SW_CTX_ID_SHIFT)

static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
			u32 ctx_id)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_mmio *mmio = &gt->mmio;
	struct xe_device *xe = gt_to_xe(gt);
	u64 lrc_desc;
	u32 ring_mode = _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE);

	lrc_desc = xe_lrc_descriptor(lrc);

	if (GRAPHICS_VERx100(xe) >= 1250) {
		xe_gt_assert(hwe->gt, FIELD_FIT(XEHP_SW_CTX_ID, ctx_id));
		lrc_desc |= FIELD_PREP(XEHP_SW_CTX_ID, ctx_id);
	} else {
		xe_gt_assert(hwe->gt, FIELD_FIT(SW_CTX_ID, ctx_id));
		lrc_desc |= FIELD_PREP(SW_CTX_ID, ctx_id);
	}

	if (hwe->class == XE_ENGINE_CLASS_COMPUTE)
		xe_mmio_write32(mmio, RCU_MODE,
				_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));

	xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
	lrc->ring.old_tail = lrc->ring.tail;

	/*
	 * Make sure the context image is complete before we submit it to HW.
	 *
	 * Ostensibly, writes (including the WCB) should be flushed prior to
	 * an uncached write such as our mmio register access, but the
	 * empirical evidence (esp. on Braswell) suggests that the WC write
	 * into memory may not be visible to the HW prior to the completion
	 * of the UC register write, and that we may begin execution from the
	 * context before its image is complete, leading to invalid PD
	 * chasing.
	 */
	wmb();

	xe_mmio_write32(mmio, RING_HWS_PGA(hwe->mmio_base),
			xe_bo_ggtt_addr(hwe->hwsp));
	xe_mmio_read32(mmio, RING_HWS_PGA(hwe->mmio_base));

	if (xe_device_has_msix(gt_to_xe(hwe->gt)))
		ring_mode |= _MASKED_BIT_ENABLE(GFX_MSIX_INTERRUPT_ENABLE);
	xe_mmio_write32(mmio, RING_MODE(hwe->mmio_base), ring_mode);

	xe_mmio_write32(mmio, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base),
			lower_32_bits(lrc_desc));
	xe_mmio_write32(mmio, RING_EXECLIST_SQ_CONTENTS_HI(hwe->mmio_base),
			upper_32_bits(lrc_desc));
	xe_mmio_write32(mmio, RING_EXECLIST_CONTROL(hwe->mmio_base),
			EL_CTRL_LOAD);
}

static void __xe_execlist_port_start(struct xe_execlist_port *port,
				     struct xe_execlist_exec_queue *exl)
{
	struct xe_device *xe = gt_to_xe(port->hwe->gt);
	int max_ctx = FIELD_MAX(SW_CTX_ID);

	if (GRAPHICS_VERx100(xe) >= 1250)
		max_ctx = FIELD_MAX(XEHP_SW_CTX_ID);

	xe_execlist_port_assert_held(port);

	if (port->running_exl != exl || !exl->has_run) {
		port->last_ctx_id++;

		/* 0 is reserved for the kernel context */
		if (port->last_ctx_id > max_ctx)
			port->last_ctx_id = 1;
	}

	__start_lrc(port->hwe, exl->q->lrc[0], port->last_ctx_id);
	port->running_exl = exl;
	exl->has_run = true;
}

static void __xe_execlist_port_idle(struct xe_execlist_port *port)
{
	u32 noop[2] = { MI_NOOP, MI_NOOP };

	xe_execlist_port_assert_held(port);

	if (!port->running_exl)
		return;

	xe_lrc_write_ring(port->lrc, noop, sizeof(noop));
	__start_lrc(port->hwe, port->lrc, 0);
	port->running_exl = NULL;
}

static bool xe_execlist_is_idle(struct xe_execlist_exec_queue *exl)
{
	struct xe_lrc *lrc = exl->q->lrc[0];

	return lrc->ring.tail == lrc->ring.old_tail;
}

static void __xe_execlist_port_start_next_active(struct xe_execlist_port *port)
{
	struct xe_execlist_exec_queue *exl = NULL;
	int i;

	xe_execlist_port_assert_held(port);

	for (i = ARRAY_SIZE(port->active) - 1; i >= 0; i--) {
		while (!list_empty(&port->active[i])) {
			exl = list_first_entry(&port->active[i],
					       struct xe_execlist_exec_queue,
					       active_link);
			list_del(&exl->active_link);

			if (xe_execlist_is_idle(exl)) {
				exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
				continue;
			}

			list_add_tail(&exl->active_link, &port->active[i]);
			__xe_execlist_port_start(port, exl);
			return;
		}
	}

	__xe_execlist_port_idle(port);
}

static u64 read_execlist_status(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	u32 hi, lo;

	lo = xe_mmio_read32(&gt->mmio, RING_EXECLIST_STATUS_LO(hwe->mmio_base));
	hi = xe_mmio_read32(&gt->mmio, RING_EXECLIST_STATUS_HI(hwe->mmio_base));

	return lo | (u64)hi << 32;
}

static void xe_execlist_port_irq_handler_locked(struct xe_execlist_port *port)
{
	u64 status;

	xe_execlist_port_assert_held(port);

	status = read_execlist_status(port->hwe);
	if (status & BIT(7))
		return;

	__xe_execlist_port_start_next_active(port);
}

static void xe_execlist_port_irq_handler(struct xe_hw_engine *hwe,
					 u16 intr_vec)
{
	struct xe_execlist_port *port = hwe->exl_port;

	spin_lock(&port->lock);
	xe_execlist_port_irq_handler_locked(port);
	spin_unlock(&port->lock);
}
static void xe_execlist_port_wake_locked(struct xe_execlist_port *port,
					 enum xe_exec_queue_priority priority)
{
	xe_execlist_port_assert_held(port);

	if (port->running_exl && port->running_exl->active_priority >= priority)
		return;

	__xe_execlist_port_start_next_active(port);
}

static void xe_execlist_make_active(struct xe_execlist_exec_queue *exl)
{
	struct xe_execlist_port *port = exl->port;
	enum xe_exec_queue_priority priority = exl->q->sched_props.priority;

	XE_WARN_ON(priority == XE_EXEC_QUEUE_PRIORITY_UNSET);
	XE_WARN_ON(priority < 0);
	XE_WARN_ON(priority >= ARRAY_SIZE(exl->port->active));

	spin_lock_irq(&port->lock);

	if (exl->active_priority != priority &&
	    exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET) {
		/* Priority changed, move it to the right list */
		list_del(&exl->active_link);
		exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
	}

	if (exl->active_priority == XE_EXEC_QUEUE_PRIORITY_UNSET) {
		exl->active_priority = priority;
		list_add_tail(&exl->active_link, &port->active[priority]);
	}

	xe_execlist_port_wake_locked(exl->port, priority);

	spin_unlock_irq(&port->lock);
}

static void xe_execlist_port_irq_fail_timer(struct timer_list *timer)
{
	struct xe_execlist_port *port =
		container_of(timer, struct xe_execlist_port, irq_fail);

	spin_lock_irq(&port->lock);
	xe_execlist_port_irq_handler_locked(port);
	spin_unlock_irq(&port->lock);

	port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
	add_timer(&port->irq_fail);
}

struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe,
						 struct xe_hw_engine *hwe)
{
	struct drm_device *drm = &xe->drm;
	struct xe_execlist_port *port;
	int i, err;

	port = drmm_kzalloc(drm, sizeof(*port), GFP_KERNEL);
	if (!port) {
		err = -ENOMEM;
		goto err;
	}

	port->hwe = hwe;

	port->lrc = xe_lrc_create(hwe, NULL, SZ_16K, XE_IRQ_DEFAULT_MSIX, 0);
	if (IS_ERR(port->lrc)) {
		err = PTR_ERR(port->lrc);
		goto err;
	}

	spin_lock_init(&port->lock);
	for (i = 0; i < ARRAY_SIZE(port->active); i++)
		INIT_LIST_HEAD(&port->active[i]);

	port->last_ctx_id = 1;
	port->running_exl = NULL;

	hwe->irq_handler = xe_execlist_port_irq_handler;

	/* TODO: Fix the interrupt code so it doesn't race like mad */
	timer_setup(&port->irq_fail, xe_execlist_port_irq_fail_timer, 0);
	port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
	add_timer(&port->irq_fail);

	return port;

err:
	return ERR_PTR(err);
}

void xe_execlist_port_destroy(struct xe_execlist_port *port)
{
	timer_delete(&port->irq_fail);

	/* Prevent an interrupt while we're destroying */
	spin_lock_irq(&gt_to_xe(port->hwe->gt)->irq.lock);
	port->hwe->irq_handler = NULL;
	spin_unlock_irq(&gt_to_xe(port->hwe->gt)->irq.lock);

	xe_lrc_put(port->lrc);
}

static struct dma_fence *
execlist_run_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct xe_exec_queue *q = job->q;
	struct xe_execlist_exec_queue *exl = job->q->execlist;

	q->ring_ops->emit_job(job);
	xe_execlist_make_active(exl);

	return job->fence;
}

static void execlist_job_free(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);

	xe_exec_queue_update_run_ticks(job->q);
	xe_sched_job_put(job);
}
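
/*
 * Minimal drm_sched backend: run_job emits the job's commands into the ring
 * and marks its queue active on the port; free_job updates the queue's run
 * ticks and drops the job reference.
 */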
static const struct drm_sched_backend_ops drm_sched_ops = {
	.run_job = execlist_run_job,
	.free_job = execlist_job_free,
};

static int execlist_exec_queue_init(struct xe_exec_queue *q)
{
	struct drm_gpu_scheduler *sched;
	const struct drm_sched_init_args args = {
		.ops = &drm_sched_ops,
		.num_rqs = 1,
		.credit_limit = q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES,
		.hang_limit = XE_SCHED_HANG_LIMIT,
		.timeout = XE_SCHED_JOB_TIMEOUT,
		.name = q->hwe->name,
		.dev = gt_to_xe(q->gt)->drm.dev,
	};
	struct xe_execlist_exec_queue *exl;
	struct xe_device *xe = gt_to_xe(q->gt);
	int err;

	xe_assert(xe, !xe_device_uc_enabled(xe));

	drm_info(&xe->drm, "Enabling execlist submission (GuC submission disabled)\n");

	exl = kzalloc(sizeof(*exl), GFP_KERNEL);
	if (!exl)
		return -ENOMEM;

	exl->q = q;

	err = drm_sched_init(&exl->sched, &args);
	if (err)
		goto err_free;

	sched = &exl->sched;
	err = drm_sched_entity_init(&exl->entity, 0, &sched, 1, NULL);
	if (err)
		goto err_sched;

	exl->port = q->hwe->exl_port;
	exl->has_run = false;
	exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
	q->execlist = exl;
	q->entity = &exl->entity;

	xe_exec_queue_assign_name(q, ffs(q->logical_mask) - 1);

	return 0;

err_sched:
	drm_sched_fini(&exl->sched);
err_free:
	kfree(exl);
	return err;
}

static void execlist_exec_queue_fini_async(struct work_struct *w)
{
	struct xe_execlist_exec_queue *ee =
		container_of(w, struct xe_execlist_exec_queue, fini_async);
	struct xe_exec_queue *q = ee->q;
	struct xe_execlist_exec_queue *exl = q->execlist;
	struct xe_device *xe = gt_to_xe(q->gt);
	unsigned long flags;

	xe_assert(xe, !xe_device_uc_enabled(xe));

	spin_lock_irqsave(&exl->port->lock, flags);
	if (WARN_ON(exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET))
		list_del(&exl->active_link);
	spin_unlock_irqrestore(&exl->port->lock, flags);

	drm_sched_entity_fini(&exl->entity);
	drm_sched_fini(&exl->sched);
	kfree(exl);

	xe_exec_queue_fini(q);
}

static void execlist_exec_queue_kill(struct xe_exec_queue *q)
{
	/* NIY */
}

static void execlist_exec_queue_fini(struct xe_exec_queue *q)
{
	INIT_WORK(&q->execlist->fini_async, execlist_exec_queue_fini_async);
	queue_work(system_unbound_wq, &q->execlist->fini_async);
}

static int execlist_exec_queue_set_priority(struct xe_exec_queue *q,
					    enum xe_exec_queue_priority priority)
{
	/* NIY */
	return 0;
}

static int execlist_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us)
{
	/* NIY */
	return 0;
}

static int execlist_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
						    u32 preempt_timeout_us)
{
	/* NIY */
	return 0;
}

static int execlist_exec_queue_suspend(struct xe_exec_queue *q)
{
	/* NIY */
	return 0;
}

static int execlist_exec_queue_suspend_wait(struct xe_exec_queue *q)
{
	/* NIY */
	return 0;
}

static void execlist_exec_queue_resume(struct xe_exec_queue *q)
{
	/* NIY */
}

static bool execlist_exec_queue_reset_status(struct xe_exec_queue *q)
{
	/* NIY */
	return false;
}
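
/*
 * Exec queue ops used when GuC submission is disabled; apart from init and
 * fini, most entry points are still stubs (NIY).
 */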
static const struct xe_exec_queue_ops execlist_exec_queue_ops = {
	.init = execlist_exec_queue_init,
	.kill = execlist_exec_queue_kill,
	.fini = execlist_exec_queue_fini,
	.set_priority = execlist_exec_queue_set_priority,
	.set_timeslice = execlist_exec_queue_set_timeslice,
	.set_preempt_timeout = execlist_exec_queue_set_preempt_timeout,
	.suspend = execlist_exec_queue_suspend,
	.suspend_wait = execlist_exec_queue_suspend_wait,
	.resume = execlist_exec_queue_resume,
	.reset_status = execlist_exec_queue_reset_status,
};

int xe_execlist_init(struct xe_gt *gt)
{
	/* GuC submission enabled, nothing to do */
	if (xe_device_uc_enabled(gt_to_xe(gt)))
		return 0;

	gt->exec_queue_ops = &execlist_exec_queue_ops;

	return 0;
}