Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/xe/gsc: Handle GSCCS ER interrupt

Starting on Xe2, the GSCCS engine reset is a 2-step process. When the
driver or the GuC hits the GDRST register, the CS is immediately reset
and a success is reported, but the GSC shim continues its reset in the
background. While the shim reset is ongoing, the CS is able to accept
new context submission, but any commands that require the shim will
be stalled until the reset is completed. This means that we can keep
submitting to the GSCCS as long as we make sure that the preemption
timeout is big enough to cover any delay introduced by the reset; since
the GSC preempt timeout is not tunable at runtime, we only need to check
that the value set in kconfig is big enough (and increase it if it
isn't).
When the shim reset completes, a specific CS interrupt (GSC_ER_COMPLETE)
is triggered, in response to which we need to check the GSCI_TIMER_STATUS
register to see if the reset was successful or not.
Note that the GSCI_TIMER_STATUS register is not saved/restored across
power state transitions, so it gets reset on MC6 entry. However, a reset
failure stops MC6, so in that scenario we're always guaranteed to find
the correct value.

Since we can't check the register within interrupt context, the
existing GSC worker has been updated to handle it.
The expected action to take on ER (engine reset) failure is to trigger a
driver FLR, but the driver doesn't support that yet, so for now we just
print an error. A comment has been added to the code to keep track of the
FLR requirement.

v2: Add a check for the initial timeout value (Alan)

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Alan Previn <alan.previn.teres.alexis@intel.com>
Reviewed-by: Alan Previn <alan.previn.teres.alexis@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240304145634.820684-1-daniele.ceraolospurio@intel.com

+109 -2
+7
drivers/gpu/drm/xe/regs/xe_gsc_regs.h
··· 38 38 #define HECI_H_GS1(base) XE_REG((base) + 0xc4c) 39 39 #define HECI_H_GS1_ER_PREP REG_BIT(0) 40 40 41 + #define GSCI_TIMER_STATUS XE_REG(0x11ca28) 42 + #define GSCI_TIMER_STATUS_VALUE REG_GENMASK(1, 0) 43 + #define GSCI_TIMER_STATUS_RESET_IN_PROGRESS 0 44 + #define GSCI_TIMER_STATUS_TIMER_EXPIRED 1 45 + #define GSCI_TIMER_STATUS_RESET_COMPLETE 2 46 + #define GSCI_TIMER_STATUS_OUT_OF_RESET 3 47 + 41 48 #endif
+1
drivers/gpu/drm/xe/regs/xe_gt_regs.h
··· 486 486 #define XEHPC_BCS7_BCS8_INTR_MASK XE_REG(0x19011c) 487 487 #define GT_WAIT_SEMAPHORE_INTERRUPT REG_BIT(11) 488 488 #define GT_CONTEXT_SWITCH_INTERRUPT REG_BIT(8) 489 + #define GSC_ER_COMPLETE REG_BIT(5) 489 490 #define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT REG_BIT(4) 490 491 #define GT_CS_MASTER_ERROR_INTERRUPT REG_BIT(3) 491 492 #define GT_RENDER_USER_INTERRUPT REG_BIT(0)
+63
drivers/gpu/drm/xe/xe_gsc.c
··· 27 27 #include "xe_wa.h" 28 28 #include "instructions/xe_gsc_commands.h" 29 29 #include "regs/xe_gsc_regs.h" 30 + #include "regs/xe_gt_regs.h" 30 31 31 32 static struct xe_gt * 32 33 gsc_to_gt(struct xe_gsc *gsc) ··· 274 273 return 0; 275 274 } 276 275 276 + static int gsc_er_complete(struct xe_gt *gt) 277 + { 278 + u32 er_status; 279 + 280 + if (!gsc_fw_is_loaded(gt)) 281 + return 0; 282 + 283 + /* 284 + * Starting on Xe2, the GSCCS engine reset is a 2-step process. When the 285 + * driver or the GuC hit the GDRST register, the CS is immediately reset 286 + * and a success is reported, but the GSC shim keeps resetting in the 287 + * background. While the shim reset is ongoing, the CS is able to accept 288 + * new context submission, but any commands that require the shim will 289 + * be stalled until the reset is completed. This means that we can keep 290 + * submitting to the GSCCS as long as we make sure that the preemption 291 + * timeout is big enough to cover any delay introduced by the reset. 292 + * When the shim reset completes, a specific CS interrupt is triggered, 293 + * in response to which we need to check the GSCI_TIMER_STATUS register 294 + * to see if the reset was successful or not. 295 + * Note that the GSCI_TIMER_STATUS register is not power save/restored, 296 + * so it gets reset on MC6 entry. However, a reset failure stops MC6, 297 + * so in that scenario we're always guaranteed to find the correct 298 + * value. 299 + */ 300 + er_status = xe_mmio_read32(gt, GSCI_TIMER_STATUS) & GSCI_TIMER_STATUS_VALUE; 301 + 302 + if (er_status == GSCI_TIMER_STATUS_TIMER_EXPIRED) { 303 + /* 304 + * XXX: we should trigger an FLR here, but we don't have support 305 + * for that yet. 
306 + */ 307 + xe_gt_err(gt, "GSC ER timed out!\n"); 308 + return -EIO; 309 + } 310 + 311 + return 0; 312 + } 313 + 277 314 static void gsc_work(struct work_struct *work) 278 315 { 279 316 struct xe_gsc *gsc = container_of(work, typeof(*gsc), work); ··· 328 289 xe_pm_runtime_get(xe); 329 290 xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC); 330 291 292 + if (actions & GSC_ACTION_ER_COMPLETE) { 293 + ret = gsc_er_complete(gt); 294 + if (ret) 295 + goto out; 296 + } 297 + 331 298 if (actions & GSC_ACTION_FW_LOAD) { 332 299 ret = gsc_upload_and_init(gsc); 333 300 if (ret && ret != -EEXIST) ··· 345 300 if (actions & GSC_ACTION_SW_PROXY) 346 301 xe_gsc_proxy_request_handler(gsc); 347 302 303 + out: 348 304 xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC); 349 305 xe_pm_runtime_put(xe); 306 + } 307 + 308 + void xe_gsc_hwe_irq_handler(struct xe_hw_engine *hwe, u16 intr_vec) 309 + { 310 + struct xe_gt *gt = hwe->gt; 311 + struct xe_gsc *gsc = &gt->uc.gsc; 312 + 313 + if (unlikely(!intr_vec)) 314 + return; 315 + 316 + if (intr_vec & GSC_ER_COMPLETE) { 317 + spin_lock(&gsc->lock); 318 + gsc->work_actions |= GSC_ACTION_ER_COMPLETE; 319 + spin_unlock(&gsc->lock); 320 + 321 + queue_work(gsc->wq, &gsc->work); 322 + } 350 323 } 351 324 352 325 int xe_gsc_init(struct xe_gsc *gsc)
+2
drivers/gpu/drm/xe/xe_gsc.h
··· 9 9 #include "xe_gsc_types.h" 10 10 11 11 struct xe_gt; 12 + struct xe_hw_engine; 12 13 13 14 int xe_gsc_init(struct xe_gsc *gsc); 14 15 int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc); 15 16 void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc); 16 17 void xe_gsc_load_start(struct xe_gsc *gsc); 17 18 void xe_gsc_remove(struct xe_gsc *gsc); 19 + void xe_gsc_hwe_irq_handler(struct xe_hw_engine *hwe, u16 intr_vec); 18 20 19 21 void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep); 20 22
+1
drivers/gpu/drm/xe/xe_gsc_types.h
··· 47 47 u32 work_actions; 48 48 #define GSC_ACTION_FW_LOAD BIT(0) 49 49 #define GSC_ACTION_SW_PROXY BIT(1) 50 + #define GSC_ACTION_ER_COMPLETE BIT(2) 50 51 51 52 /** @proxy: sub-structure containing the SW proxy-related variables */ 52 53 struct {
+34 -1
drivers/gpu/drm/xe/xe_hw_engine.c
··· 14 14 #include "xe_device.h" 15 15 #include "xe_execlist.h" 16 16 #include "xe_force_wake.h" 17 + #include "xe_gsc.h" 17 18 #include "xe_gt.h" 18 19 #include "xe_gt_ccs_mode.h" 20 + #include "xe_gt_printk.h" 19 21 #include "xe_gt_topology.h" 20 22 #include "xe_hw_fence.h" 21 23 #include "xe_irq.h" ··· 465 463 hwe->eclass->sched_props.preempt_timeout_us = XE_HW_ENGINE_PREEMPT_TIMEOUT; 466 464 hwe->eclass->sched_props.preempt_timeout_min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN; 467 465 hwe->eclass->sched_props.preempt_timeout_max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX; 466 + 467 + /* 468 + * The GSC engine can accept submissions while the GSC shim is 469 + * being reset, during which time the submission is stalled. In 470 + * the worst case, the shim reset can take up to the maximum GSC 471 + * command execution time (250ms), so the request start can be 472 + * delayed by that much; the request itself can take that long 473 + * without being preemptible, which means worst case it can 474 + * theoretically take up to 500ms for a preemption to go through 475 + * on the GSC engine. Adding to that an extra 100ms as a safety 476 + * margin, we get a minimum recommended timeout of 600ms. 477 + * The preempt_timeout value can't be tuned for OTHER_CLASS 478 + * because the class is reserved for kernel usage, so we just 479 + * need to make sure that the starting value is above that 480 + * threshold; since our default value (640ms) is greater than 481 + * 600ms, the only way we can go below is via a kconfig setting. 482 + * If that happens, log it in dmesg and update the value. 
483 + */ 484 + if (hwe->class == XE_ENGINE_CLASS_OTHER) { 485 + const u32 min_preempt_timeout = 600 * 1000; 486 + if (hwe->eclass->sched_props.preempt_timeout_us < min_preempt_timeout) { 487 + hwe->eclass->sched_props.preempt_timeout_us = min_preempt_timeout; 488 + xe_gt_notice(gt, "Increasing preempt_timeout for GSC to 600ms\n"); 489 + } 490 + } 491 + 468 492 /* Record default props */ 469 493 hwe->eclass->defaults = hwe->eclass->sched_props; 470 494 } ··· 537 509 } 538 510 } 539 511 540 - if (xe_device_uc_enabled(xe)) 512 + if (xe_device_uc_enabled(xe)) { 513 + /* GSCCS has a special interrupt for reset */ 514 + if (hwe->class == XE_ENGINE_CLASS_OTHER) 515 + hwe->irq_handler = xe_gsc_hwe_irq_handler; 516 + 541 517 xe_hw_engine_enable_ring(hwe); 518 + } 542 519 543 520 /* We reserve the highest BCS instance for USM */ 544 521 if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY)
+1 -1
drivers/gpu/drm/xe/xe_irq.c
··· 187 187 * GSCCS interrupts, but it has its own mask register. 188 188 */ 189 189 if (xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_OTHER)) { 190 - gsc_mask = irqs; 190 + gsc_mask = irqs | GSC_ER_COMPLETE; 191 191 heci_mask = GSC_IRQ_INTF(1); 192 192 } else if (HAS_HECI_GSCFI(xe)) { 193 193 gsc_mask = GSC_IRQ_INTF(1);