Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/i915/gt: Manual rc6 entry upon parking

Now that we rapidly park the GT when the GPU idles, we often find
ourselves idling faster than the RC6 promotion timer. Thus if we tell
the GPU to enter RC6 manually as we park, we can enter it sooner (by
around 50ms, half an evaluation interval (EI) on average) and marginally
increase our power savings across all execlists platforms.

v2: Now with a selftest to check that we can enter RC6 manually.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Andi Shyti <andi.shyti@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Imre Deak <imre.deak@intel.com>
Acked-by: Imre Deak <imre.deak@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191127095657.3209854-1-chris@chris-wilson.co.uk

+105 -15
+2
drivers/gpu/drm/i915/gt/intel_gt_pm.c
··· 64 64 if (NEEDS_RC6_CTX_CORRUPTION_WA(i915)) 65 65 intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); 66 66 67 + intel_rc6_unpark(&gt->rc6); 67 68 intel_rps_unpark(&gt->rps); 68 69 i915_pmu_gt_unparked(i915); 69 70 ··· 86 85 i915_vma_parked(gt); 87 86 i915_pmu_gt_parked(i915); 88 87 intel_rps_park(&gt->rps); 88 + intel_rc6_park(&gt->rc6); 89 89 90 90 /* Everything switched off, flush any residual interrupt just in case */ 91 91 intel_synchronize_irq(i915);
+38 -15
drivers/gpu/drm/i915/gt/intel_rc6.c
··· 96 96 set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 60); 97 97 98 98 /* 3a: Enable RC6 */ 99 - set(uncore, GEN6_RC_CONTROL, 100 - GEN6_RC_CTL_HW_ENABLE | 101 - GEN6_RC_CTL_RC6_ENABLE | 102 - GEN6_RC_CTL_EI_MODE(1)); 99 + rc6->ctl_enable = 100 + GEN6_RC_CTL_HW_ENABLE | 101 + GEN6_RC_CTL_RC6_ENABLE | 102 + GEN6_RC_CTL_EI_MODE(1); 103 103 104 104 set(uncore, GEN9_PG_ENABLE, 105 105 GEN9_RENDER_PG_ENABLE | ··· 170 170 else 171 171 rc6_mode = GEN6_RC_CTL_EI_MODE(1); 172 172 173 - set(uncore, GEN6_RC_CONTROL, 174 - GEN6_RC_CTL_HW_ENABLE | 175 - GEN6_RC_CTL_RC6_ENABLE | 176 - rc6_mode); 173 + rc6->ctl_enable = 174 + GEN6_RC_CTL_HW_ENABLE | 175 + GEN6_RC_CTL_RC6_ENABLE | 176 + rc6_mode; 177 177 178 178 /* 179 179 * WaRsDisableCoarsePowerGating:skl,cnl ··· 200 200 set(uncore, GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */ 201 201 202 202 /* 3: Enable RC6 */ 203 - set(uncore, GEN6_RC_CONTROL, 203 + rc6->ctl_enable = 204 204 GEN6_RC_CTL_HW_ENABLE | 205 205 GEN7_RC_CTL_TO_MODE | 206 - GEN6_RC_CTL_RC6_ENABLE); 206 + GEN6_RC_CTL_RC6_ENABLE; 207 207 } 208 208 209 209 static void gen6_rc6_enable(struct intel_rc6 *rc6) ··· 239 239 rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE; 240 240 if (HAS_RC6pp(i915)) 241 241 rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE; 242 - set(uncore, GEN6_RC_CONTROL, 242 + rc6->ctl_enable = 243 243 rc6_mask | 244 244 GEN6_RC_CTL_EI_MODE(1) | 245 - GEN6_RC_CTL_HW_ENABLE); 245 + GEN6_RC_CTL_HW_ENABLE; 246 246 247 247 rc6vids = 0; 248 248 ret = sandybridge_pcode_read(i915, GEN6_PCODE_READ_RC6VIDS, ··· 360 360 VLV_RENDER_RC6_COUNT_EN)); 361 361 362 362 /* 3: Enable RC6 */ 363 - set(uncore, GEN6_RC_CONTROL, GEN7_RC_CTL_TO_MODE); 363 + rc6->ctl_enable = GEN7_RC_CTL_TO_MODE; 364 364 } 365 365 366 366 static void vlv_rc6_enable(struct intel_rc6 *rc6) ··· 386 386 VLV_MEDIA_RC6_COUNT_EN | 387 387 VLV_RENDER_RC6_COUNT_EN)); 388 388 389 - set(uncore, GEN6_RC_CONTROL, 390 - GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL); 389 + rc6->ctl_enable = 390 + GEN7_RC_CTL_TO_MODE | 
VLV_RC_CTL_CTX_RST_PARALLEL; 391 391 } 392 392 393 393 static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6) ··· 632 632 /* rc6 is ready, runtime-pm is go! */ 633 633 rpm_put(rc6); 634 634 rc6->enabled = true; 635 + } 636 + 637 + void intel_rc6_unpark(struct intel_rc6 *rc6) 638 + { 639 + struct intel_uncore *uncore = rc6_to_uncore(rc6); 640 + 641 + if (!rc6->enabled) 642 + return; 643 + 644 + /* Restore HW timers for automatic RC6 entry while busy */ 645 + set(uncore, GEN6_RC_CONTROL, rc6->ctl_enable); 646 + } 647 + 648 + void intel_rc6_park(struct intel_rc6 *rc6) 649 + { 650 + struct intel_uncore *uncore = rc6_to_uncore(rc6); 651 + 652 + if (!rc6->enabled) 653 + return; 654 + 655 + /* Turn off the HW timers and go directly to rc6 */ 656 + set(uncore, GEN6_RC_CONTROL, GEN6_RC_CTL_RC6_ENABLE); 657 + set(uncore, GEN6_RC_STATE, 0x4 << RC_SW_TARGET_STATE_SHIFT); 635 658 } 636 659 637 660 void intel_rc6_disable(struct intel_rc6 *rc6)
+3
drivers/gpu/drm/i915/gt/intel_rc6.h
··· 15 15 void intel_rc6_init(struct intel_rc6 *rc6); 16 16 void intel_rc6_fini(struct intel_rc6 *rc6); 17 17 18 + void intel_rc6_unpark(struct intel_rc6 *rc6); 19 + void intel_rc6_park(struct intel_rc6 *rc6); 20 + 18 21 void intel_rc6_sanitize(struct intel_rc6 *rc6); 19 22 void intel_rc6_enable(struct intel_rc6 *rc6); 20 23 void intel_rc6_disable(struct intel_rc6 *rc6);
+2
drivers/gpu/drm/i915/gt/intel_rc6_types.h
··· 18 18 u64 prev_hw_residency[4]; 19 19 u64 cur_residency[4]; 20 20 21 + u32 ctl_enable; 22 + 21 23 struct drm_i915_gem_object *pctx; 22 24 23 25 bool supported : 1;
+1
drivers/gpu/drm/i915/gt/selftest_gt_pm.c
··· 51 51 int intel_gt_pm_live_selftests(struct drm_i915_private *i915) 52 52 { 53 53 static const struct i915_subtest tests[] = { 54 + SUBTEST(live_rc6_manual), 54 55 SUBTEST(live_gt_resume), 55 56 }; 56 57
+58
drivers/gpu/drm/i915/gt/selftest_rc6.c
··· 12 12 13 13 #include "selftests/i915_random.h" 14 14 15 + int live_rc6_manual(void *arg) 16 + { 17 + struct intel_gt *gt = arg; 18 + struct intel_rc6 *rc6 = &gt->rc6; 19 + intel_wakeref_t wakeref; 20 + u64 res[2]; 21 + int err = 0; 22 + 23 + /* 24 + * Our claim is that we can "encourage" the GPU to enter rc6 at will. 25 + * Let's try it! 26 + */ 27 + 28 + if (!rc6->enabled) 29 + return 0; 30 + 31 + /* bsw/byt use a PCU and decouple RC6 from our manual control */ 32 + if (IS_VALLEYVIEW(gt->i915) || IS_CHERRYVIEW(gt->i915)) 33 + return 0; 34 + 35 + wakeref = intel_runtime_pm_get(gt->uncore->rpm); 36 + 37 + /* Force RC6 off for starters */ 38 + __intel_rc6_disable(rc6); 39 + msleep(1); /* wakeup is not immediate, takes about 100us on icl */ 40 + 41 + res[0] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6); 42 + msleep(250); 43 + res[1] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6); 44 + if ((res[1] - res[0]) >> 10) { 45 + pr_err("RC6 residency increased by %lldus while disabled for 250ms!\n", 46 + (res[1] - res[0]) >> 10); 47 + err = -EINVAL; 48 + goto out_unlock; 49 + } 50 + 51 + /* Manually enter RC6 */ 52 + intel_rc6_park(rc6); 53 + 54 + res[0] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6); 55 + msleep(100); 56 + res[1] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6); 57 + 58 + if (res[1] == res[0]) { 59 + pr_err("Did not enter RC6! RC6_STATE=%08x, RC6_CONTROL=%08x\n", 60 + intel_uncore_read_fw(gt->uncore, GEN6_RC_STATE), 61 + intel_uncore_read_fw(gt->uncore, GEN6_RC_CONTROL)); 62 + err = -EINVAL; 63 + } 64 + 65 + /* Restore what should have been the original state! */ 66 + intel_rc6_unpark(rc6); 67 + 68 + out_unlock: 69 + intel_runtime_pm_put(gt->uncore->rpm, wakeref); 70 + return err; 71 + } 72 + 15 73 static const u32 *__live_rc6_ctx(struct intel_context *ce) 16 74 { 17 75 struct i915_request *rq;
+1
drivers/gpu/drm/i915/gt/selftest_rc6.h
··· 8 8 #define SELFTEST_RC6_H 9 9 10 10 int live_rc6_ctx_wa(void *arg); 11 + int live_rc6_manual(void *arg); 11 12 12 13 #endif /* SELFTEST_RC6_H */