/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eugeni Dodonov <eugeni.dodonov@intel.com>
 *
 */

#include <linux/cpufreq.h>
#include "i915_drv.h"
#include "intel_drv.h"
#include "../../../platform/x86/intel_ips.h"
#include <linux/module.h>

/**
 * DOC: RC6
 *
 * RC6 is a special power stage which allows the GPU to enter a very
 * low-voltage mode when idle, drawing as little as 0V while in this stage.
 * This stage is entered automatically when the GPU is idle and RC6 support
 * is enabled; as soon as a new workload arises, the GPU wakes up
 * automatically as well.
 *
 * There are different RC6 modes available in Intel GPUs, which differ in
 * the latency required to enter and leave RC6, and in the voltage consumed
 * by the GPU in different states.
 *
 * The combination of the following flags defines which states the GPU is
 * allowed to enter: RC6 is the normal RC6 state, RC6p is the deep RC6, and
 * RC6pp is the deepest RC6. Hardware support for them varies according to
 * the GPU, BIOS, chipset and platform. RC6 is usually the safest one and
 * the one which brings the most power savings; deeper states save more
 * power, but require higher latency to switch to and wake up.
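 *
 * For example (an illustrative sketch; the actual policy is
 * platform-dependent), allowing only the normal and deep states would be
 * expressed by combining the flags below as
 * (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE), leaving RC6pp disabled.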
 */
#define INTEL_RC6_ENABLE		(1<<0)
#define INTEL_RC6p_ENABLE		(1<<1)
#define INTEL_RC6pp_ENABLE		(1<<2)

static void gen9_init_clock_gating(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;

        /* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,bxt,kbl */
        I915_WRITE(CHICKEN_PAR1_1,
                   I915_READ(CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP);

        I915_WRITE(GEN8_CONFIG0,
                   I915_READ(GEN8_CONFIG0) | GEN9_DEFAULT_FIXES);

        /* WaEnableChickenDCPR:skl,bxt,kbl */
        I915_WRITE(GEN8_CHICKEN_DCPR_1,
                   I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM);

        /* WaFbcTurnOffFbcWatermark:skl,bxt,kbl */
        /* WaFbcWakeMemOn:skl,bxt,kbl */
        I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
                   DISP_FBC_WM_DIS |
                   DISP_FBC_MEMORY_WAKE);

        /* WaFbcHighMemBwCorruptionAvoidance:skl,bxt,kbl */
        I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
                   ILK_DPFC_DISABLE_DUMMY0);
}

static void bxt_init_clock_gating(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;

        gen9_init_clock_gating(dev);

        /* WaDisableSDEUnitClockGating:bxt */
        I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
                   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);

        /*
         * FIXME:
         * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only.
         */
        I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
                   GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);

        /*
         * Wa: Backlight PWM may stop in the asserted state, causing backlight
         * to stay fully on.
         */
        if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER))
                I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
                           PWM1_GATING_DIS | PWM2_GATING_DIS);
}

static void i915_pineview_get_mem_freq(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        u32 tmp;

        tmp = I915_READ(CLKCFG);

        switch (tmp & CLKCFG_FSB_MASK) {
        case CLKCFG_FSB_533:
                dev_priv->fsb_freq = 533; /* 133*4 */
                break;
        case CLKCFG_FSB_800:
                dev_priv->fsb_freq = 800; /* 200*4 */
                break;
        case CLKCFG_FSB_667:
                dev_priv->fsb_freq = 667; /* 167*4 */
                break;
        case CLKCFG_FSB_400:
                dev_priv->fsb_freq = 400; /* 100*4 */
                break;
        }

        switch (tmp & CLKCFG_MEM_MASK) {
        case CLKCFG_MEM_533:
                dev_priv->mem_freq = 533;
                break;
        case CLKCFG_MEM_667:
                dev_priv->mem_freq = 667;
                break;
        case CLKCFG_MEM_800:
                dev_priv->mem_freq = 800;
                break;
        }

        /* detect pineview DDR3 setting */
        tmp = I915_READ(CSHRDDR3CTL);
        dev_priv->is_ddr3 = (tmp & CSHRDDR3CTL_DDR3) ? 1 : 0;
}
static void i915_ironlake_get_mem_freq(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        u16 ddrpll, csipll;

        ddrpll = I915_READ16(DDRMPLL1);
        csipll = I915_READ16(CSIPLL0);

        switch (ddrpll & 0xff) {
        case 0xc:
                dev_priv->mem_freq = 800;
                break;
        case 0x10:
                dev_priv->mem_freq = 1066;
                break;
        case 0x14:
                dev_priv->mem_freq = 1333;
                break;
        case 0x18:
                dev_priv->mem_freq = 1600;
                break;
        default:
                DRM_DEBUG_DRIVER("unknown memory frequency 0x%02x\n",
                                 ddrpll & 0xff);
                dev_priv->mem_freq = 0;
                break;
        }

        dev_priv->ips.r_t = dev_priv->mem_freq;

        switch (csipll & 0x3ff) {
        case 0x00c:
                dev_priv->fsb_freq = 3200;
                break;
        case 0x00e:
                dev_priv->fsb_freq = 3733;
                break;
        case 0x010:
                dev_priv->fsb_freq = 4266;
                break;
        case 0x012:
                dev_priv->fsb_freq = 4800;
                break;
        case 0x014:
                dev_priv->fsb_freq = 5333;
                break;
        case 0x016:
                dev_priv->fsb_freq = 5866;
                break;
        case 0x018:
                dev_priv->fsb_freq = 6400;
                break;
        default:
                DRM_DEBUG_DRIVER("unknown fsb frequency 0x%04x\n",
                                 csipll & 0x3ff);
                dev_priv->fsb_freq = 0;
                break;
        }

        if (dev_priv->fsb_freq == 3200) {
                dev_priv->ips.c_m = 0;
        } else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) {
                dev_priv->ips.c_m = 1;
        } else {
                dev_priv->ips.c_m = 2;
        }
}

static const struct cxsr_latency cxsr_latency_table[] = {
        {1, 0, 800, 400, 3382, 33382, 3983, 33983},    /* DDR2-400 SC */
        {1, 0, 800, 667, 3354, 33354, 3807, 33807},    /* DDR2-667 SC */
        {1, 0, 800, 800, 3347, 33347, 3763, 33763},    /* DDR2-800 SC */
        {1, 1, 800, 667, 6420, 36420, 6873, 36873},    /* DDR3-667 SC */
        {1, 1, 800, 800, 5902, 35902, 6318, 36318},    /* DDR3-800 SC */

        {1, 0, 667, 400, 3400, 33400, 4021, 34021},    /* DDR2-400 SC */
        {1, 0, 667, 667, 3372, 33372, 3845, 33845},    /* DDR2-667 SC */
        {1, 0, 667, 800, 3386, 33386, 3822, 33822},    /* DDR2-800 SC */
        {1, 1, 667, 667, 6438, 36438, 6911, 36911},    /* DDR3-667 SC */
        {1, 1, 667, 800, 5941, 35941, 6377, 36377},    /* DDR3-800 SC */

        {1, 0, 400, 400, 3472, 33472, 4173, 34173},    /* DDR2-400 SC */
        {1, 0, 400, 667, 3443, 33443, 3996, 33996},    /* DDR2-667 SC */
        {1, 0, 400, 800, 3430, 33430, 3946, 33946},    /* DDR2-800 SC */
        {1, 1, 400, 667, 6509, 36509, 7062, 37062},    /* DDR3-667 SC */
        {1, 1, 400, 800, 5985, 35985, 6501, 36501},    /* DDR3-800 SC */

        {0, 0, 800, 400, 3438, 33438, 4065, 34065},    /* DDR2-400 SC */
        {0, 0, 800, 667, 3410, 33410, 3889, 33889},    /* DDR2-667 SC */
        {0, 0, 800, 800, 3403, 33403, 3845, 33845},    /* DDR2-800 SC */
        {0, 1, 800, 667, 6476, 36476, 6955, 36955},    /* DDR3-667 SC */
        {0, 1, 800, 800, 5958, 35958, 6400, 36400},    /* DDR3-800 SC */

        {0, 0, 667, 400, 3456, 33456, 4103, 34106},    /* DDR2-400 SC */
        {0, 0, 667, 667, 3428, 33428, 3927, 33927},    /* DDR2-667 SC */
        {0, 0, 667, 800, 3443, 33443, 3905, 33905},    /* DDR2-800 SC */
        {0, 1, 667, 667, 6494, 36494, 6993, 36993},    /* DDR3-667 SC */
        {0, 1, 667, 800, 5998, 35998, 6460, 36460},    /* DDR3-800 SC */

        {0, 0, 400, 400, 3528, 33528, 4255, 34255},    /* DDR2-400 SC */
        {0, 0, 400, 667, 3500, 33500, 4079, 34079},    /* DDR2-667 SC */
        {0, 0, 400, 800, 3487, 33487, 4029, 34029},    /* DDR2-800 SC */
        {0, 1, 400, 667, 6566, 36566, 7145, 37145},    /* DDR3-667 SC */
        {0, 1, 400, 800, 6042, 36042, 6584, 36584},    /* DDR3-800 SC */
};
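/*
 * Each entry above is keyed by {is_desktop, is_ddr3, fsb_freq, mem_freq};
 * the remaining four columns appear to map to the CxSR latencies (in ns)
 * consumed as display_sr, cursor_sr, display_hpll_disable and
 * cursor_hpll_disable in pineview_update_wm() below (per struct
 * cxsr_latency). The lookup helper that follows matches only on the first
 * four fields.
 */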
static const struct cxsr_latency *intel_get_cxsr_latency(int is_desktop,
                                                         int is_ddr3,
                                                         int fsb,
                                                         int mem)
{
        const struct cxsr_latency *latency;
        int i;

        if (fsb == 0 || mem == 0)
                return NULL;

        for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) {
                latency = &cxsr_latency_table[i];
                if (is_desktop == latency->is_desktop &&
                    is_ddr3 == latency->is_ddr3 &&
                    fsb == latency->fsb_freq && mem == latency->mem_freq)
                        return latency;
        }

        DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");

        return NULL;
}

static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable)
{
        u32 val;

        mutex_lock(&dev_priv->rps.hw_lock);

        val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
        if (enable)
                val &= ~FORCE_DDR_HIGH_FREQ;
        else
                val |= FORCE_DDR_HIGH_FREQ;
        val &= ~FORCE_DDR_LOW_FREQ;
        val |= FORCE_DDR_FREQ_REQ_ACK;
        vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);

        if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
                      FORCE_DDR_FREQ_REQ_ACK) == 0, 3))
                DRM_ERROR("timed out waiting for Punit DDR DVFS request\n");

        mutex_unlock(&dev_priv->rps.hw_lock);
}

static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable)
{
        u32 val;

        mutex_lock(&dev_priv->rps.hw_lock);

        val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
        if (enable)
                val |= DSP_MAXFIFO_PM5_ENABLE;
        else
                val &= ~DSP_MAXFIFO_PM5_ENABLE;
        vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val);

        mutex_unlock(&dev_priv->rps.hw_lock);
}

#define FW_WM(value, plane) \
        (((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK)

void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
{
        struct drm_device *dev = dev_priv->dev;
        u32 val;

        if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) {
                I915_WRITE(FW_BLC_SELF_VLV, enable ? FW_CSPWRDWNEN : 0);
                POSTING_READ(FW_BLC_SELF_VLV);
                dev_priv->wm.vlv.cxsr = enable;
        } else if (IS_G4X(dev) || IS_CRESTLINE(dev)) {
                I915_WRITE(FW_BLC_SELF, enable ? FW_BLC_SELF_EN : 0);
                POSTING_READ(FW_BLC_SELF);
        } else if (IS_PINEVIEW(dev)) {
                val = I915_READ(DSPFW3) & ~PINEVIEW_SELF_REFRESH_EN;
                val |= enable ? PINEVIEW_SELF_REFRESH_EN : 0;
                I915_WRITE(DSPFW3, val);
                POSTING_READ(DSPFW3);
        } else if (IS_I945G(dev) || IS_I945GM(dev)) {
                val = enable ? _MASKED_BIT_ENABLE(FW_BLC_SELF_EN) :
                               _MASKED_BIT_DISABLE(FW_BLC_SELF_EN);
                I915_WRITE(FW_BLC_SELF, val);
                POSTING_READ(FW_BLC_SELF);
        } else if (IS_I915GM(dev)) {
                val = enable ? _MASKED_BIT_ENABLE(INSTPM_SELF_EN) :
                               _MASKED_BIT_DISABLE(INSTPM_SELF_EN);
                I915_WRITE(INSTPM, val);
                POSTING_READ(INSTPM);
        } else {
                return;
        }

        DRM_DEBUG_KMS("memory self-refresh is %s\n",
                      enable ? "enabled" : "disabled");
}

/*
 * Latency for FIFO fetches is dependent on several factors:
 *   - memory configuration (speed, channels)
 *   - chipset
 *   - current MCH state
 * It can be fairly high in some situations, so here we assume a fairly
 * pessimal value. It's a tradeoff between extra memory fetches (if we
 * set this value too high, the FIFO will fetch frequently to stay full)
 * and power consumption (set it too low to save power and we might see
 * FIFO underruns and display "flicker").
 *
 * A value of 5us seems to be a good balance; safe for very low end
 * platforms but not overly aggressive on lower latency configs.
 */
static const int pessimal_latency_ns = 5000;
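/*
 * Rough worked example (illustrative numbers, not from any spec): at a
 * 300 MHz pixel clock and 4 bytes per pixel, 5 us of latency corresponds
 * to (300000 / 1000) * 4 * 5000 / 1000 = 6000 bytes of FIFO demand, i.e.
 * roughly 94 64-byte cachelines that must still be queued when the
 * watermark triggers a refill. This is the same calculation performed by
 * intel_calculate_wm() below.
 */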
#define VLV_FIFO_START(dsparb, dsparb2, lo_shift, hi_shift) \
        ((((dsparb) >> (lo_shift)) & 0xff) | ((((dsparb2) >> (hi_shift)) & 0x1) << 8))

static int vlv_get_fifo_size(struct drm_device *dev,
                             enum pipe pipe, int plane)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        int sprite0_start, sprite1_start, size;

        switch (pipe) {
                uint32_t dsparb, dsparb2, dsparb3;
        case PIPE_A:
                dsparb = I915_READ(DSPARB);
                dsparb2 = I915_READ(DSPARB2);
                sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 0, 0);
                sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 8, 4);
                break;
        case PIPE_B:
                dsparb = I915_READ(DSPARB);
                dsparb2 = I915_READ(DSPARB2);
                sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 16, 8);
                sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 24, 12);
                break;
        case PIPE_C:
                dsparb2 = I915_READ(DSPARB2);
                dsparb3 = I915_READ(DSPARB3);
                sprite0_start = VLV_FIFO_START(dsparb3, dsparb2, 0, 16);
                sprite1_start = VLV_FIFO_START(dsparb3, dsparb2, 8, 20);
                break;
        default:
                return 0;
        }

        switch (plane) {
        case 0:
                size = sprite0_start;
                break;
        case 1:
                size = sprite1_start - sprite0_start;
                break;
        case 2:
                size = 512 - 1 - sprite1_start;
                break;
        default:
                return 0;
        }

        DRM_DEBUG_KMS("Pipe %c %s %c FIFO size: %d\n",
                      pipe_name(pipe), plane == 0 ? "primary" : "sprite",
                      plane == 0 ? plane_name(pipe) : sprite_name(pipe, plane - 1),
                      size);

        return size;
}

static int i9xx_get_fifo_size(struct drm_device *dev, int plane)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        uint32_t dsparb = I915_READ(DSPARB);
        int size;

        size = dsparb & 0x7f;
        if (plane)
                size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - size;

        DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
                      plane ? "B" : "A", size);

        return size;
}

static int i830_get_fifo_size(struct drm_device *dev, int plane)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        uint32_t dsparb = I915_READ(DSPARB);
        int size;

        size = dsparb & 0x1ff;
        if (plane)
                size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - size;
        size >>= 1; /* Convert to cachelines */

        DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
                      plane ? "B" : "A", size);

        return size;
}

static int i845_get_fifo_size(struct drm_device *dev, int plane)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        uint32_t dsparb = I915_READ(DSPARB);
        int size;

        size = dsparb & 0x7f;
        size >>= 2; /* Convert to cachelines */

        DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
                      plane ? "B" : "A",
                      size);

        return size;
}
/* Pineview has different values for various configs */
static const struct intel_watermark_params pineview_display_wm = {
        .fifo_size = PINEVIEW_DISPLAY_FIFO,
        .max_wm = PINEVIEW_MAX_WM,
        .default_wm = PINEVIEW_DFT_WM,
        .guard_size = PINEVIEW_GUARD_WM,
        .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params pineview_display_hplloff_wm = {
        .fifo_size = PINEVIEW_DISPLAY_FIFO,
        .max_wm = PINEVIEW_MAX_WM,
        .default_wm = PINEVIEW_DFT_HPLLOFF_WM,
        .guard_size = PINEVIEW_GUARD_WM,
        .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params pineview_cursor_wm = {
        .fifo_size = PINEVIEW_CURSOR_FIFO,
        .max_wm = PINEVIEW_CURSOR_MAX_WM,
        .default_wm = PINEVIEW_CURSOR_DFT_WM,
        .guard_size = PINEVIEW_CURSOR_GUARD_WM,
        .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params pineview_cursor_hplloff_wm = {
        .fifo_size = PINEVIEW_CURSOR_FIFO,
        .max_wm = PINEVIEW_CURSOR_MAX_WM,
        .default_wm = PINEVIEW_CURSOR_DFT_WM,
        .guard_size = PINEVIEW_CURSOR_GUARD_WM,
        .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params g4x_wm_info = {
        .fifo_size = G4X_FIFO_SIZE,
        .max_wm = G4X_MAX_WM,
        .default_wm = G4X_MAX_WM,
        .guard_size = 2,
        .cacheline_size = G4X_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params g4x_cursor_wm_info = {
        .fifo_size = I965_CURSOR_FIFO,
        .max_wm = I965_CURSOR_MAX_WM,
        .default_wm = I965_CURSOR_DFT_WM,
        .guard_size = 2,
        .cacheline_size = G4X_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i965_cursor_wm_info = {
        .fifo_size = I965_CURSOR_FIFO,
        .max_wm = I965_CURSOR_MAX_WM,
        .default_wm = I965_CURSOR_DFT_WM,
        .guard_size = 2,
        .cacheline_size = I915_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i945_wm_info = {
        .fifo_size = I945_FIFO_SIZE,
        .max_wm = I915_MAX_WM,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I915_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i915_wm_info = {
        .fifo_size = I915_FIFO_SIZE,
        .max_wm = I915_MAX_WM,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I915_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i830_a_wm_info = {
        .fifo_size = I855GM_FIFO_SIZE,
        .max_wm = I915_MAX_WM,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I830_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i830_bc_wm_info = {
        .fifo_size = I855GM_FIFO_SIZE,
        .max_wm = I915_MAX_WM/2,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I830_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i845_wm_info = {
        .fifo_size = I830_FIFO_SIZE,
        .max_wm = I915_MAX_WM,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I830_FIFO_LINE_SIZE,
};

/**
 * intel_calculate_wm - calculate watermark level
 * @clock_in_khz: pixel clock
 * @wm: chip FIFO params
 * @fifo_size: size of the FIFO buffer
 * @cpp: bytes per pixel
 * @latency_ns: memory latency for the platform
 *
 * Calculate the watermark level (the level at which the display plane will
 * start fetching from memory again). Each chip has a different display
 * FIFO size and allocation, so the caller needs to figure that out and pass
 * in the correct intel_watermark_params structure.
 *
 * As the pixel clock runs, the FIFO will be drained at a rate that depends
 * on the pixel size. When it reaches the watermark level, it'll start
 * fetching FIFO-line-sized chunks from memory until the FIFO fills past
 * the watermark point. If the FIFO drains completely, a FIFO underrun
 * will occur, and a display engine hang could result.
 */
static unsigned long intel_calculate_wm(unsigned long clock_in_khz,
                                        const struct intel_watermark_params *wm,
                                        int fifo_size, int cpp,
                                        unsigned long latency_ns)
{
        long entries_required, wm_size;

        /*
         * Note: we need to make sure we don't overflow for various clock &
         * latency values.
         * clocks go from a few thousand to several hundred thousand.
         * latency is usually a few thousand
         */
        entries_required = ((clock_in_khz / 1000) * cpp * latency_ns) /
                1000;
        entries_required = DIV_ROUND_UP(entries_required, wm->cacheline_size);

        DRM_DEBUG_KMS("FIFO entries required for mode: %ld\n", entries_required);

        wm_size = fifo_size - (entries_required + wm->guard_size);

        DRM_DEBUG_KMS("FIFO watermark level: %ld\n", wm_size);

        /* Don't promote wm_size to unsigned... */
        if (wm_size > (long)wm->max_wm)
                wm_size = wm->max_wm;
        if (wm_size <= 0)
                wm_size = wm->default_wm;

        /*
         * Bspec seems to indicate that the value shouldn't be lower than
         * 'burst size + 1'. Certainly 830 is quite unhappy with low values.
         * Let's go for 8, which is the burst size, since certain platforms
         * already use a hardcoded 8 (which is what the spec says should be
         * done).
         */
        if (wm_size <= 8)
                wm_size = 8;

        return wm_size;
}
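/*
 * Worked example for intel_calculate_wm() (illustrative numbers only):
 * with clock_in_khz = 148500, cpp = 4, latency_ns = 5000,
 * cacheline_size = 64, fifo_size = 96 and guard_size = 2,
 * entries_required = (148500 / 1000) * 4 * 5000 / 1000 = 2960 bytes,
 * DIV_ROUND_UP(2960, 64) = 47 cachelines, and wm_size = 96 - (47 + 2)
 * = 47: the plane starts refilling once the FIFO drains to that level.
 */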
static struct drm_crtc *single_enabled_crtc(struct drm_device *dev)
{
        struct drm_crtc *crtc, *enabled = NULL;

        for_each_crtc(dev, crtc) {
                if (intel_crtc_active(crtc)) {
                        if (enabled)
                                return NULL;
                        enabled = crtc;
                }
        }

        return enabled;
}

static void pineview_update_wm(struct drm_crtc *unused_crtc)
{
        struct drm_device *dev = unused_crtc->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_crtc *crtc;
        const struct cxsr_latency *latency;
        u32 reg;
        unsigned long wm;

        latency = intel_get_cxsr_latency(IS_PINEVIEW_G(dev), dev_priv->is_ddr3,
                                         dev_priv->fsb_freq, dev_priv->mem_freq);
        if (!latency) {
                DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
                intel_set_memory_cxsr(dev_priv, false);
                return;
        }

        crtc = single_enabled_crtc(dev);
        if (crtc) {
                const struct drm_display_mode *adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
                int cpp = drm_format_plane_cpp(crtc->primary->state->fb->pixel_format, 0);
                int clock = adjusted_mode->crtc_clock;

                /* Display SR */
                wm = intel_calculate_wm(clock, &pineview_display_wm,
                                        pineview_display_wm.fifo_size,
                                        cpp, latency->display_sr);
                reg = I915_READ(DSPFW1);
                reg &= ~DSPFW_SR_MASK;
                reg |= FW_WM(wm, SR);
                I915_WRITE(DSPFW1, reg);
                DRM_DEBUG_KMS("DSPFW1 register is %x\n", reg);

                /* cursor SR */
                wm = intel_calculate_wm(clock, &pineview_cursor_wm,
                                        pineview_display_wm.fifo_size,
                                        cpp, latency->cursor_sr);
                reg = I915_READ(DSPFW3);
                reg &= ~DSPFW_CURSOR_SR_MASK;
                reg |= FW_WM(wm, CURSOR_SR);
                I915_WRITE(DSPFW3, reg);

                /* Display HPLL off SR */
                wm = intel_calculate_wm(clock, &pineview_display_hplloff_wm,
                                        pineview_display_hplloff_wm.fifo_size,
                                        cpp, latency->display_hpll_disable);
                reg = I915_READ(DSPFW3);
                reg &= ~DSPFW_HPLL_SR_MASK;
                reg |= FW_WM(wm, HPLL_SR);
                I915_WRITE(DSPFW3, reg);

                /* cursor HPLL off SR */
                wm = intel_calculate_wm(clock, &pineview_cursor_hplloff_wm,
                                        pineview_display_hplloff_wm.fifo_size,
                                        cpp, latency->cursor_hpll_disable);
                reg = I915_READ(DSPFW3);
                reg &= ~DSPFW_HPLL_CURSOR_MASK;
                reg |= FW_WM(wm, HPLL_CURSOR);
                I915_WRITE(DSPFW3, reg);
                DRM_DEBUG_KMS("DSPFW3 register is %x\n", reg);

                intel_set_memory_cxsr(dev_priv, true);
        } else {
                intel_set_memory_cxsr(dev_priv, false);
        }
}

static bool g4x_compute_wm0(struct drm_device *dev,
                            int plane,
                            const struct intel_watermark_params *display,
                            int display_latency_ns,
                            const struct intel_watermark_params *cursor,
                            int cursor_latency_ns,
                            int *plane_wm,
                            int *cursor_wm)
{
        struct drm_crtc *crtc;
        const struct drm_display_mode *adjusted_mode;
        int htotal, hdisplay, clock, cpp;
        int line_time_us, line_count;
        int entries, tlb_miss;

        crtc = intel_get_crtc_for_plane(dev, plane);
        if (!intel_crtc_active(crtc)) {
                *cursor_wm = cursor->guard_size;
                *plane_wm = display->guard_size;
                return false;
        }

        adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
        clock = adjusted_mode->crtc_clock;
        htotal = adjusted_mode->crtc_htotal;
        hdisplay = to_intel_crtc(crtc)->config->pipe_src_w;
        cpp = drm_format_plane_cpp(crtc->primary->state->fb->pixel_format, 0);

        /* Use the small buffer method to calculate plane watermark */
        entries = ((clock * cpp / 1000) * display_latency_ns) / 1000;
        tlb_miss = display->fifo_size*display->cacheline_size - hdisplay * 8;
        if (tlb_miss > 0)
                entries += tlb_miss;
        entries = DIV_ROUND_UP(entries, display->cacheline_size);
        *plane_wm = entries + display->guard_size;
        if (*plane_wm > (int)display->max_wm)
                *plane_wm = display->max_wm;

        /* Use the large buffer method to calculate cursor watermark */
        line_time_us = max(htotal * 1000 / clock, 1);
        line_count = (cursor_latency_ns / line_time_us + 1000) / 1000;
        entries = line_count * crtc->cursor->state->crtc_w * cpp;
        tlb_miss = cursor->fifo_size*cursor->cacheline_size - hdisplay * 8;
        if (tlb_miss > 0)
                entries += tlb_miss;
        entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
        *cursor_wm = entries + cursor->guard_size;
        if (*cursor_wm > (int)cursor->max_wm)
                *cursor_wm = (int)cursor->max_wm;

        return true;
}
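/*
 * Note on the two methods used above (a summary, not from Bspec): the
 * "small buffer" method sizes the watermark by bytes fetched during the
 * latency window (clock * cpp * latency), while the "large buffer" method
 * sizes it by whole lines (line_count * line_size). g4x_compute_srwm()
 * below takes the minimum of the two for the primary plane.
 */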
/*
 * Check the wm result.
 *
 * If any calculated watermark value is larger than the maximum value that
 * can be programmed into the associated watermark register, that watermark
 * must be disabled.
 */
static bool g4x_check_srwm(struct drm_device *dev,
                           int display_wm, int cursor_wm,
                           const struct intel_watermark_params *display,
                           const struct intel_watermark_params *cursor)
{
        DRM_DEBUG_KMS("SR watermark: display plane %d, cursor %d\n",
                      display_wm, cursor_wm);

        if (display_wm > display->max_wm) {
                DRM_DEBUG_KMS("display watermark is too large(%d/%ld), disabling\n",
                              display_wm, display->max_wm);
                return false;
        }

        if (cursor_wm > cursor->max_wm) {
                DRM_DEBUG_KMS("cursor watermark is too large(%d/%ld), disabling\n",
                              cursor_wm, cursor->max_wm);
                return false;
        }

        if (!(display_wm || cursor_wm)) {
                DRM_DEBUG_KMS("SR latency is 0, disabling\n");
                return false;
        }

        return true;
}

static bool g4x_compute_srwm(struct drm_device *dev,
                             int plane,
                             int latency_ns,
                             const struct intel_watermark_params *display,
                             const struct intel_watermark_params *cursor,
                             int *display_wm, int *cursor_wm)
{
        struct drm_crtc *crtc;
        const struct drm_display_mode *adjusted_mode;
        int hdisplay, htotal, cpp, clock;
        unsigned long line_time_us;
        int line_count, line_size;
        int small, large;
        int entries;

        if (!latency_ns) {
                *display_wm = *cursor_wm = 0;
                return false;
        }

        crtc = intel_get_crtc_for_plane(dev, plane);
        adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
        clock = adjusted_mode->crtc_clock;
        htotal = adjusted_mode->crtc_htotal;
        hdisplay = to_intel_crtc(crtc)->config->pipe_src_w;
        cpp = drm_format_plane_cpp(crtc->primary->state->fb->pixel_format, 0);

        line_time_us = max(htotal * 1000 / clock, 1);
        line_count = (latency_ns / line_time_us + 1000) / 1000;
        line_size = hdisplay * cpp;

        /* Use the minimum of the small and large buffer method for primary */
        small = ((clock * cpp / 1000) * latency_ns) / 1000;
        large = line_count * line_size;

        entries = DIV_ROUND_UP(min(small, large), display->cacheline_size);
        *display_wm = entries + display->guard_size;

        /* calculate the self-refresh watermark for display cursor */
        entries = line_count * cpp * crtc->cursor->state->crtc_w;
        entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
        *cursor_wm = entries + cursor->guard_size;

        return g4x_check_srwm(dev,
                              *display_wm, *cursor_wm,
                              display, cursor);
}

#define FW_WM_VLV(value, plane) \
        (((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK_VLV)
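/*
 * Example expansion (by definition of the macros above): FW_WM_VLV(12,
 * PLANEA) becomes ((12) << DSPFW_PLANEA_SHIFT) & DSPFW_PLANEA_MASK_VLV,
 * i.e. the watermark value shifted into its register field and clamped to
 * the field's width. FW_WM() is identical but uses the non-VLV masks.
 */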
static void vlv_write_wm_values(struct intel_crtc *crtc,
                                const struct vlv_wm_values *wm)
{
        struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
        enum pipe pipe = crtc->pipe;

        I915_WRITE(VLV_DDL(pipe),
                   (wm->ddl[pipe].cursor << DDL_CURSOR_SHIFT) |
                   (wm->ddl[pipe].sprite[1] << DDL_SPRITE_SHIFT(1)) |
                   (wm->ddl[pipe].sprite[0] << DDL_SPRITE_SHIFT(0)) |
                   (wm->ddl[pipe].primary << DDL_PLANE_SHIFT));

        I915_WRITE(DSPFW1,
                   FW_WM(wm->sr.plane, SR) |
                   FW_WM(wm->pipe[PIPE_B].cursor, CURSORB) |
                   FW_WM_VLV(wm->pipe[PIPE_B].primary, PLANEB) |
                   FW_WM_VLV(wm->pipe[PIPE_A].primary, PLANEA));
        I915_WRITE(DSPFW2,
                   FW_WM_VLV(wm->pipe[PIPE_A].sprite[1], SPRITEB) |
                   FW_WM(wm->pipe[PIPE_A].cursor, CURSORA) |
                   FW_WM_VLV(wm->pipe[PIPE_A].sprite[0], SPRITEA));
        I915_WRITE(DSPFW3,
                   FW_WM(wm->sr.cursor, CURSOR_SR));

        if (IS_CHERRYVIEW(dev_priv)) {
                I915_WRITE(DSPFW7_CHV,
                           FW_WM_VLV(wm->pipe[PIPE_B].sprite[1], SPRITED) |
                           FW_WM_VLV(wm->pipe[PIPE_B].sprite[0], SPRITEC));
                I915_WRITE(DSPFW8_CHV,
                           FW_WM_VLV(wm->pipe[PIPE_C].sprite[1], SPRITEF) |
                           FW_WM_VLV(wm->pipe[PIPE_C].sprite[0], SPRITEE));
                I915_WRITE(DSPFW9_CHV,
                           FW_WM_VLV(wm->pipe[PIPE_C].primary, PLANEC) |
                           FW_WM(wm->pipe[PIPE_C].cursor, CURSORC));
                I915_WRITE(DSPHOWM,
                           FW_WM(wm->sr.plane >> 9, SR_HI) |
                           FW_WM(wm->pipe[PIPE_C].sprite[1] >> 8, SPRITEF_HI) |
                           FW_WM(wm->pipe[PIPE_C].sprite[0] >> 8, SPRITEE_HI) |
                           FW_WM(wm->pipe[PIPE_C].primary >> 8, PLANEC_HI) |
                           FW_WM(wm->pipe[PIPE_B].sprite[1] >> 8, SPRITED_HI) |
                           FW_WM(wm->pipe[PIPE_B].sprite[0] >> 8, SPRITEC_HI) |
                           FW_WM(wm->pipe[PIPE_B].primary >> 8, PLANEB_HI) |
                           FW_WM(wm->pipe[PIPE_A].sprite[1] >> 8, SPRITEB_HI) |
                           FW_WM(wm->pipe[PIPE_A].sprite[0] >> 8, SPRITEA_HI) |
                           FW_WM(wm->pipe[PIPE_A].primary >> 8, PLANEA_HI));
        } else {
                I915_WRITE(DSPFW7,
                           FW_WM_VLV(wm->pipe[PIPE_B].sprite[1], SPRITED) |
                           FW_WM_VLV(wm->pipe[PIPE_B].sprite[0], SPRITEC));
                I915_WRITE(DSPHOWM,
                           FW_WM(wm->sr.plane >> 9, SR_HI) |
                           FW_WM(wm->pipe[PIPE_B].sprite[1] >> 8, SPRITED_HI) |
                           FW_WM(wm->pipe[PIPE_B].sprite[0] >> 8, SPRITEC_HI) |
                           FW_WM(wm->pipe[PIPE_B].primary >> 8, PLANEB_HI) |
                           FW_WM(wm->pipe[PIPE_A].sprite[1] >> 8, SPRITEB_HI) |
                           FW_WM(wm->pipe[PIPE_A].sprite[0] >> 8, SPRITEA_HI) |
                           FW_WM(wm->pipe[PIPE_A].primary >> 8, PLANEA_HI));
        }

        /* zero (unused) WM1 watermarks */
        I915_WRITE(DSPFW4, 0);
        I915_WRITE(DSPFW5, 0);
        I915_WRITE(DSPFW6, 0);
        I915_WRITE(DSPHOWM1, 0);

        POSTING_READ(DSPFW1);
}

#undef FW_WM_VLV

enum vlv_wm_level {
        VLV_WM_LEVEL_PM2,
        VLV_WM_LEVEL_PM5,
        VLV_WM_LEVEL_DDR_DVFS,
};

/* latency must be in 0.1us units. */
static unsigned int vlv_wm_method2(unsigned int pixel_rate,
                                   unsigned int pipe_htotal,
                                   unsigned int horiz_pixels,
                                   unsigned int cpp,
                                   unsigned int latency)
{
        unsigned int ret;

        ret = (latency * pixel_rate) / (pipe_htotal * 10000);
        ret = (ret + 1) * horiz_pixels * cpp;
        ret = DIV_ROUND_UP(ret, 64);

        return ret;
}

static void vlv_setup_wm_latency(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;

        /* all latencies in usec */
        dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM2] = 3;

        dev_priv->wm.max_level = VLV_WM_LEVEL_PM2;

        if (IS_CHERRYVIEW(dev_priv)) {
                dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM5] = 12;
                dev_priv->wm.pri_latency[VLV_WM_LEVEL_DDR_DVFS] = 33;

                dev_priv->wm.max_level = VLV_WM_LEVEL_DDR_DVFS;
        }
}
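/*
 * Worked example for vlv_wm_method2() (illustrative numbers only): with
 * latency = 30 (3 us), pixel_rate = 148500 kHz, pipe_htotal = 2200,
 * horiz_pixels = 1920 and cpp = 4: lines = (30 * 148500) / (2200 * 10000)
 * = 0, so (0 + 1) * 1920 * 4 = 7680 bytes, i.e. DIV_ROUND_UP(7680, 64)
 * = 120 FIFO blocks.
 */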
static uint16_t vlv_compute_wm_level(struct intel_plane *plane,
                                     struct intel_crtc *crtc,
                                     const struct intel_plane_state *state,
                                     int level)
{
        struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
        int clock, htotal, cpp, width, wm;

        if (dev_priv->wm.pri_latency[level] == 0)
                return USHRT_MAX;

        if (!state->visible)
                return 0;

        cpp = drm_format_plane_cpp(state->base.fb->pixel_format, 0);
        clock = crtc->config->base.adjusted_mode.crtc_clock;
        htotal = crtc->config->base.adjusted_mode.crtc_htotal;
        width = crtc->config->pipe_src_w;
        if (WARN_ON(htotal == 0))
                htotal = 1;

        if (plane->base.type == DRM_PLANE_TYPE_CURSOR) {
                /*
                 * FIXME the formula gives values that are
                 * too big for the cursor FIFO, and hence we
                 * would never be able to use cursors. For
                 * now just hardcode the watermark.
                 */
                wm = 63;
        } else {
                wm = vlv_wm_method2(clock, htotal, width, cpp,
                                    dev_priv->wm.pri_latency[level] * 10);
        }

        return min_t(int, wm, USHRT_MAX);
}

static void vlv_compute_fifo(struct intel_crtc *crtc)
{
        struct drm_device *dev = crtc->base.dev;
        struct vlv_wm_state *wm_state = &crtc->wm_state;
        struct intel_plane *plane;
        unsigned int total_rate = 0;
        const int fifo_size = 512 - 1;
        int fifo_extra, fifo_left = fifo_size;

        for_each_intel_plane_on_crtc(dev, crtc, plane) {
                struct intel_plane_state *state =
                        to_intel_plane_state(plane->base.state);

                if (plane->base.type == DRM_PLANE_TYPE_CURSOR)
                        continue;

                if (state->visible) {
                        wm_state->num_active_planes++;
                        total_rate += drm_format_plane_cpp(state->base.fb->pixel_format, 0);
                }
        }

        for_each_intel_plane_on_crtc(dev, crtc, plane) {
                struct intel_plane_state *state =
                        to_intel_plane_state(plane->base.state);
                unsigned int rate;

                if (plane->base.type == DRM_PLANE_TYPE_CURSOR) {
                        plane->wm.fifo_size = 63;
                        continue;
                }

                if (!state->visible) {
                        plane->wm.fifo_size = 0;
                        continue;
                }

                rate = drm_format_plane_cpp(state->base.fb->pixel_format, 0);
                plane->wm.fifo_size = fifo_size * rate / total_rate;
                fifo_left -= plane->wm.fifo_size;
        }

        fifo_extra = DIV_ROUND_UP(fifo_left, wm_state->num_active_planes ?: 1);

        /* spread the remainder evenly */
        for_each_intel_plane_on_crtc(dev, crtc, plane) {
                int plane_extra;

                if (fifo_left == 0)
                        break;

                if (plane->base.type == DRM_PLANE_TYPE_CURSOR)
                        continue;

                /* give it all to the first plane if none are active */
                if (plane->wm.fifo_size == 0 &&
                    wm_state->num_active_planes)
                        continue;

                plane_extra = min(fifo_extra, fifo_left);
                plane->wm.fifo_size += plane_extra;
                fifo_left -= plane_extra;
        }

        WARN_ON(fifo_left != 0);
}

static void vlv_invert_wms(struct intel_crtc *crtc)
{
        struct vlv_wm_state *wm_state = &crtc->wm_state;
        int level;

        for (level = 0; level < wm_state->num_levels; level++) {
                struct drm_device *dev = crtc->base.dev;
                const int sr_fifo_size = INTEL_INFO(dev)->num_pipes * 512 - 1;
                struct intel_plane *plane;

                wm_state->sr[level].plane = sr_fifo_size - wm_state->sr[level].plane;
                wm_state->sr[level].cursor = 63 - wm_state->sr[level].cursor;

                for_each_intel_plane_on_crtc(dev, crtc, plane) {
                        switch (plane->base.type) {
                                int sprite;
                        case DRM_PLANE_TYPE_CURSOR:
                                wm_state->wm[level].cursor = plane->wm.fifo_size -
                                        wm_state->wm[level].cursor;
                                break;
                        case DRM_PLANE_TYPE_PRIMARY:
                                wm_state->wm[level].primary = plane->wm.fifo_size -
                                        wm_state->wm[level].primary;
                                break;
                        case DRM_PLANE_TYPE_OVERLAY:
                                sprite = plane->plane;
                                wm_state->wm[level].sprite[sprite] = plane->wm.fifo_size -
                                        wm_state->wm[level].sprite[sprite];
                                break;
                        }
                }
        }
}
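/*
 * Example of the proportional split in vlv_compute_fifo() (illustrative):
 * with two visible planes of cpp 4 and cpp 2, total_rate = 6 and the 511
 * usable entries split as 511 * 4 / 6 = 340 and 511 * 2 / 6 = 170. The
 * single leftover entry (fifo_left = 1) is then handed out by the
 * remainder loop, giving a 341/170 split.
 */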
static void vlv_compute_wm(struct intel_crtc *crtc)
{
        struct drm_device *dev = crtc->base.dev;
        struct vlv_wm_state *wm_state = &crtc->wm_state;
        struct intel_plane *plane;
        int sr_fifo_size = INTEL_INFO(dev)->num_pipes * 512 - 1;
        int level;

        memset(wm_state, 0, sizeof(*wm_state));

        wm_state->cxsr = crtc->pipe != PIPE_C && crtc->wm.cxsr_allowed;
        wm_state->num_levels = to_i915(dev)->wm.max_level + 1;

        wm_state->num_active_planes = 0;

        vlv_compute_fifo(crtc);

        if (wm_state->num_active_planes != 1)
                wm_state->cxsr = false;

        if (wm_state->cxsr) {
                for (level = 0; level < wm_state->num_levels; level++) {
                        wm_state->sr[level].plane = sr_fifo_size;
                        wm_state->sr[level].cursor = 63;
                }
        }

        for_each_intel_plane_on_crtc(dev, crtc, plane) {
                struct intel_plane_state *state =
                        to_intel_plane_state(plane->base.state);

                if (!state->visible)
                        continue;

                /* normal watermarks */
                for (level = 0; level < wm_state->num_levels; level++) {
                        int wm = vlv_compute_wm_level(plane, crtc, state, level);
                        int max_wm = plane->base.type == DRM_PLANE_TYPE_CURSOR ? 63 : 511;

                        /* hack */
                        if (WARN_ON(level == 0 && wm > max_wm))
                                wm = max_wm;

                        if (wm > plane->wm.fifo_size)
                                break;

                        switch (plane->base.type) {
                                int sprite;
                        case DRM_PLANE_TYPE_CURSOR:
                                wm_state->wm[level].cursor = wm;
                                break;
                        case DRM_PLANE_TYPE_PRIMARY:
                                wm_state->wm[level].primary = wm;
                                break;
                        case DRM_PLANE_TYPE_OVERLAY:
                                sprite = plane->plane;
                                wm_state->wm[level].sprite[sprite] = wm;
                                break;
                        }
                }

                wm_state->num_levels = level;

                if (!wm_state->cxsr)
                        continue;

                /* maxfifo watermarks */
                switch (plane->base.type) {
                        int sprite, level;
                case DRM_PLANE_TYPE_CURSOR:
                        for (level = 0; level < wm_state->num_levels; level++)
                                wm_state->sr[level].cursor =
                                        wm_state->wm[level].cursor;
                        break;
                case DRM_PLANE_TYPE_PRIMARY:
                        for (level = 0; level < wm_state->num_levels; level++)
                                wm_state->sr[level].plane =
                                        min(wm_state->sr[level].plane,
                                            wm_state->wm[level].primary);
                        break;
                case DRM_PLANE_TYPE_OVERLAY:
                        sprite = plane->plane;
                        for (level = 0; level < wm_state->num_levels; level++)
                                wm_state->sr[level].plane =
                                        min(wm_state->sr[level].plane,
                                            wm_state->wm[level].sprite[sprite]);
                        break;
                }
        }

        /* clear any (partially) filled invalid levels */
        for (level = wm_state->num_levels; level < to_i915(dev)->wm.max_level + 1; level++) {
                memset(&wm_state->wm[level], 0, sizeof(wm_state->wm[level]));
                memset(&wm_state->sr[level], 0, sizeof(wm_state->sr[level]));
        }

        vlv_invert_wms(crtc);
}
#define VLV_FIFO(plane, value) \
        (((value) << DSPARB_ ## plane ## _SHIFT_VLV) & DSPARB_ ## plane ## _MASK_VLV)

static void vlv_pipe_set_fifo_size(struct intel_crtc *crtc)
{
        struct drm_device *dev = crtc->base.dev;
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct intel_plane *plane;
        int sprite0_start = 0, sprite1_start = 0, fifo_size = 0;

        for_each_intel_plane_on_crtc(dev, crtc, plane) {
                if (plane->base.type == DRM_PLANE_TYPE_CURSOR) {
                        WARN_ON(plane->wm.fifo_size != 63);
                        continue;
                }

                if (plane->base.type == DRM_PLANE_TYPE_PRIMARY)
                        sprite0_start = plane->wm.fifo_size;
                else if (plane->plane == 0)
                        sprite1_start = sprite0_start + plane->wm.fifo_size;
                else
                        fifo_size = sprite1_start + plane->wm.fifo_size;
        }

        WARN_ON(fifo_size != 512 - 1);

        DRM_DEBUG_KMS("Pipe %c FIFO split %d / %d / %d\n",
                      pipe_name(crtc->pipe), sprite0_start,
                      sprite1_start, fifo_size);

        switch (crtc->pipe) {
                uint32_t dsparb, dsparb2, dsparb3;
        case PIPE_A:
                dsparb = I915_READ(DSPARB);
                dsparb2 = I915_READ(DSPARB2);

                dsparb &= ~(VLV_FIFO(SPRITEA, 0xff) |
                            VLV_FIFO(SPRITEB, 0xff));
                dsparb |= (VLV_FIFO(SPRITEA, sprite0_start) |
                           VLV_FIFO(SPRITEB, sprite1_start));

                dsparb2 &= ~(VLV_FIFO(SPRITEA_HI, 0x1) |
                             VLV_FIFO(SPRITEB_HI, 0x1));
                dsparb2 |= (VLV_FIFO(SPRITEA_HI, sprite0_start >> 8) |
                            VLV_FIFO(SPRITEB_HI, sprite1_start >> 8));

                I915_WRITE(DSPARB, dsparb);
                I915_WRITE(DSPARB2, dsparb2);
                break;
        case PIPE_B:
                dsparb = I915_READ(DSPARB);
                dsparb2 = I915_READ(DSPARB2);

                dsparb &= ~(VLV_FIFO(SPRITEC, 0xff) |
                            VLV_FIFO(SPRITED, 0xff));
                dsparb |= (VLV_FIFO(SPRITEC, sprite0_start) |
                           VLV_FIFO(SPRITED, sprite1_start));

                dsparb2 &= ~(VLV_FIFO(SPRITEC_HI, 0xff) |
                             VLV_FIFO(SPRITED_HI, 0xff));
                dsparb2 |= (VLV_FIFO(SPRITEC_HI, sprite0_start >> 8) |
                            VLV_FIFO(SPRITED_HI, sprite1_start >> 8));

                I915_WRITE(DSPARB, dsparb);
                I915_WRITE(DSPARB2, dsparb2);
                break;
        case PIPE_C:
                dsparb3 = I915_READ(DSPARB3);
                dsparb2 = I915_READ(DSPARB2);

                dsparb3 &= ~(VLV_FIFO(SPRITEE, 0xff) |
                             VLV_FIFO(SPRITEF, 0xff));
                dsparb3 |= (VLV_FIFO(SPRITEE, sprite0_start) |
                            VLV_FIFO(SPRITEF, sprite1_start));

                dsparb2 &= ~(VLV_FIFO(SPRITEE_HI, 0xff) |
                             VLV_FIFO(SPRITEF_HI, 0xff));
                dsparb2 |= (VLV_FIFO(SPRITEE_HI, sprite0_start >> 8) |
                            VLV_FIFO(SPRITEF_HI, sprite1_start >> 8));

                I915_WRITE(DSPARB3, dsparb3);
                I915_WRITE(DSPARB2, dsparb2);
                break;
        default:
                break;
        }
}

#undef VLV_FIFO

static void vlv_merge_wm(struct drm_device *dev,
                         struct vlv_wm_values *wm)
{
        struct intel_crtc *crtc;
        int num_active_crtcs = 0;

        wm->level = to_i915(dev)->wm.max_level;
        wm->cxsr = true;

        for_each_intel_crtc(dev, crtc) {
                const struct vlv_wm_state *wm_state = &crtc->wm_state;

                if (!crtc->active)
                        continue;

                if (!wm_state->cxsr)
                        wm->cxsr = false;

                num_active_crtcs++;
                wm->level = min_t(int, wm->level, wm_state->num_levels - 1);
        }

        if (num_active_crtcs != 1)
                wm->cxsr = false;

        if (num_active_crtcs > 1)
                wm->level = VLV_WM_LEVEL_PM2;

        for_each_intel_crtc(dev, crtc) {
                struct vlv_wm_state *wm_state = &crtc->wm_state;
                enum pipe pipe = crtc->pipe;

                if (!crtc->active)
                        continue;

                wm->pipe[pipe] = wm_state->wm[wm->level];
                if (wm->cxsr)
                        wm->sr = wm_state->sr[wm->level];

                wm->ddl[pipe].primary = DDL_PRECISION_HIGH | 2;
                wm->ddl[pipe].sprite[0] = DDL_PRECISION_HIGH | 2;
                wm->ddl[pipe].sprite[1] = DDL_PRECISION_HIGH | 2;
                wm->ddl[pipe].cursor = DDL_PRECISION_HIGH | 2;
        }
}
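/*
 * Note on ordering in vlv_update_wm() below: when dropping to a lower
 * watermark level, the deeper power states (DDR DVFS, PM5, CxSR) are
 * disabled before the new watermarks are written; when raising the level,
 * they are re-enabled only afterwards. This keeps the hardware from
 * relying on a state the new watermarks cannot sustain.
 */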
static void vlv_update_wm(struct drm_crtc *crtc)
{
        struct drm_device *dev = crtc->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        enum pipe pipe = intel_crtc->pipe;
        struct vlv_wm_values wm = {};

        vlv_compute_wm(intel_crtc);
        vlv_merge_wm(dev, &wm);

        if (memcmp(&dev_priv->wm.vlv, &wm, sizeof(wm)) == 0) {
                /* FIXME should be part of crtc atomic commit */
                vlv_pipe_set_fifo_size(intel_crtc);
                return;
        }

        if (wm.level < VLV_WM_LEVEL_DDR_DVFS &&
            dev_priv->wm.vlv.level >= VLV_WM_LEVEL_DDR_DVFS)
                chv_set_memory_dvfs(dev_priv, false);

        if (wm.level < VLV_WM_LEVEL_PM5 &&
            dev_priv->wm.vlv.level >= VLV_WM_LEVEL_PM5)
                chv_set_memory_pm5(dev_priv, false);

        if (!wm.cxsr && dev_priv->wm.vlv.cxsr)
                intel_set_memory_cxsr(dev_priv, false);

        /* FIXME should be part of crtc atomic commit */
        vlv_pipe_set_fifo_size(intel_crtc);

        vlv_write_wm_values(intel_crtc, &wm);

        DRM_DEBUG_KMS("Setting FIFO watermarks - %c: plane=%d, cursor=%d, "
                      "sprite0=%d, sprite1=%d, SR: plane=%d, cursor=%d level=%d cxsr=%d\n",
                      pipe_name(pipe), wm.pipe[pipe].primary, wm.pipe[pipe].cursor,
                      wm.pipe[pipe].sprite[0], wm.pipe[pipe].sprite[1],
                      wm.sr.plane, wm.sr.cursor, wm.level, wm.cxsr);

        if (wm.cxsr && !dev_priv->wm.vlv.cxsr)
                intel_set_memory_cxsr(dev_priv, true);

        if (wm.level >= VLV_WM_LEVEL_PM5 &&
            dev_priv->wm.vlv.level < VLV_WM_LEVEL_PM5)
                chv_set_memory_pm5(dev_priv, true);

        if (wm.level >= VLV_WM_LEVEL_DDR_DVFS &&
            dev_priv->wm.vlv.level < VLV_WM_LEVEL_DDR_DVFS)
                chv_set_memory_dvfs(dev_priv, true);

        dev_priv->wm.vlv = wm;
}

#define single_plane_enabled(mask) is_power_of_2(mask)
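/*
 * single_plane_enabled() relies on the fact that a mask with exactly one
 * pipe bit set is a power of two: e.g. 1 << PIPE_A (0x1) or 1 << PIPE_B
 * (0x2) qualify, while 0x3 (both pipes) and 0x0 (none) do not.
 */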
static void g4x_update_wm(struct drm_crtc *crtc)
{
        struct drm_device *dev = crtc->dev;
        static const int sr_latency_ns = 12000;
        struct drm_i915_private *dev_priv = dev->dev_private;
        int planea_wm, planeb_wm, cursora_wm, cursorb_wm;
        int plane_sr, cursor_sr;
        unsigned int enabled = 0;
        bool cxsr_enabled;

        if (g4x_compute_wm0(dev, PIPE_A,
                            &g4x_wm_info, pessimal_latency_ns,
                            &g4x_cursor_wm_info, pessimal_latency_ns,
                            &planea_wm, &cursora_wm))
                enabled |= 1 << PIPE_A;

        if (g4x_compute_wm0(dev, PIPE_B,
                            &g4x_wm_info, pessimal_latency_ns,
                            &g4x_cursor_wm_info, pessimal_latency_ns,
                            &planeb_wm, &cursorb_wm))
                enabled |= 1 << PIPE_B;

        if (single_plane_enabled(enabled) &&
            g4x_compute_srwm(dev, ffs(enabled) - 1,
                             sr_latency_ns,
                             &g4x_wm_info,
                             &g4x_cursor_wm_info,
                             &plane_sr, &cursor_sr)) {
                cxsr_enabled = true;
        } else {
                cxsr_enabled = false;
                intel_set_memory_cxsr(dev_priv, false);
                plane_sr = cursor_sr = 0;
        }

        DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, "
                      "B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n",
                      planea_wm, cursora_wm,
                      planeb_wm, cursorb_wm,
                      plane_sr, cursor_sr);

        I915_WRITE(DSPFW1,
                   FW_WM(plane_sr, SR) |
                   FW_WM(cursorb_wm, CURSORB) |
                   FW_WM(planeb_wm, PLANEB) |
                   FW_WM(planea_wm, PLANEA));
        I915_WRITE(DSPFW2,
                   (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
                   FW_WM(cursora_wm, CURSORA));
        /* HPLL off in SR has some issues on G4x... disable it */
        I915_WRITE(DSPFW3,
                   (I915_READ(DSPFW3) & ~(DSPFW_HPLL_SR_EN | DSPFW_CURSOR_SR_MASK)) |
                   FW_WM(cursor_sr, CURSOR_SR));

        if (cxsr_enabled)
                intel_set_memory_cxsr(dev_priv, true);
}
static void i965_update_wm(struct drm_crtc *unused_crtc)
{
        struct drm_device *dev = unused_crtc->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_crtc *crtc;
        int srwm = 1;
        int cursor_sr = 16;
        bool cxsr_enabled;

        /* Calc sr entries for one plane configs */
        crtc = single_enabled_crtc(dev);
        if (crtc) {
                /* self-refresh has much higher latency */
                static const int sr_latency_ns = 12000;
                const struct drm_display_mode *adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
                int clock = adjusted_mode->crtc_clock;
                int htotal = adjusted_mode->crtc_htotal;
                int hdisplay = to_intel_crtc(crtc)->config->pipe_src_w;
                int cpp = drm_format_plane_cpp(crtc->primary->state->fb->pixel_format, 0);
                unsigned long line_time_us;
                int entries;

                line_time_us = max(htotal * 1000 / clock, 1);

                /* Use ns/us then divide to preserve precision */
                entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
                        cpp * hdisplay;
                entries = DIV_ROUND_UP(entries, I915_FIFO_LINE_SIZE);
                srwm = I965_FIFO_SIZE - entries;
                if (srwm < 0)
                        srwm = 1;
                srwm &= 0x1ff;
                DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n",
                              entries, srwm);

                entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
                        cpp * crtc->cursor->state->crtc_w;
                entries = DIV_ROUND_UP(entries,
                                       i965_cursor_wm_info.cacheline_size);
                cursor_sr = i965_cursor_wm_info.fifo_size -
                        (entries + i965_cursor_wm_info.guard_size);

                if (cursor_sr > i965_cursor_wm_info.max_wm)
                        cursor_sr = i965_cursor_wm_info.max_wm;

                DRM_DEBUG_KMS("self-refresh watermark: display plane %d "
                              "cursor %d\n", srwm, cursor_sr);

                cxsr_enabled = true;
        } else {
                cxsr_enabled = false;
                /* Turn off self refresh if both pipes are enabled */
                intel_set_memory_cxsr(dev_priv, false);
        }

        DRM_DEBUG_KMS("Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n",
                      srwm);

        /* 965 has limitations... */
        I915_WRITE(DSPFW1, FW_WM(srwm, SR) |
                   FW_WM(8, CURSORB) |
                   FW_WM(8, PLANEB) |
                   FW_WM(8, PLANEA));
        I915_WRITE(DSPFW2, FW_WM(8, CURSORA) |
                   FW_WM(8, PLANEC_OLD));
        /* update cursor SR watermark */
        I915_WRITE(DSPFW3, FW_WM(cursor_sr, CURSOR_SR));

        if (cxsr_enabled)
                intel_set_memory_cxsr(dev_priv, true);
}

#undef FW_WM
static void i9xx_update_wm(struct drm_crtc *unused_crtc)
{
        struct drm_device *dev = unused_crtc->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        const struct intel_watermark_params *wm_info;
        uint32_t fwater_lo;
        uint32_t fwater_hi;
        int cwm, srwm = 1;
        int fifo_size;
        int planea_wm, planeb_wm;
        struct drm_crtc *crtc, *enabled = NULL;

        if (IS_I945GM(dev))
                wm_info = &i945_wm_info;
        else if (!IS_GEN2(dev))
                wm_info = &i915_wm_info;
        else
                wm_info = &i830_a_wm_info;

        fifo_size = dev_priv->display.get_fifo_size(dev, 0);
        crtc = intel_get_crtc_for_plane(dev, 0);
        if (intel_crtc_active(crtc)) {
                const struct drm_display_mode *adjusted_mode;
                int cpp = drm_format_plane_cpp(crtc->primary->state->fb->pixel_format, 0);
                if (IS_GEN2(dev))
                        cpp = 4;

                adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
                planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
                                               wm_info, fifo_size, cpp,
                                               pessimal_latency_ns);
                enabled = crtc;
        } else {
                planea_wm = fifo_size - wm_info->guard_size;
                if (planea_wm > (long)wm_info->max_wm)
                        planea_wm = wm_info->max_wm;
        }

        if (IS_GEN2(dev))
                wm_info = &i830_bc_wm_info;

        fifo_size = dev_priv->display.get_fifo_size(dev, 1);
        crtc = intel_get_crtc_for_plane(dev, 1);
        if (intel_crtc_active(crtc)) {
                const struct drm_display_mode *adjusted_mode;
                int cpp = drm_format_plane_cpp(crtc->primary->state->fb->pixel_format, 0);
                if (IS_GEN2(dev))
                        cpp = 4;

                adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
                planeb_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
                                               wm_info, fifo_size, cpp,
                                               pessimal_latency_ns);
                if (enabled == NULL)
                        enabled = crtc;
                else
                        enabled = NULL;
        } else {
                planeb_wm = fifo_size - wm_info->guard_size;
                if (planeb_wm > (long)wm_info->max_wm)
                        planeb_wm = wm_info->max_wm;
        }

        DRM_DEBUG_KMS("FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm);

        if (IS_I915GM(dev) && enabled) {
                struct drm_i915_gem_object *obj;

                obj = intel_fb_obj(enabled->primary->state->fb);

                /* self-refresh seems busted with untiled */
                if (obj->tiling_mode == I915_TILING_NONE)
                        enabled = NULL;
        }

        /*
         * Overlay gets an aggressive default since video jitter is bad.
         */
        cwm = 2;

        /* Play safe and disable self-refresh before adjusting watermarks. */
        intel_set_memory_cxsr(dev_priv, false);

        /* Calc sr entries for one plane configs */
        if (HAS_FW_BLC(dev) && enabled) {
                /* self-refresh has much higher latency */
                static const int sr_latency_ns = 6000;
                const struct drm_display_mode *adjusted_mode = &to_intel_crtc(enabled)->config->base.adjusted_mode;
                int clock = adjusted_mode->crtc_clock;
                int htotal = adjusted_mode->crtc_htotal;
                int hdisplay = to_intel_crtc(enabled)->config->pipe_src_w;
                int cpp = drm_format_plane_cpp(enabled->primary->state->fb->pixel_format, 0);
                unsigned long line_time_us;
                int entries;

                line_time_us = max(htotal * 1000 / clock, 1);

                /* Use ns/us then divide to preserve precision */
                entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
                        cpp * hdisplay;
                entries = DIV_ROUND_UP(entries, wm_info->cacheline_size);
                DRM_DEBUG_KMS("self-refresh entries: %d\n", entries);
                srwm = wm_info->fifo_size - entries;
                if (srwm < 0)
                        srwm = 1;

                if (IS_I945G(dev) || IS_I945GM(dev))
                        I915_WRITE(FW_BLC_SELF,
                                   FW_BLC_SELF_FIFO_MASK | (srwm & 0xff));
                else if (IS_I915GM(dev))
                        I915_WRITE(FW_BLC_SELF, srwm & 0x3f);
        }

        DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n",
                      planea_wm, planeb_wm, cwm, srwm);

        fwater_lo = ((planeb_wm & 0x3f) << 16) | (planea_wm & 0x3f);
        fwater_hi = (cwm & 0x1f);

        /* Set request length to 8 cachelines per fetch */
        fwater_lo = fwater_lo | (1 << 24) | (1 << 8);
        fwater_hi = fwater_hi | (1 << 8);

        I915_WRITE(FW_BLC, fwater_lo);
        I915_WRITE(FW_BLC2, fwater_hi);

        if (enabled)
                intel_set_memory_cxsr(dev_priv, true);
}

static void i845_update_wm(struct drm_crtc *unused_crtc)
{
        struct drm_device *dev = unused_crtc->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_crtc *crtc;
        const struct drm_display_mode *adjusted_mode;
        uint32_t fwater_lo;
        int planea_wm;

        crtc = single_enabled_crtc(dev);
        if (crtc == NULL)
                return;

        adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
        planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
                                       &i845_wm_info,
                                       dev_priv->display.get_fifo_size(dev, 0),
                                       4, pessimal_latency_ns);
        fwater_lo = I915_READ(FW_BLC) & ~0xfff;
        fwater_lo |= (3<<8) | planea_wm;

        DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d\n", planea_wm);

        I915_WRITE(FW_BLC, fwater_lo);
}

uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config)
{
        uint32_t pixel_rate;

        pixel_rate = pipe_config->base.adjusted_mode.crtc_clock;

        /* We only use IF-ID interlacing. If we ever use PF-ID we'll need to
         * adjust the pixel_rate here. */

        if (pipe_config->pch_pfit.enabled) {
                uint64_t pipe_w, pipe_h, pfit_w, pfit_h;
                uint32_t pfit_size = pipe_config->pch_pfit.size;

                pipe_w = pipe_config->pipe_src_w;
                pipe_h = pipe_config->pipe_src_h;

                pfit_w = (pfit_size >> 16) & 0xFFFF;
                pfit_h = pfit_size & 0xFFFF;
                if (pipe_w < pfit_w)
                        pipe_w = pfit_w;
                if (pipe_h < pfit_h)
                        pipe_h = pfit_h;

                if (WARN_ON(!pfit_w || !pfit_h))
                        return pixel_rate;

                pixel_rate = div_u64((uint64_t) pixel_rate * pipe_w * pipe_h,
                                     pfit_w * pfit_h);
        }

        return pixel_rate;
}
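/*
 * Example of the panel-fitter adjustment above (illustrative numbers):
 * with a 1920x1080 pipe downscaled by the PCH panel fitter to 1280x720,
 * pipe_w * pipe_h / (pfit_w * pfit_h) = 2073600 / 921600 = 2.25, so the
 * effective pixel rate used for watermarks is 2.25x the CRTC clock.
 */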
/* latency must be in 0.1us units. */
static uint32_t ilk_wm_method1(uint32_t pixel_rate, uint8_t cpp, uint32_t latency)
{
        uint64_t ret;

        if (WARN(latency == 0, "Latency value missing\n"))
                return UINT_MAX;

        ret = (uint64_t) pixel_rate * cpp * latency;
        ret = DIV_ROUND_UP_ULL(ret, 64 * 10000) + 2;

        return ret;
}

/* latency must be in 0.1us units. */
static uint32_t ilk_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
                               uint32_t horiz_pixels, uint8_t cpp,
                               uint32_t latency)
{
        uint32_t ret;

        if (WARN(latency == 0, "Latency value missing\n"))
                return UINT_MAX;
        if (WARN_ON(!pipe_htotal))
                return UINT_MAX;

        ret = (latency * pixel_rate) / (pipe_htotal * 10000);
        ret = (ret + 1) * horiz_pixels * cpp;
        ret = DIV_ROUND_UP(ret, 64) + 2;
        return ret;
}

static uint32_t ilk_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels,
                           uint8_t cpp)
{
        /*
         * Neither of these should be possible since this function shouldn't be
         * called if the CRTC is off or the plane is invisible. But let's be
         * extra paranoid to avoid a potential divide-by-zero if we screw up
         * elsewhere in the driver.
         */
        if (WARN_ON(!cpp))
                return 0;
        if (WARN_ON(!horiz_pixels))
                return 0;

        return DIV_ROUND_UP(pri_val * 64, horiz_pixels * cpp) + 2;
}

struct ilk_wm_maximums {
        uint16_t pri;
        uint16_t spr;
        uint16_t cur;
        uint16_t fbc;
};

/*
 * For both WM_PIPE and WM_LP.
 * mem_value must be in 0.1us units.
 */
static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate,
                                   const struct intel_plane_state *pstate,
                                   uint32_t mem_value,
                                   bool is_lp)
{
        int cpp = pstate->base.fb ?
                drm_format_plane_cpp(pstate->base.fb->pixel_format, 0) : 0;
        uint32_t method1, method2;

        if (!cstate->base.active || !pstate->visible)
                return 0;

        method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), cpp, mem_value);

        if (!is_lp)
                return method1;

        method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate),
                                 cstate->base.adjusted_mode.crtc_htotal,
                                 drm_rect_width(&pstate->dst),
                                 cpp, mem_value);

        return min(method1, method2);
}

/*
 * For both WM_PIPE and WM_LP.
 * mem_value must be in 0.1us units.
 */
static uint32_t ilk_compute_spr_wm(const struct intel_crtc_state *cstate,
                                   const struct intel_plane_state *pstate,
                                   uint32_t mem_value)
{
        int cpp = pstate->base.fb ?
                drm_format_plane_cpp(pstate->base.fb->pixel_format, 0) : 0;
        uint32_t method1, method2;

        if (!cstate->base.active || !pstate->visible)
                return 0;

        method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), cpp, mem_value);
        method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate),
                                 cstate->base.adjusted_mode.crtc_htotal,
                                 drm_rect_width(&pstate->dst),
                                 cpp, mem_value);
        return min(method1, method2);
}
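/*
 * Worked example for ilk_wm_method1() (illustrative numbers only): with
 * pixel_rate = 148500 kHz, cpp = 4 and latency = 35 (3.5 us), ret =
 * 148500 * 4 * 35 = 20790000, DIV_ROUND_UP(20790000, 64 * 10000) = 33,
 * plus 2 gives a watermark of 35 FIFO blocks.
 */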
pstate->base.crtc_w : 64; 1823 1824 if (!cstate->base.active) 1825 return 0; 1826 1827 return ilk_wm_method2(ilk_pipe_pixel_rate(cstate), 1828 cstate->base.adjusted_mode.crtc_htotal, 1829 width, cpp, mem_value); 1830} 1831 1832/* Only for WM_LP. */ 1833static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate, 1834 const struct intel_plane_state *pstate, 1835 uint32_t pri_val) 1836{ 1837 int cpp = pstate->base.fb ? 1838 drm_format_plane_cpp(pstate->base.fb->pixel_format, 0) : 0; 1839 1840 if (!cstate->base.active || !pstate->visible) 1841 return 0; 1842 1843 return ilk_wm_fbc(pri_val, drm_rect_width(&pstate->dst), cpp); 1844} 1845 1846static unsigned int ilk_display_fifo_size(const struct drm_device *dev) 1847{ 1848 if (INTEL_INFO(dev)->gen >= 8) 1849 return 3072; 1850 else if (INTEL_INFO(dev)->gen >= 7) 1851 return 768; 1852 else 1853 return 512; 1854} 1855 1856static unsigned int ilk_plane_wm_reg_max(const struct drm_device *dev, 1857 int level, bool is_sprite) 1858{ 1859 if (INTEL_INFO(dev)->gen >= 8) 1860 /* BDW primary/sprite plane watermarks */ 1861 return level == 0 ? 255 : 2047; 1862 else if (INTEL_INFO(dev)->gen >= 7) 1863 /* IVB/HSW primary/sprite plane watermarks */ 1864 return level == 0 ? 127 : 1023; 1865 else if (!is_sprite) 1866 /* ILK/SNB primary plane watermarks */ 1867 return level == 0 ? 127 : 511; 1868 else 1869 /* ILK/SNB sprite plane watermarks */ 1870 return level == 0 ? 63 : 255; 1871} 1872 1873static unsigned int ilk_cursor_wm_reg_max(const struct drm_device *dev, 1874 int level) 1875{ 1876 if (INTEL_INFO(dev)->gen >= 7) 1877 return level == 0 ? 63 : 255; 1878 else 1879 return level == 0 ? 31 : 63; 1880} 1881 1882static unsigned int ilk_fbc_wm_reg_max(const struct drm_device *dev) 1883{ 1884 if (INTEL_INFO(dev)->gen >= 8) 1885 return 31; 1886 else 1887 return 15; 1888} 1889 1890/* Calculate the maximum primary/sprite plane watermark */ 1891static unsigned int ilk_plane_wm_max(const struct drm_device *dev, 1892 int level, 1893 const struct intel_wm_config *config, 1894 enum intel_ddb_partitioning ddb_partitioning, 1895 bool is_sprite) 1896{ 1897 unsigned int fifo_size = ilk_display_fifo_size(dev); 1898 1899 /* if sprites aren't enabled, sprites get nothing */ 1900 if (is_sprite && !config->sprites_enabled) 1901 return 0; 1902 1903 /* HSW allows LP1+ watermarks even with multiple pipes */ 1904 if (level == 0 || config->num_pipes_active > 1) { 1905 fifo_size /= INTEL_INFO(dev)->num_pipes; 1906 1907 /* 1908 * For some reason the non self refresh 1909 * FIFO size is only half of the self 1910 * refresh FIFO size on ILK/SNB. 
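 * For example, a 512 entry FIFO shared by two active pipes on SNB
 * leaves 512 / 2 / 2 = 128 entries per pipe at this point.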
1911 */ 1912 if (INTEL_INFO(dev)->gen <= 6) 1913 fifo_size /= 2; 1914 } 1915 1916 if (config->sprites_enabled) { 1917 /* level 0 is always calculated with 1:1 split */ 1918 if (level > 0 && ddb_partitioning == INTEL_DDB_PART_5_6) { 1919 if (is_sprite) 1920 fifo_size *= 5; 1921 fifo_size /= 6; 1922 } else { 1923 fifo_size /= 2; 1924 } 1925 } 1926 1927 /* clamp to max that the registers can hold */ 1928 return min(fifo_size, ilk_plane_wm_reg_max(dev, level, is_sprite)); 1929} 1930 1931/* Calculate the maximum cursor plane watermark */ 1932static unsigned int ilk_cursor_wm_max(const struct drm_device *dev, 1933 int level, 1934 const struct intel_wm_config *config) 1935{ 1936 /* HSW LP1+ watermarks w/ multiple pipes */ 1937 if (level > 0 && config->num_pipes_active > 1) 1938 return 64; 1939 1940 /* otherwise just report max that registers can hold */ 1941 return ilk_cursor_wm_reg_max(dev, level); 1942} 1943 1944static void ilk_compute_wm_maximums(const struct drm_device *dev, 1945 int level, 1946 const struct intel_wm_config *config, 1947 enum intel_ddb_partitioning ddb_partitioning, 1948 struct ilk_wm_maximums *max) 1949{ 1950 max->pri = ilk_plane_wm_max(dev, level, config, ddb_partitioning, false); 1951 max->spr = ilk_plane_wm_max(dev, level, config, ddb_partitioning, true); 1952 max->cur = ilk_cursor_wm_max(dev, level, config); 1953 max->fbc = ilk_fbc_wm_reg_max(dev); 1954} 1955 1956static void ilk_compute_wm_reg_maximums(struct drm_device *dev, 1957 int level, 1958 struct ilk_wm_maximums *max) 1959{ 1960 max->pri = ilk_plane_wm_reg_max(dev, level, false); 1961 max->spr = ilk_plane_wm_reg_max(dev, level, true); 1962 max->cur = ilk_cursor_wm_reg_max(dev, level); 1963 max->fbc = ilk_fbc_wm_reg_max(dev); 1964} 1965 1966static bool ilk_validate_wm_level(int level, 1967 const struct ilk_wm_maximums *max, 1968 struct intel_wm_level *result) 1969{ 1970 bool ret; 1971 1972 /* already determined to be invalid? */ 1973 if (!result->enable) 1974 return false; 1975 1976 result->enable = result->pri_val <= max->pri && 1977 result->spr_val <= max->spr && 1978 result->cur_val <= max->cur; 1979 1980 ret = result->enable; 1981 1982 /* 1983 * HACK until we can pre-compute everything, 1984 * and thus fail gracefully if LP0 watermarks 1985 * are exceeded... 
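 * In that case the offending values are clamped to the register
 * maximum and the level is kept enabled, e.g. a computed LP0 primary
 * value of 200 against a max of 127 is programmed as 127.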
1986 */ 1987 if (level == 0 && !result->enable) { 1988 if (result->pri_val > max->pri) 1989 DRM_DEBUG_KMS("Primary WM%d too large %u (max %u)\n", 1990 level, result->pri_val, max->pri); 1991 if (result->spr_val > max->spr) 1992 DRM_DEBUG_KMS("Sprite WM%d too large %u (max %u)\n", 1993 level, result->spr_val, max->spr); 1994 if (result->cur_val > max->cur) 1995 DRM_DEBUG_KMS("Cursor WM%d too large %u (max %u)\n", 1996 level, result->cur_val, max->cur); 1997 1998 result->pri_val = min_t(uint32_t, result->pri_val, max->pri); 1999 result->spr_val = min_t(uint32_t, result->spr_val, max->spr); 2000 result->cur_val = min_t(uint32_t, result->cur_val, max->cur); 2001 result->enable = true; 2002 } 2003 2004 return ret; 2005} 2006 2007static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv, 2008 const struct intel_crtc *intel_crtc, 2009 int level, 2010 struct intel_crtc_state *cstate, 2011 struct intel_plane_state *pristate, 2012 struct intel_plane_state *sprstate, 2013 struct intel_plane_state *curstate, 2014 struct intel_wm_level *result) 2015{ 2016 uint16_t pri_latency = dev_priv->wm.pri_latency[level]; 2017 uint16_t spr_latency = dev_priv->wm.spr_latency[level]; 2018 uint16_t cur_latency = dev_priv->wm.cur_latency[level]; 2019 2020 /* WM1+ latency values stored in 0.5us units */ 2021 if (level > 0) { 2022 pri_latency *= 5; 2023 spr_latency *= 5; 2024 cur_latency *= 5; 2025 } 2026 2027 if (pristate) { 2028 result->pri_val = ilk_compute_pri_wm(cstate, pristate, 2029 pri_latency, level); 2030 result->fbc_val = ilk_compute_fbc_wm(cstate, pristate, result->pri_val); 2031 } 2032 2033 if (sprstate) 2034 result->spr_val = ilk_compute_spr_wm(cstate, sprstate, spr_latency); 2035 2036 if (curstate) 2037 result->cur_val = ilk_compute_cur_wm(cstate, curstate, cur_latency); 2038 2039 result->enable = true; 2040} 2041 2042static uint32_t 2043hsw_compute_linetime_wm(struct drm_device *dev, 2044 struct intel_crtc_state *cstate) 2045{ 2046 struct drm_i915_private *dev_priv = dev->dev_private; 2047 const struct drm_display_mode *adjusted_mode = 2048 &cstate->base.adjusted_mode; 2049 u32 linetime, ips_linetime; 2050 2051 if (!cstate->base.active) 2052 return 0; 2053 if (WARN_ON(adjusted_mode->crtc_clock == 0)) 2054 return 0; 2055 if (WARN_ON(dev_priv->cdclk_freq == 0)) 2056 return 0; 2057 2058 /* The watermarks are computed based on how long it takes to fill a single 2059 * row at the given clock rate, multiplied by 8.
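 * For instance, with an assumed mode of crtc_htotal = 2200 and
 * crtc_clock = 148500 kHz, DIV_ROUND_CLOSEST(2200 * 1000 * 8, 148500)
 * = 119, i.e. a line time of ~14.8 us expressed in 1/8 us units.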
2060 */ 2061 linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8, 2062 adjusted_mode->crtc_clock); 2063 ips_linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8, 2064 dev_priv->cdclk_freq); 2065 2066 return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) | 2067 PIPE_WM_LINETIME_TIME(linetime); 2068} 2069 2070static void intel_read_wm_latency(struct drm_device *dev, uint16_t wm[8]) 2071{ 2072 struct drm_i915_private *dev_priv = dev->dev_private; 2073 2074 if (IS_GEN9(dev)) { 2075 uint32_t val; 2076 int ret, i; 2077 int level, max_level = ilk_wm_max_level(dev); 2078 2079 /* read the first set of memory latencies[0:3] */ 2080 val = 0; /* data0 to be programmed to 0 for first set */ 2081 mutex_lock(&dev_priv->rps.hw_lock); 2082 ret = sandybridge_pcode_read(dev_priv, 2083 GEN9_PCODE_READ_MEM_LATENCY, 2084 &val); 2085 mutex_unlock(&dev_priv->rps.hw_lock); 2086 2087 if (ret) { 2088 DRM_ERROR("SKL Mailbox read error = %d\n", ret); 2089 return; 2090 } 2091 2092 wm[0] = val & GEN9_MEM_LATENCY_LEVEL_MASK; 2093 wm[1] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) & 2094 GEN9_MEM_LATENCY_LEVEL_MASK; 2095 wm[2] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) & 2096 GEN9_MEM_LATENCY_LEVEL_MASK; 2097 wm[3] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) & 2098 GEN9_MEM_LATENCY_LEVEL_MASK; 2099 2100 /* read the second set of memory latencies[4:7] */ 2101 val = 1; /* data0 to be programmed to 1 for second set */ 2102 mutex_lock(&dev_priv->rps.hw_lock); 2103 ret = sandybridge_pcode_read(dev_priv, 2104 GEN9_PCODE_READ_MEM_LATENCY, 2105 &val); 2106 mutex_unlock(&dev_priv->rps.hw_lock); 2107 if (ret) { 2108 DRM_ERROR("SKL Mailbox read error = %d\n", ret); 2109 return; 2110 } 2111 2112 wm[4] = val & GEN9_MEM_LATENCY_LEVEL_MASK; 2113 wm[5] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) & 2114 GEN9_MEM_LATENCY_LEVEL_MASK; 2115 wm[6] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) & 2116 GEN9_MEM_LATENCY_LEVEL_MASK; 2117 wm[7] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) & 2118 GEN9_MEM_LATENCY_LEVEL_MASK; 2119 2120 /* 2121 * WaWmMemoryReadLatency:skl 2122 * 2123 * punit doesn't take into account the read latency so we need 2124 * to add 2us to the various latency levels we retrieve from 2125 * the punit. 2126 * - WM0 is a bit special in that it's the only level that 2127 * can't be disabled if we want to have display working, so 2128 * we always add 2us there. 2129 * - For levels >=1, punit returns 0us latency when they are 2130 * disabled, so we respect that and don't add 2us then 2131 * 2132 * Additionally, if a level n (n > 1) has a 0us latency, all 2133 * levels m (m >= n) need to be disabled. We make sure to 2134 * sanitize the values out of the punit to satisfy this 2135 * requirement.
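 * As an illustration with made-up punit values {5, 20, 40, 0, 70, ...},
 * the sanitized result is {7, 22, 42, 0, 0, ...}: 2us is added to wm[0]
 * and to the non-zero higher levels, and every level from the first
 * disabled one onwards is zeroed, dropping the otherwise valid 70.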
2136 */ 2137 wm[0] += 2; 2138 for (level = 1; level <= max_level; level++) 2139 if (wm[level] != 0) 2140 wm[level] += 2; 2141 else { 2142 for (i = level + 1; i <= max_level; i++) 2143 wm[i] = 0; 2144 2145 break; 2146 } 2147 } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { 2148 uint64_t sskpd = I915_READ64(MCH_SSKPD); 2149 2150 wm[0] = (sskpd >> 56) & 0xFF; 2151 if (wm[0] == 0) 2152 wm[0] = sskpd & 0xF; 2153 wm[1] = (sskpd >> 4) & 0xFF; 2154 wm[2] = (sskpd >> 12) & 0xFF; 2155 wm[3] = (sskpd >> 20) & 0x1FF; 2156 wm[4] = (sskpd >> 32) & 0x1FF; 2157 } else if (INTEL_INFO(dev)->gen >= 6) { 2158 uint32_t sskpd = I915_READ(MCH_SSKPD); 2159 2160 wm[0] = (sskpd >> SSKPD_WM0_SHIFT) & SSKPD_WM_MASK; 2161 wm[1] = (sskpd >> SSKPD_WM1_SHIFT) & SSKPD_WM_MASK; 2162 wm[2] = (sskpd >> SSKPD_WM2_SHIFT) & SSKPD_WM_MASK; 2163 wm[3] = (sskpd >> SSKPD_WM3_SHIFT) & SSKPD_WM_MASK; 2164 } else if (INTEL_INFO(dev)->gen >= 5) { 2165 uint32_t mltr = I915_READ(MLTR_ILK); 2166 2167 /* ILK primary LP0 latency is 700 ns */ 2168 wm[0] = 7; 2169 wm[1] = (mltr >> MLTR_WM1_SHIFT) & ILK_SRLT_MASK; 2170 wm[2] = (mltr >> MLTR_WM2_SHIFT) & ILK_SRLT_MASK; 2171 } 2172} 2173 2174static void intel_fixup_spr_wm_latency(struct drm_device *dev, uint16_t wm[5]) 2175{ 2176 /* ILK sprite LP0 latency is 1300 ns */ 2177 if (INTEL_INFO(dev)->gen == 5) 2178 wm[0] = 13; 2179} 2180 2181static void intel_fixup_cur_wm_latency(struct drm_device *dev, uint16_t wm[5]) 2182{ 2183 /* ILK cursor LP0 latency is 1300 ns */ 2184 if (INTEL_INFO(dev)->gen == 5) 2185 wm[0] = 13; 2186 2187 /* WaDoubleCursorLP3Latency:ivb */ 2188 if (IS_IVYBRIDGE(dev)) 2189 wm[3] *= 2; 2190} 2191 2192int ilk_wm_max_level(const struct drm_device *dev) 2193{ 2194 /* how many WM levels are we expecting */ 2195 if (INTEL_INFO(dev)->gen >= 9) 2196 return 7; 2197 else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) 2198 return 4; 2199 else if (INTEL_INFO(dev)->gen >= 6) 2200 return 3; 2201 else 2202 return 2; 2203} 2204 2205static void intel_print_wm_latency(struct drm_device *dev, 2206 const char *name, 2207 const uint16_t wm[8]) 2208{ 2209 int level, max_level = ilk_wm_max_level(dev); 2210 2211 for (level = 0; level <= max_level; level++) { 2212 unsigned int latency = wm[level]; 2213 2214 if (latency == 0) { 2215 DRM_ERROR("%s WM%d latency not provided\n", 2216 name, level); 2217 continue; 2218 } 2219 2220 /* 2221 * - latencies are in us on gen9. 2222 * - before then, WM1+ latency values are in 0.5us units 2223 */ 2224 if (IS_GEN9(dev)) 2225 latency *= 10; 2226 else if (level > 0) 2227 latency *= 5; 2228 2229 DRM_DEBUG_KMS("%s WM%d latency %u (%u.%u usec)\n", 2230 name, level, wm[level], 2231 latency / 10, latency % 10); 2232 } 2233} 2234 2235static bool ilk_increase_wm_latency(struct drm_i915_private *dev_priv, 2236 uint16_t wm[5], uint16_t min) 2237{ 2238 int level, max_level = ilk_wm_max_level(dev_priv->dev); 2239 2240 if (wm[0] >= min) 2241 return false; 2242 2243 wm[0] = max(wm[0], min); 2244 for (level = 1; level <= max_level; level++) 2245 wm[level] = max_t(uint16_t, wm[level], DIV_ROUND_UP(min, 5)); 2246 2247 return true; 2248} 2249 2250static void snb_wm_latency_quirk(struct drm_device *dev) 2251{ 2252 struct drm_i915_private *dev_priv = dev->dev_private; 2253 bool changed; 2254 2255 /* 2256 * The BIOS provided WM memory latency values are often 2257 * inadequate for high resolution displays. Adjust them. 
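 * With the min of 12 (1.2 us) used below, a BIOS value of wm[0] = 7
 * (0.7 us) is raised to 12, and each WM1+ level to at least
 * DIV_ROUND_UP(12, 5) = 3 in its 0.5 us units, i.e. 1.5 us.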
2258 */ 2259 changed = ilk_increase_wm_latency(dev_priv, dev_priv->wm.pri_latency, 12) | 2260 ilk_increase_wm_latency(dev_priv, dev_priv->wm.spr_latency, 12) | 2261 ilk_increase_wm_latency(dev_priv, dev_priv->wm.cur_latency, 12); 2262 2263 if (!changed) 2264 return; 2265 2266 DRM_DEBUG_KMS("WM latency values increased to avoid potential underruns\n"); 2267 intel_print_wm_latency(dev, "Primary", dev_priv->wm.pri_latency); 2268 intel_print_wm_latency(dev, "Sprite", dev_priv->wm.spr_latency); 2269 intel_print_wm_latency(dev, "Cursor", dev_priv->wm.cur_latency); 2270} 2271 2272static void ilk_setup_wm_latency(struct drm_device *dev) 2273{ 2274 struct drm_i915_private *dev_priv = dev->dev_private; 2275 2276 intel_read_wm_latency(dev, dev_priv->wm.pri_latency); 2277 2278 memcpy(dev_priv->wm.spr_latency, dev_priv->wm.pri_latency, 2279 sizeof(dev_priv->wm.pri_latency)); 2280 memcpy(dev_priv->wm.cur_latency, dev_priv->wm.pri_latency, 2281 sizeof(dev_priv->wm.pri_latency)); 2282 2283 intel_fixup_spr_wm_latency(dev, dev_priv->wm.spr_latency); 2284 intel_fixup_cur_wm_latency(dev, dev_priv->wm.cur_latency); 2285 2286 intel_print_wm_latency(dev, "Primary", dev_priv->wm.pri_latency); 2287 intel_print_wm_latency(dev, "Sprite", dev_priv->wm.spr_latency); 2288 intel_print_wm_latency(dev, "Cursor", dev_priv->wm.cur_latency); 2289 2290 if (IS_GEN6(dev)) 2291 snb_wm_latency_quirk(dev); 2292} 2293 2294static void skl_setup_wm_latency(struct drm_device *dev) 2295{ 2296 struct drm_i915_private *dev_priv = dev->dev_private; 2297 2298 intel_read_wm_latency(dev, dev_priv->wm.skl_latency); 2299 intel_print_wm_latency(dev, "Gen9 Plane", dev_priv->wm.skl_latency); 2300} 2301 2302static bool ilk_validate_pipe_wm(struct drm_device *dev, 2303 struct intel_pipe_wm *pipe_wm) 2304{ 2305 /* LP0 watermark maximums depend on this pipe alone */ 2306 const struct intel_wm_config config = { 2307 .num_pipes_active = 1, 2308 .sprites_enabled = pipe_wm->sprites_enabled, 2309 .sprites_scaled = pipe_wm->sprites_scaled, 2310 }; 2311 struct ilk_wm_maximums max; 2312 2313 /* LP0 watermarks always use 1/2 DDB partitioning */ 2314 ilk_compute_wm_maximums(dev, 0, &config, INTEL_DDB_PART_1_2, &max); 2315 2316 /* At least LP0 must be valid */ 2317 if (!ilk_validate_wm_level(0, &max, &pipe_wm->wm[0])) { 2318 DRM_DEBUG_KMS("LP0 watermark invalid\n"); 2319 return false; 2320 } 2321 2322 return true; 2323} 2324 2325/* Compute new watermarks for the pipe */ 2326static int ilk_compute_pipe_wm(struct intel_crtc_state *cstate) 2327{ 2328 struct drm_atomic_state *state = cstate->base.state; 2329 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); 2330 struct intel_pipe_wm *pipe_wm; 2331 struct drm_device *dev = state->dev; 2332 const struct drm_i915_private *dev_priv = dev->dev_private; 2333 struct intel_plane *intel_plane; 2334 struct intel_plane_state *pristate = NULL; 2335 struct intel_plane_state *sprstate = NULL; 2336 struct intel_plane_state *curstate = NULL; 2337 int level, max_level = ilk_wm_max_level(dev), usable_level; 2338 struct ilk_wm_maximums max; 2339 2340 pipe_wm = &cstate->wm.optimal.ilk; 2341 2342 for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) { 2343 struct intel_plane_state *ps; 2344 2345 ps = intel_atomic_get_existing_plane_state(state, 2346 intel_plane); 2347 if (!ps) 2348 continue; 2349 2350 if (intel_plane->base.type == DRM_PLANE_TYPE_PRIMARY) 2351 pristate = ps; 2352 else if (intel_plane->base.type == DRM_PLANE_TYPE_OVERLAY) 2353 sprstate = ps; 2354 else if (intel_plane->base.type == 
DRM_PLANE_TYPE_CURSOR) 2355 curstate = ps; 2356 } 2357 2358 pipe_wm->pipe_enabled = cstate->base.active; 2359 if (sprstate) { 2360 pipe_wm->sprites_enabled = sprstate->visible; 2361 pipe_wm->sprites_scaled = sprstate->visible && 2362 (drm_rect_width(&sprstate->dst) != drm_rect_width(&sprstate->src) >> 16 || 2363 drm_rect_height(&sprstate->dst) != drm_rect_height(&sprstate->src) >> 16); 2364 } 2365 2366 usable_level = max_level; 2367 2368 /* ILK/SNB: LP2+ watermarks only w/o sprites */ 2369 if (INTEL_INFO(dev)->gen <= 6 && pipe_wm->sprites_enabled) 2370 usable_level = 1; 2371 2372 /* ILK/SNB/IVB: LP1+ watermarks only w/o scaling */ 2373 if (pipe_wm->sprites_scaled) 2374 usable_level = 0; 2375 2376 ilk_compute_wm_level(dev_priv, intel_crtc, 0, cstate, 2377 pristate, sprstate, curstate, &pipe_wm->raw_wm[0]); 2378 2379 memset(&pipe_wm->wm, 0, sizeof(pipe_wm->wm)); 2380 pipe_wm->wm[0] = pipe_wm->raw_wm[0]; 2381 2382 if (IS_HASWELL(dev) || IS_BROADWELL(dev)) 2383 pipe_wm->linetime = hsw_compute_linetime_wm(dev, cstate); 2384 2385 if (!ilk_validate_pipe_wm(dev, pipe_wm)) 2386 return -EINVAL; 2387 2388 ilk_compute_wm_reg_maximums(dev, 1, &max); 2389 2390 for (level = 1; level <= max_level; level++) { 2391 struct intel_wm_level *wm = &pipe_wm->raw_wm[level]; 2392 2393 ilk_compute_wm_level(dev_priv, intel_crtc, level, cstate, 2394 pristate, sprstate, curstate, wm); 2395 2396 /* 2397 * Disable any watermark level that exceeds the 2398 * register maximums since such watermarks are 2399 * always invalid. 2400 */ 2401 if (level > usable_level) 2402 continue; 2403 2404 if (ilk_validate_wm_level(level, &max, wm)) 2405 pipe_wm->wm[level] = *wm; 2406 else 2407 usable_level = level; 2408 } 2409 2410 return 0; 2411} 2412 2413/* 2414 * Build a set of 'intermediate' watermark values that satisfy both the old 2415 * state and the new state. These can be programmed to the hardware 2416 * immediately. 2417 */ 2418static int ilk_compute_intermediate_wm(struct drm_device *dev, 2419 struct intel_crtc *intel_crtc, 2420 struct intel_crtc_state *newstate) 2421{ 2422 struct intel_pipe_wm *a = &newstate->wm.intermediate; 2423 struct intel_pipe_wm *b = &intel_crtc->wm.active.ilk; 2424 int level, max_level = ilk_wm_max_level(dev); 2425 2426 /* 2427 * Start with the final, target watermarks, then combine with the 2428 * currently active watermarks to get values that are safe both before 2429 * and after the vblank. 2430 */ 2431 *a = newstate->wm.optimal.ilk; 2432 a->pipe_enabled |= b->pipe_enabled; 2433 a->sprites_enabled |= b->sprites_enabled; 2434 a->sprites_scaled |= b->sprites_scaled; 2435 2436 for (level = 0; level <= max_level; level++) { 2437 struct intel_wm_level *a_wm = &a->wm[level]; 2438 const struct intel_wm_level *b_wm = &b->wm[level]; 2439 2440 a_wm->enable &= b_wm->enable; 2441 a_wm->pri_val = max(a_wm->pri_val, b_wm->pri_val); 2442 a_wm->spr_val = max(a_wm->spr_val, b_wm->spr_val); 2443 a_wm->cur_val = max(a_wm->cur_val, b_wm->cur_val); 2444 a_wm->fbc_val = max(a_wm->fbc_val, b_wm->fbc_val); 2445 } 2446 2447 /* 2448 * We need to make sure that these merged watermark values are 2449 * actually a valid configuration themselves. If they're not, 2450 * there's no safe way to transition from the old state to 2451 * the new state, so we need to fail the atomic transaction. 2452 */ 2453 if (!ilk_validate_pipe_wm(dev, a)) 2454 return -EINVAL; 2455 2456 /* 2457 * If our intermediate WM are identical to the final WM, then we can 2458 * omit the post-vblank programming; only update if it's different. 
2459 */ 2460 if (memcmp(a, &newstate->wm.optimal.ilk, sizeof(*a)) == 0) 2461 newstate->wm.need_postvbl_update = false; 2462 2463 return 0; 2464} 2465 2466/* 2467 * Merge the watermarks from all active pipes for a specific level. 2468 */ 2469static void ilk_merge_wm_level(struct drm_device *dev, 2470 int level, 2471 struct intel_wm_level *ret_wm) 2472{ 2473 const struct intel_crtc *intel_crtc; 2474 2475 ret_wm->enable = true; 2476 2477 for_each_intel_crtc(dev, intel_crtc) { 2478 const struct intel_pipe_wm *active = &intel_crtc->wm.active.ilk; 2479 const struct intel_wm_level *wm = &active->wm[level]; 2480 2481 if (!active->pipe_enabled) 2482 continue; 2483 2484 /* 2485 * The watermark values may have been used in the past, 2486 * so we must maintain them in the registers for some 2487 * time even if the level is now disabled. 2488 */ 2489 if (!wm->enable) 2490 ret_wm->enable = false; 2491 2492 ret_wm->pri_val = max(ret_wm->pri_val, wm->pri_val); 2493 ret_wm->spr_val = max(ret_wm->spr_val, wm->spr_val); 2494 ret_wm->cur_val = max(ret_wm->cur_val, wm->cur_val); 2495 ret_wm->fbc_val = max(ret_wm->fbc_val, wm->fbc_val); 2496 } 2497} 2498 2499/* 2500 * Merge all low power watermarks for all active pipes. 2501 */ 2502static void ilk_wm_merge(struct drm_device *dev, 2503 const struct intel_wm_config *config, 2504 const struct ilk_wm_maximums *max, 2505 struct intel_pipe_wm *merged) 2506{ 2507 struct drm_i915_private *dev_priv = dev->dev_private; 2508 int level, max_level = ilk_wm_max_level(dev); 2509 int last_enabled_level = max_level; 2510 2511 /* ILK/SNB/IVB: LP1+ watermarks only w/ single pipe */ 2512 if ((INTEL_INFO(dev)->gen <= 6 || IS_IVYBRIDGE(dev)) && 2513 config->num_pipes_active > 1) 2514 last_enabled_level = 0; 2515 2516 /* ILK: FBC WM must be disabled always */ 2517 merged->fbc_wm_enabled = INTEL_INFO(dev)->gen >= 6; 2518 2519 /* merge each WM1+ level */ 2520 for (level = 1; level <= max_level; level++) { 2521 struct intel_wm_level *wm = &merged->wm[level]; 2522 2523 ilk_merge_wm_level(dev, level, wm); 2524 2525 if (level > last_enabled_level) 2526 wm->enable = false; 2527 else if (!ilk_validate_wm_level(level, max, wm)) 2528 /* make sure all following levels get disabled */ 2529 last_enabled_level = level - 1; 2530 2531 /* 2532 * The spec says it is preferred to disable 2533 * FBC WMs instead of disabling a WM level. 2534 */ 2535 if (wm->fbc_val > max->fbc) { 2536 if (wm->enable) 2537 merged->fbc_wm_enabled = false; 2538 wm->fbc_val = 0; 2539 } 2540 } 2541 2542 /* ILK: LP2+ must be disabled when FBC WM is disabled but FBC enabled */ 2543 /* 2544 * FIXME this is racy. FBC might get enabled later. 2545 * What we should check here is whether FBC can be 2546 * enabled sometime later. 
2547 */ 2548 if (IS_GEN5(dev) && !merged->fbc_wm_enabled && 2549 intel_fbc_is_active(dev_priv)) { 2550 for (level = 2; level <= max_level; level++) { 2551 struct intel_wm_level *wm = &merged->wm[level]; 2552 2553 wm->enable = false; 2554 } 2555 } 2556} 2557 2558static int ilk_wm_lp_to_level(int wm_lp, const struct intel_pipe_wm *pipe_wm) 2559{ 2560 /* LP1,LP2,LP3 levels are either 1,2,3 or 1,3,4 */ 2561 return wm_lp + (wm_lp >= 2 && pipe_wm->wm[4].enable); 2562} 2563 2564/* The value we need to program into the WM_LPx latency field */ 2565static unsigned int ilk_wm_lp_latency(struct drm_device *dev, int level) 2566{ 2567 struct drm_i915_private *dev_priv = dev->dev_private; 2568 2569 if (IS_HASWELL(dev) || IS_BROADWELL(dev)) 2570 return 2 * level; 2571 else 2572 return dev_priv->wm.pri_latency[level]; 2573} 2574 2575static void ilk_compute_wm_results(struct drm_device *dev, 2576 const struct intel_pipe_wm *merged, 2577 enum intel_ddb_partitioning partitioning, 2578 struct ilk_wm_values *results) 2579{ 2580 struct intel_crtc *intel_crtc; 2581 int level, wm_lp; 2582 2583 results->enable_fbc_wm = merged->fbc_wm_enabled; 2584 results->partitioning = partitioning; 2585 2586 /* LP1+ register values */ 2587 for (wm_lp = 1; wm_lp <= 3; wm_lp++) { 2588 const struct intel_wm_level *r; 2589 2590 level = ilk_wm_lp_to_level(wm_lp, merged); 2591 2592 r = &merged->wm[level]; 2593 2594 /* 2595 * Maintain the watermark values even if the level is 2596 * disabled. Doing otherwise could cause underruns. 2597 */ 2598 results->wm_lp[wm_lp - 1] = 2599 (ilk_wm_lp_latency(dev, level) << WM1_LP_LATENCY_SHIFT) | 2600 (r->pri_val << WM1_LP_SR_SHIFT) | 2601 r->cur_val; 2602 2603 if (r->enable) 2604 results->wm_lp[wm_lp - 1] |= WM1_LP_SR_EN; 2605 2606 if (INTEL_INFO(dev)->gen >= 8) 2607 results->wm_lp[wm_lp - 1] |= 2608 r->fbc_val << WM1_LP_FBC_SHIFT_BDW; 2609 else 2610 results->wm_lp[wm_lp - 1] |= 2611 r->fbc_val << WM1_LP_FBC_SHIFT; 2612 2613 /* 2614 * Always set WM1S_LP_EN when spr_val != 0, even if the 2615 * level is disabled. Doing otherwise could cause underruns. 2616 */ 2617 if (INTEL_INFO(dev)->gen <= 6 && r->spr_val) { 2618 WARN_ON(wm_lp != 1); 2619 results->wm_lp_spr[wm_lp - 1] = WM1S_LP_EN | r->spr_val; 2620 } else 2621 results->wm_lp_spr[wm_lp - 1] = r->spr_val; 2622 } 2623 2624 /* LP0 register values */ 2625 for_each_intel_crtc(dev, intel_crtc) { 2626 enum pipe pipe = intel_crtc->pipe; 2627 const struct intel_wm_level *r = 2628 &intel_crtc->wm.active.ilk.wm[0]; 2629 2630 if (WARN_ON(!r->enable)) 2631 continue; 2632 2633 results->wm_linetime[pipe] = intel_crtc->wm.active.ilk.linetime; 2634 2635 results->wm_pipe[pipe] = 2636 (r->pri_val << WM0_PIPE_PLANE_SHIFT) | 2637 (r->spr_val << WM0_PIPE_SPRITE_SHIFT) | 2638 r->cur_val; 2639 } 2640} 2641 2642/* Find the result with the highest level enabled. Check for enable_fbc_wm in 2643 * case both are at the same level. Prefer r1 in case they're the same. 
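 * For example, if the 1/2 partitioning result tops out at WM2 while
 * the 5/6 partitioning result still has WM3 enabled, the latter wins.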
*/ 2644static struct intel_pipe_wm *ilk_find_best_result(struct drm_device *dev, 2645 struct intel_pipe_wm *r1, 2646 struct intel_pipe_wm *r2) 2647{ 2648 int level, max_level = ilk_wm_max_level(dev); 2649 int level1 = 0, level2 = 0; 2650 2651 for (level = 1; level <= max_level; level++) { 2652 if (r1->wm[level].enable) 2653 level1 = level; 2654 if (r2->wm[level].enable) 2655 level2 = level; 2656 } 2657 2658 if (level1 == level2) { 2659 if (r2->fbc_wm_enabled && !r1->fbc_wm_enabled) 2660 return r2; 2661 else 2662 return r1; 2663 } else if (level1 > level2) { 2664 return r1; 2665 } else { 2666 return r2; 2667 } 2668} 2669 2670/* dirty bits used to track which watermarks need changes */ 2671#define WM_DIRTY_PIPE(pipe) (1 << (pipe)) 2672#define WM_DIRTY_LINETIME(pipe) (1 << (8 + (pipe))) 2673#define WM_DIRTY_LP(wm_lp) (1 << (15 + (wm_lp))) 2674#define WM_DIRTY_LP_ALL (WM_DIRTY_LP(1) | WM_DIRTY_LP(2) | WM_DIRTY_LP(3)) 2675#define WM_DIRTY_FBC (1 << 24) 2676#define WM_DIRTY_DDB (1 << 25) 2677 2678static unsigned int ilk_compute_wm_dirty(struct drm_i915_private *dev_priv, 2679 const struct ilk_wm_values *old, 2680 const struct ilk_wm_values *new) 2681{ 2682 unsigned int dirty = 0; 2683 enum pipe pipe; 2684 int wm_lp; 2685 2686 for_each_pipe(dev_priv, pipe) { 2687 if (old->wm_linetime[pipe] != new->wm_linetime[pipe]) { 2688 dirty |= WM_DIRTY_LINETIME(pipe); 2689 /* Must disable LP1+ watermarks too */ 2690 dirty |= WM_DIRTY_LP_ALL; 2691 } 2692 2693 if (old->wm_pipe[pipe] != new->wm_pipe[pipe]) { 2694 dirty |= WM_DIRTY_PIPE(pipe); 2695 /* Must disable LP1+ watermarks too */ 2696 dirty |= WM_DIRTY_LP_ALL; 2697 } 2698 } 2699 2700 if (old->enable_fbc_wm != new->enable_fbc_wm) { 2701 dirty |= WM_DIRTY_FBC; 2702 /* Must disable LP1+ watermarks too */ 2703 dirty |= WM_DIRTY_LP_ALL; 2704 } 2705 2706 if (old->partitioning != new->partitioning) { 2707 dirty |= WM_DIRTY_DDB; 2708 /* Must disable LP1+ watermarks too */ 2709 dirty |= WM_DIRTY_LP_ALL; 2710 } 2711 2712 /* LP1+ watermarks already deemed dirty, no need to continue */ 2713 if (dirty & WM_DIRTY_LP_ALL) 2714 return dirty; 2715 2716 /* Find the lowest numbered LP1+ watermark in need of an update... */ 2717 for (wm_lp = 1; wm_lp <= 3; wm_lp++) { 2718 if (old->wm_lp[wm_lp - 1] != new->wm_lp[wm_lp - 1] || 2719 old->wm_lp_spr[wm_lp - 1] != new->wm_lp_spr[wm_lp - 1]) 2720 break; 2721 } 2722 2723 /* ...and mark it and all higher numbered LP1+ watermarks as dirty */ 2724 for (; wm_lp <= 3; wm_lp++) 2725 dirty |= WM_DIRTY_LP(wm_lp); 2726 2727 return dirty; 2728} 2729 2730static bool _ilk_disable_lp_wm(struct drm_i915_private *dev_priv, 2731 unsigned int dirty) 2732{ 2733 struct ilk_wm_values *previous = &dev_priv->wm.hw; 2734 bool changed = false; 2735 2736 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] & WM1_LP_SR_EN) { 2737 previous->wm_lp[2] &= ~WM1_LP_SR_EN; 2738 I915_WRITE(WM3_LP_ILK, previous->wm_lp[2]); 2739 changed = true; 2740 } 2741 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] & WM1_LP_SR_EN) { 2742 previous->wm_lp[1] &= ~WM1_LP_SR_EN; 2743 I915_WRITE(WM2_LP_ILK, previous->wm_lp[1]); 2744 changed = true; 2745 } 2746 if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] & WM1_LP_SR_EN) { 2747 previous->wm_lp[0] &= ~WM1_LP_SR_EN; 2748 I915_WRITE(WM1_LP_ILK, previous->wm_lp[0]); 2749 changed = true; 2750 } 2751 2752 /* 2753 * Don't touch WM1S_LP_EN here. 2754 * Doing so could cause underruns. 
2755 */ 2756 2757 return changed; 2758} 2759 2760/* 2761 * The spec says we shouldn't write when we don't need, because every write 2762 * causes WMs to be re-evaluated, expending some power. 2763 */ 2764static void ilk_write_wm_values(struct drm_i915_private *dev_priv, 2765 struct ilk_wm_values *results) 2766{ 2767 struct drm_device *dev = dev_priv->dev; 2768 struct ilk_wm_values *previous = &dev_priv->wm.hw; 2769 unsigned int dirty; 2770 uint32_t val; 2771 2772 dirty = ilk_compute_wm_dirty(dev_priv, previous, results); 2773 if (!dirty) 2774 return; 2775 2776 _ilk_disable_lp_wm(dev_priv, dirty); 2777 2778 if (dirty & WM_DIRTY_PIPE(PIPE_A)) 2779 I915_WRITE(WM0_PIPEA_ILK, results->wm_pipe[0]); 2780 if (dirty & WM_DIRTY_PIPE(PIPE_B)) 2781 I915_WRITE(WM0_PIPEB_ILK, results->wm_pipe[1]); 2782 if (dirty & WM_DIRTY_PIPE(PIPE_C)) 2783 I915_WRITE(WM0_PIPEC_IVB, results->wm_pipe[2]); 2784 2785 if (dirty & WM_DIRTY_LINETIME(PIPE_A)) 2786 I915_WRITE(PIPE_WM_LINETIME(PIPE_A), results->wm_linetime[0]); 2787 if (dirty & WM_DIRTY_LINETIME(PIPE_B)) 2788 I915_WRITE(PIPE_WM_LINETIME(PIPE_B), results->wm_linetime[1]); 2789 if (dirty & WM_DIRTY_LINETIME(PIPE_C)) 2790 I915_WRITE(PIPE_WM_LINETIME(PIPE_C), results->wm_linetime[2]); 2791 2792 if (dirty & WM_DIRTY_DDB) { 2793 if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { 2794 val = I915_READ(WM_MISC); 2795 if (results->partitioning == INTEL_DDB_PART_1_2) 2796 val &= ~WM_MISC_DATA_PARTITION_5_6; 2797 else 2798 val |= WM_MISC_DATA_PARTITION_5_6; 2799 I915_WRITE(WM_MISC, val); 2800 } else { 2801 val = I915_READ(DISP_ARB_CTL2); 2802 if (results->partitioning == INTEL_DDB_PART_1_2) 2803 val &= ~DISP_DATA_PARTITION_5_6; 2804 else 2805 val |= DISP_DATA_PARTITION_5_6; 2806 I915_WRITE(DISP_ARB_CTL2, val); 2807 } 2808 } 2809 2810 if (dirty & WM_DIRTY_FBC) { 2811 val = I915_READ(DISP_ARB_CTL); 2812 if (results->enable_fbc_wm) 2813 val &= ~DISP_FBC_WM_DIS; 2814 else 2815 val |= DISP_FBC_WM_DIS; 2816 I915_WRITE(DISP_ARB_CTL, val); 2817 } 2818 2819 if (dirty & WM_DIRTY_LP(1) && 2820 previous->wm_lp_spr[0] != results->wm_lp_spr[0]) 2821 I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]); 2822 2823 if (INTEL_INFO(dev)->gen >= 7) { 2824 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp_spr[1] != results->wm_lp_spr[1]) 2825 I915_WRITE(WM2S_LP_IVB, results->wm_lp_spr[1]); 2826 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp_spr[2] != results->wm_lp_spr[2]) 2827 I915_WRITE(WM3S_LP_IVB, results->wm_lp_spr[2]); 2828 } 2829 2830 if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] != results->wm_lp[0]) 2831 I915_WRITE(WM1_LP_ILK, results->wm_lp[0]); 2832 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] != results->wm_lp[1]) 2833 I915_WRITE(WM2_LP_ILK, results->wm_lp[1]); 2834 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] != results->wm_lp[2]) 2835 I915_WRITE(WM3_LP_ILK, results->wm_lp[2]); 2836 2837 dev_priv->wm.hw = *results; 2838} 2839 2840bool ilk_disable_lp_wm(struct drm_device *dev) 2841{ 2842 struct drm_i915_private *dev_priv = dev->dev_private; 2843 2844 return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL); 2845} 2846 2847/* 2848 * On gen9, we need to allocate Display Data Buffer (DDB) portions to the 2849 * different active planes. 2850 */ 2851 2852#define SKL_DDB_SIZE 896 /* in blocks */ 2853#define BXT_DDB_SIZE 512 2854 2855/* 2856 * Return the index of a plane in the SKL DDB and wm result arrays. Primary 2857 * plane is always in slot 0, cursor is always in slot I915_MAX_PLANES-1, and 2858 * other universal planes are in indices 1..n. 
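 * For instance, on a pipe with two sprites: primary -> 0, sprite A -> 1,
 * sprite B -> 2 and cursor -> PLANE_CURSOR.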
Note that this may leave unused 2859 * indices between the top "sprite" plane and the cursor. 2860 */ 2861static int 2862skl_wm_plane_id(const struct intel_plane *plane) 2863{ 2864 switch (plane->base.type) { 2865 case DRM_PLANE_TYPE_PRIMARY: 2866 return 0; 2867 case DRM_PLANE_TYPE_CURSOR: 2868 return PLANE_CURSOR; 2869 case DRM_PLANE_TYPE_OVERLAY: 2870 return plane->plane + 1; 2871 default: 2872 MISSING_CASE(plane->base.type); 2873 return plane->plane; 2874 } 2875} 2876 2877static void 2878skl_ddb_get_pipe_allocation_limits(struct drm_device *dev, 2879 const struct intel_crtc_state *cstate, 2880 const struct intel_wm_config *config, 2881 struct skl_ddb_entry *alloc /* out */) 2882{ 2883 struct drm_crtc *for_crtc = cstate->base.crtc; 2884 struct drm_crtc *crtc; 2885 unsigned int pipe_size, ddb_size; 2886 int nth_active_pipe; 2887 2888 if (!cstate->base.active) { 2889 alloc->start = 0; 2890 alloc->end = 0; 2891 return; 2892 } 2893 2894 if (IS_BROXTON(dev)) 2895 ddb_size = BXT_DDB_SIZE; 2896 else 2897 ddb_size = SKL_DDB_SIZE; 2898 2899 ddb_size -= 4; /* 4 blocks for bypass path allocation */ 2900 2901 nth_active_pipe = 0; 2902 for_each_crtc(dev, crtc) { 2903 if (!to_intel_crtc(crtc)->active) 2904 continue; 2905 2906 if (crtc == for_crtc) 2907 break; 2908 2909 nth_active_pipe++; 2910 } 2911 2912 pipe_size = ddb_size / config->num_pipes_active; 2913 alloc->start = nth_active_pipe * ddb_size / config->num_pipes_active; 2914 alloc->end = alloc->start + pipe_size; 2915} 2916 2917static unsigned int skl_cursor_allocation(const struct intel_wm_config *config) 2918{ 2919 if (config->num_pipes_active == 1) 2920 return 32; 2921 2922 return 8; 2923} 2924 2925static void skl_ddb_entry_init_from_hw(struct skl_ddb_entry *entry, u32 reg) 2926{ 2927 entry->start = reg & 0x3ff; 2928 entry->end = (reg >> 16) & 0x3ff; 2929 if (entry->end) 2930 entry->end += 1; 2931} 2932 2933void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv, 2934 struct skl_ddb_allocation *ddb /* out */) 2935{ 2936 enum pipe pipe; 2937 int plane; 2938 u32 val; 2939 2940 memset(ddb, 0, sizeof(*ddb)); 2941 2942 for_each_pipe(dev_priv, pipe) { 2943 enum intel_display_power_domain power_domain; 2944 2945 power_domain = POWER_DOMAIN_PIPE(pipe); 2946 if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) 2947 continue; 2948 2949 for_each_plane(dev_priv, pipe, plane) { 2950 val = I915_READ(PLANE_BUF_CFG(pipe, plane)); 2951 skl_ddb_entry_init_from_hw(&ddb->plane[pipe][plane], 2952 val); 2953 } 2954 2955 val = I915_READ(CUR_BUF_CFG(pipe)); 2956 skl_ddb_entry_init_from_hw(&ddb->plane[pipe][PLANE_CURSOR], 2957 val); 2958 2959 intel_display_power_put(dev_priv, power_domain); 2960 } 2961} 2962 2963static unsigned int 2964skl_plane_relative_data_rate(const struct intel_crtc_state *cstate, 2965 const struct drm_plane_state *pstate, 2966 int y) 2967{ 2968 struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate); 2969 struct drm_framebuffer *fb = pstate->fb; 2970 uint32_t width = 0, height = 0; 2971 2972 width = drm_rect_width(&intel_pstate->src) >> 16; 2973 height = drm_rect_height(&intel_pstate->src) >> 16; 2974 2975 if (intel_rotation_90_or_270(pstate->rotation)) 2976 swap(width, height); 2977 2978 /* for planar format */ 2979 if (fb->pixel_format == DRM_FORMAT_NV12) { 2980 if (y) /* y-plane data rate */ 2981 return width * height * 2982 drm_format_plane_cpp(fb->pixel_format, 0); 2983 else /* uv-plane data rate */ 2984 return (width / 2) * (height / 2) * 2985 drm_format_plane_cpp(fb->pixel_format, 1); 2986 } 2987 2988 /* for 
packed formats */ 2989 return width * height * drm_format_plane_cpp(fb->pixel_format, 0); 2990} 2991 2992/* 2993 * We don't overflow 32 bits. Worst case is 3 planes enabled, each fetching 2994 * an 8192x4096@32bpp framebuffer: 2995 * 3 * 4096 * 8192 * 4 < 2^32 2996 */ 2997static unsigned int 2998skl_get_total_relative_data_rate(const struct intel_crtc_state *cstate) 2999{ 3000 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); 3001 struct drm_device *dev = intel_crtc->base.dev; 3002 const struct intel_plane *intel_plane; 3003 unsigned int total_data_rate = 0; 3004 3005 for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) { 3006 const struct drm_plane_state *pstate = intel_plane->base.state; 3007 3008 if (pstate->fb == NULL) 3009 continue; 3010 3011 if (intel_plane->base.type == DRM_PLANE_TYPE_CURSOR) 3012 continue; 3013 3014 /* packed/uv */ 3015 total_data_rate += skl_plane_relative_data_rate(cstate, 3016 pstate, 3017 0); 3018 3019 if (pstate->fb->pixel_format == DRM_FORMAT_NV12) 3020 /* y-plane */ 3021 total_data_rate += skl_plane_relative_data_rate(cstate, 3022 pstate, 3023 1); 3024 } 3025 3026 return total_data_rate; 3027} 3028 3029static void 3030skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, 3031 struct skl_ddb_allocation *ddb /* out */) 3032{ 3033 struct drm_crtc *crtc = cstate->base.crtc; 3034 struct drm_device *dev = crtc->dev; 3035 struct drm_i915_private *dev_priv = to_i915(dev); 3036 struct intel_wm_config *config = &dev_priv->wm.config; 3037 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 3038 struct intel_plane *intel_plane; 3039 enum pipe pipe = intel_crtc->pipe; 3040 struct skl_ddb_entry *alloc = &ddb->pipe[pipe]; 3041 uint16_t alloc_size, start, cursor_blocks; 3042 uint16_t minimum[I915_MAX_PLANES]; 3043 uint16_t y_minimum[I915_MAX_PLANES]; 3044 unsigned int total_data_rate; 3045 3046 skl_ddb_get_pipe_allocation_limits(dev, cstate, config, alloc); 3047 alloc_size = skl_ddb_entry_size(alloc); 3048 if (alloc_size == 0) { 3049 memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe])); 3050 memset(&ddb->plane[pipe][PLANE_CURSOR], 0, 3051 sizeof(ddb->plane[pipe][PLANE_CURSOR])); 3052 return; 3053 } 3054 3055 cursor_blocks = skl_cursor_allocation(config); 3056 ddb->plane[pipe][PLANE_CURSOR].start = alloc->end - cursor_blocks; 3057 ddb->plane[pipe][PLANE_CURSOR].end = alloc->end; 3058 3059 alloc_size -= cursor_blocks; 3060 alloc->end -= cursor_blocks; 3061 3062 /* 1. Allocate the minimum required blocks for each active plane */ 3063 for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) { 3064 struct drm_plane *plane = &intel_plane->base; 3065 struct drm_framebuffer *fb = plane->state->fb; 3066 int id = skl_wm_plane_id(intel_plane); 3067 3068 if (!to_intel_plane_state(plane->state)->visible) 3069 continue; 3070 3071 if (plane->type == DRM_PLANE_TYPE_CURSOR) 3072 continue; 3073 3074 minimum[id] = 8; 3075 alloc_size -= minimum[id]; 3076 y_minimum[id] = (fb->pixel_format == DRM_FORMAT_NV12) ? 8 : 0; 3077 alloc_size -= y_minimum[id]; 3078 } 3079 3080 /* 3081 * 2. Distribute the remaining space in proportion to the amount of 3082 * data each plane needs to fetch from memory. 3083 * 3084 * FIXME: we may not allocate every single block here.
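 * As a sketch with assumed numbers: with alloc_size = 800 blocks
 * remaining after the minimums, two planes with a 3:1 data rate ratio
 * end up with roughly 8 + 600 and 8 + 200 blocks respectively.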
3085 */ 3086 total_data_rate = skl_get_total_relative_data_rate(cstate); 3087 3088 start = alloc->start; 3089 for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) { 3090 struct drm_plane *plane = &intel_plane->base; 3091 struct drm_plane_state *pstate = intel_plane->base.state; 3092 unsigned int data_rate, y_data_rate; 3093 uint16_t plane_blocks, y_plane_blocks = 0; 3094 int id = skl_wm_plane_id(intel_plane); 3095 3096 if (!to_intel_plane_state(pstate)->visible) 3097 continue; 3098 if (plane->type == DRM_PLANE_TYPE_CURSOR) 3099 continue; 3100 3101 data_rate = skl_plane_relative_data_rate(cstate, pstate, 0); 3102 3103 /* 3104 * allocation for (packed formats) or (uv-plane part of planar format): 3105 * promote the expression to 64 bits to avoid overflowing, the 3106 * result is < available as data_rate / total_data_rate < 1 3107 */ 3108 plane_blocks = minimum[id]; 3109 plane_blocks += div_u64((uint64_t)alloc_size * data_rate, 3110 total_data_rate); 3111 3112 ddb->plane[pipe][id].start = start; 3113 ddb->plane[pipe][id].end = start + plane_blocks; 3114 3115 start += plane_blocks; 3116 3117 /* 3118 * allocation for y_plane part of planar format: 3119 */ 3120 if (pstate->fb->pixel_format == DRM_FORMAT_NV12) { 3121 y_data_rate = skl_plane_relative_data_rate(cstate, 3122 pstate, 3123 1); 3124 y_plane_blocks = y_minimum[id]; 3125 y_plane_blocks += div_u64((uint64_t)alloc_size * y_data_rate, 3126 total_data_rate); 3127 3128 ddb->y_plane[pipe][id].start = start; 3129 ddb->y_plane[pipe][id].end = start + y_plane_blocks; 3130 3131 start += y_plane_blocks; 3132 } 3133 3134 } 3135 3136} 3137 3138static uint32_t skl_pipe_pixel_rate(const struct intel_crtc_state *config) 3139{ 3140 /* TODO: Take into account the scalers once we support them */ 3141 return config->base.adjusted_mode.crtc_clock; 3142} 3143 3144/* 3145 * The max latency should be 257 (max the punit can code is 255 and we add 2us 3146 * for the read latency) and cpp should always be <= 8, so that 3147 * should allow pixel_rate up to ~2 GHz which seems sufficient since max 3148 * 2xcdclk is 1350 MHz and the pixel rate should never exceed that. 
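 * E.g. with illustrative numbers latency = 15 us, pixel_rate = 148500
 * kHz and cpp = 4: 15 * 148500 * 4 / 512 = 17402, which rounds up to
 * an 18 block watermark.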
3149*/ 3150static uint32_t skl_wm_method1(uint32_t pixel_rate, uint8_t cpp, uint32_t latency) 3151{ 3152 uint32_t wm_intermediate_val, ret; 3153 3154 if (latency == 0) 3155 return UINT_MAX; 3156 3157 wm_intermediate_val = latency * pixel_rate * cpp / 512; 3158 ret = DIV_ROUND_UP(wm_intermediate_val, 1000); 3159 3160 return ret; 3161} 3162 3163static uint32_t skl_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal, 3164 uint32_t horiz_pixels, uint8_t cpp, 3165 uint64_t tiling, uint32_t latency) 3166{ 3167 uint32_t ret; 3168 uint32_t plane_bytes_per_line, plane_blocks_per_line; 3169 uint32_t wm_intermediate_val; 3170 3171 if (latency == 0) 3172 return UINT_MAX; 3173 3174 plane_bytes_per_line = horiz_pixels * cpp; 3175 3176 if (tiling == I915_FORMAT_MOD_Y_TILED || 3177 tiling == I915_FORMAT_MOD_Yf_TILED) { 3178 plane_bytes_per_line *= 4; 3179 plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512); 3180 plane_blocks_per_line /= 4; 3181 } else { 3182 plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512); 3183 } 3184 3185 wm_intermediate_val = latency * pixel_rate; 3186 ret = DIV_ROUND_UP(wm_intermediate_val, pipe_htotal * 1000) * 3187 plane_blocks_per_line; 3188 3189 return ret; 3190} 3191 3192static bool skl_ddb_allocation_changed(const struct skl_ddb_allocation *new_ddb, 3193 const struct intel_crtc *intel_crtc) 3194{ 3195 struct drm_device *dev = intel_crtc->base.dev; 3196 struct drm_i915_private *dev_priv = dev->dev_private; 3197 const struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb; 3198 3199 /* 3200 * If ddb allocation of pipes changed, it may require recalculation of 3201 * watermarks 3202 */ 3203 if (memcmp(new_ddb->pipe, cur_ddb->pipe, sizeof(new_ddb->pipe))) 3204 return true; 3205 3206 return false; 3207} 3208 3209static bool skl_compute_plane_wm(const struct drm_i915_private *dev_priv, 3210 struct intel_crtc_state *cstate, 3211 struct intel_plane *intel_plane, 3212 uint16_t ddb_allocation, 3213 int level, 3214 uint16_t *out_blocks, /* out */ 3215 uint8_t *out_lines /* out */) 3216{ 3217 struct drm_plane *plane = &intel_plane->base; 3218 struct drm_framebuffer *fb = plane->state->fb; 3219 struct intel_plane_state *intel_pstate = 3220 to_intel_plane_state(plane->state); 3221 uint32_t latency = dev_priv->wm.skl_latency[level]; 3222 uint32_t method1, method2; 3223 uint32_t plane_bytes_per_line, plane_blocks_per_line; 3224 uint32_t res_blocks, res_lines; 3225 uint32_t selected_result; 3226 uint8_t cpp; 3227 uint32_t width = 0, height = 0; 3228 3229 if (latency == 0 || !cstate->base.active || !intel_pstate->visible) 3230 return false; 3231 3232 width = drm_rect_width(&intel_pstate->src) >> 16; 3233 height = drm_rect_height(&intel_pstate->src) >> 16; 3234 3235 if (intel_rotation_90_or_270(plane->state->rotation)) 3236 swap(width, height); 3237 3238 cpp = drm_format_plane_cpp(fb->pixel_format, 0); 3239 method1 = skl_wm_method1(skl_pipe_pixel_rate(cstate), 3240 cpp, latency); 3241 method2 = skl_wm_method2(skl_pipe_pixel_rate(cstate), 3242 cstate->base.adjusted_mode.crtc_htotal, 3243 width, 3244 cpp, 3245 fb->modifier[0], 3246 latency); 3247 3248 plane_bytes_per_line = width * cpp; 3249 plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512); 3250 3251 if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED || 3252 fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) { 3253 uint32_t min_scanlines = 4; 3254 uint32_t y_tile_minimum; 3255 if (intel_rotation_90_or_270(plane->state->rotation)) { 3256 int cpp = (fb->pixel_format == DRM_FORMAT_NV12) ? 
3257 drm_format_plane_cpp(fb->pixel_format, 1) : 3258 drm_format_plane_cpp(fb->pixel_format, 0); 3259 3260 switch (cpp) { 3261 case 1: 3262 min_scanlines = 16; 3263 break; 3264 case 2: 3265 min_scanlines = 8; 3266 break; 3267 case 8: 3268 WARN(1, "Unsupported pixel depth for rotation"); 3269 } 3270 } 3271 y_tile_minimum = plane_blocks_per_line * min_scanlines; 3272 selected_result = max(method2, y_tile_minimum); 3273 } else { 3274 if ((ddb_allocation / plane_blocks_per_line) >= 1) 3275 selected_result = min(method1, method2); 3276 else 3277 selected_result = method1; 3278 } 3279 3280 res_blocks = selected_result + 1; 3281 res_lines = DIV_ROUND_UP(selected_result, plane_blocks_per_line); 3282 3283 if (level >= 1 && level <= 7) { 3284 if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED || 3285 fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) 3286 res_lines += 4; 3287 else 3288 res_blocks++; 3289 } 3290 3291 if (res_blocks >= ddb_allocation || res_lines > 31) 3292 return false; 3293 3294 *out_blocks = res_blocks; 3295 *out_lines = res_lines; 3296 3297 return true; 3298} 3299 3300static void skl_compute_wm_level(const struct drm_i915_private *dev_priv, 3301 struct skl_ddb_allocation *ddb, 3302 struct intel_crtc_state *cstate, 3303 int level, 3304 struct skl_wm_level *result) 3305{ 3306 struct drm_device *dev = dev_priv->dev; 3307 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); 3308 struct intel_plane *intel_plane; 3309 uint16_t ddb_blocks; 3310 enum pipe pipe = intel_crtc->pipe; 3311 3312 for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) { 3313 int i = skl_wm_plane_id(intel_plane); 3314 3315 ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][i]); 3316 3317 result->plane_en[i] = skl_compute_plane_wm(dev_priv, 3318 cstate, 3319 intel_plane, 3320 ddb_blocks, 3321 level, 3322 &result->plane_res_b[i], 3323 &result->plane_res_l[i]); 3324 } 3325} 3326 3327static uint32_t 3328skl_compute_linetime_wm(struct intel_crtc_state *cstate) 3329{ 3330 if (!cstate->base.active) 3331 return 0; 3332 3333 if (WARN_ON(skl_pipe_pixel_rate(cstate) == 0)) 3334 return 0; 3335 3336 return DIV_ROUND_UP(8 * cstate->base.adjusted_mode.crtc_htotal * 1000, 3337 skl_pipe_pixel_rate(cstate)); 3338} 3339 3340static void skl_compute_transition_wm(struct intel_crtc_state *cstate, 3341 struct skl_wm_level *trans_wm /* out */) 3342{ 3343 struct drm_crtc *crtc = cstate->base.crtc; 3344 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 3345 struct intel_plane *intel_plane; 3346 3347 if (!cstate->base.active) 3348 return; 3349 3350 /* Until we know more, just disable transition WMs */ 3351 for_each_intel_plane_on_crtc(crtc->dev, intel_crtc, intel_plane) { 3352 int i = skl_wm_plane_id(intel_plane); 3353 3354 trans_wm->plane_en[i] = false; 3355 } 3356} 3357 3358static void skl_compute_pipe_wm(struct intel_crtc_state *cstate, 3359 struct skl_ddb_allocation *ddb, 3360 struct skl_pipe_wm *pipe_wm) 3361{ 3362 struct drm_device *dev = cstate->base.crtc->dev; 3363 const struct drm_i915_private *dev_priv = dev->dev_private; 3364 int level, max_level = ilk_wm_max_level(dev); 3365 3366 for (level = 0; level <= max_level; level++) { 3367 skl_compute_wm_level(dev_priv, ddb, cstate, 3368 level, &pipe_wm->wm[level]); 3369 } 3370 pipe_wm->linetime = skl_compute_linetime_wm(cstate); 3371 3372 skl_compute_transition_wm(cstate, &pipe_wm->trans_wm); 3373} 3374 3375static void skl_compute_wm_results(struct drm_device *dev, 3376 struct skl_pipe_wm *p_wm, 3377 struct skl_wm_values *r, 3378 struct intel_crtc *intel_crtc) 3379{ 3380 int 
level, max_level = ilk_wm_max_level(dev); 3381 enum pipe pipe = intel_crtc->pipe; 3382 uint32_t temp; 3383 int i; 3384 3385 for (level = 0; level <= max_level; level++) { 3386 for (i = 0; i < intel_num_planes(intel_crtc); i++) { 3387 temp = 0; 3388 3389 temp |= p_wm->wm[level].plane_res_l[i] << 3390 PLANE_WM_LINES_SHIFT; 3391 temp |= p_wm->wm[level].plane_res_b[i]; 3392 if (p_wm->wm[level].plane_en[i]) 3393 temp |= PLANE_WM_EN; 3394 3395 r->plane[pipe][i][level] = temp; 3396 } 3397 3398 temp = 0; 3399 3400 temp |= p_wm->wm[level].plane_res_l[PLANE_CURSOR] << PLANE_WM_LINES_SHIFT; 3401 temp |= p_wm->wm[level].plane_res_b[PLANE_CURSOR]; 3402 3403 if (p_wm->wm[level].plane_en[PLANE_CURSOR]) 3404 temp |= PLANE_WM_EN; 3405 3406 r->plane[pipe][PLANE_CURSOR][level] = temp; 3407 3408 } 3409 3410 /* transition WMs */ 3411 for (i = 0; i < intel_num_planes(intel_crtc); i++) { 3412 temp = 0; 3413 temp |= p_wm->trans_wm.plane_res_l[i] << PLANE_WM_LINES_SHIFT; 3414 temp |= p_wm->trans_wm.plane_res_b[i]; 3415 if (p_wm->trans_wm.plane_en[i]) 3416 temp |= PLANE_WM_EN; 3417 3418 r->plane_trans[pipe][i] = temp; 3419 } 3420 3421 temp = 0; 3422 temp |= p_wm->trans_wm.plane_res_l[PLANE_CURSOR] << PLANE_WM_LINES_SHIFT; 3423 temp |= p_wm->trans_wm.plane_res_b[PLANE_CURSOR]; 3424 if (p_wm->trans_wm.plane_en[PLANE_CURSOR]) 3425 temp |= PLANE_WM_EN; 3426 3427 r->plane_trans[pipe][PLANE_CURSOR] = temp; 3428 3429 r->wm_linetime[pipe] = p_wm->linetime; 3430} 3431 3432static void skl_ddb_entry_write(struct drm_i915_private *dev_priv, 3433 i915_reg_t reg, 3434 const struct skl_ddb_entry *entry) 3435{ 3436 if (entry->end) 3437 I915_WRITE(reg, (entry->end - 1) << 16 | entry->start); 3438 else 3439 I915_WRITE(reg, 0); 3440} 3441 3442static void skl_write_wm_values(struct drm_i915_private *dev_priv, 3443 const struct skl_wm_values *new) 3444{ 3445 struct drm_device *dev = dev_priv->dev; 3446 struct intel_crtc *crtc; 3447 3448 for_each_intel_crtc(dev, crtc) { 3449 int i, level, max_level = ilk_wm_max_level(dev); 3450 enum pipe pipe = crtc->pipe; 3451 3452 if (!new->dirty[pipe]) 3453 continue; 3454 3455 I915_WRITE(PIPE_WM_LINETIME(pipe), new->wm_linetime[pipe]); 3456 3457 for (level = 0; level <= max_level; level++) { 3458 for (i = 0; i < intel_num_planes(crtc); i++) 3459 I915_WRITE(PLANE_WM(pipe, i, level), 3460 new->plane[pipe][i][level]); 3461 I915_WRITE(CUR_WM(pipe, level), 3462 new->plane[pipe][PLANE_CURSOR][level]); 3463 } 3464 for (i = 0; i < intel_num_planes(crtc); i++) 3465 I915_WRITE(PLANE_WM_TRANS(pipe, i), 3466 new->plane_trans[pipe][i]); 3467 I915_WRITE(CUR_WM_TRANS(pipe), 3468 new->plane_trans[pipe][PLANE_CURSOR]); 3469 3470 for (i = 0; i < intel_num_planes(crtc); i++) { 3471 skl_ddb_entry_write(dev_priv, 3472 PLANE_BUF_CFG(pipe, i), 3473 &new->ddb.plane[pipe][i]); 3474 skl_ddb_entry_write(dev_priv, 3475 PLANE_NV12_BUF_CFG(pipe, i), 3476 &new->ddb.y_plane[pipe][i]); 3477 } 3478 3479 skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe), 3480 &new->ddb.plane[pipe][PLANE_CURSOR]); 3481 } 3482} 3483 3484/* 3485 * When setting up a new DDB allocation arrangement, we need to correctly 3486 * sequence the times at which the new allocations for the pipes are taken into 3487 * account or we'll have pipes fetching from space previously allocated to 3488 * another pipe. 3489 * 3490 * Roughly the sequence looks like: 3491 * 1. 
re-allocate the pipe(s) with the allocation being reduced and not 3492 * overlapping with a previously lit-up pipe (another way to put it is: 3493 * pipes with their new allocation strictly included in their old ones). 3494 * 2. re-allocate the other pipes that get their allocation reduced 3495 * 3. allocate the pipes having their allocation increased 3496 * 3497 * Steps 1. and 2. are here to take care of the following case: 3498 * - Initially DDB looks like this: 3499 * | B | C | 3500 * - enable pipe A. 3501 * - pipe B has a reduced DDB allocation that overlaps with the old pipe C 3502 * allocation 3503 * | A | B | C | 3504 * 3505 * We need to sequence the re-allocation: C, B, A (and not B, C, A). 3506 */ 3507 3508static void 3509skl_wm_flush_pipe(struct drm_i915_private *dev_priv, enum pipe pipe, int pass) 3510{ 3511 int plane; 3512 3513 DRM_DEBUG_KMS("flush pipe %c (pass %d)\n", pipe_name(pipe), pass); 3514 3515 for_each_plane(dev_priv, pipe, plane) { 3516 I915_WRITE(PLANE_SURF(pipe, plane), 3517 I915_READ(PLANE_SURF(pipe, plane))); 3518 } 3519 I915_WRITE(CURBASE(pipe), I915_READ(CURBASE(pipe))); 3520} 3521 3522static bool 3523skl_ddb_allocation_included(const struct skl_ddb_allocation *old, 3524 const struct skl_ddb_allocation *new, 3525 enum pipe pipe) 3526{ 3527 uint16_t old_size, new_size; 3528 3529 old_size = skl_ddb_entry_size(&old->pipe[pipe]); 3530 new_size = skl_ddb_entry_size(&new->pipe[pipe]); 3531 3532 return old_size != new_size && 3533 new->pipe[pipe].start >= old->pipe[pipe].start && 3534 new->pipe[pipe].end <= old->pipe[pipe].end; 3535} 3536 3537static void skl_flush_wm_values(struct drm_i915_private *dev_priv, 3538 struct skl_wm_values *new_values) 3539{ 3540 struct drm_device *dev = dev_priv->dev; 3541 struct skl_ddb_allocation *cur_ddb, *new_ddb; 3542 bool reallocated[I915_MAX_PIPES] = {}; 3543 struct intel_crtc *crtc; 3544 enum pipe pipe; 3545 3546 new_ddb = &new_values->ddb; 3547 cur_ddb = &dev_priv->wm.skl_hw.ddb; 3548 3549 /* 3550 * First pass: flush the pipes with the new allocation contained within 3551 * the old space. 3552 * 3553 * We'll wait for the vblank on those pipes to ensure we can safely 3554 * re-allocate the freed space without this pipe fetching from it. 3555 */ 3556 for_each_intel_crtc(dev, crtc) { 3557 if (!crtc->active) 3558 continue; 3559 3560 pipe = crtc->pipe; 3561 3562 if (!skl_ddb_allocation_included(cur_ddb, new_ddb, pipe)) 3563 continue; 3564 3565 skl_wm_flush_pipe(dev_priv, pipe, 1); 3566 intel_wait_for_vblank(dev, pipe); 3567 3568 reallocated[pipe] = true; 3569 } 3570 3571 3572 /* 3573 * Second pass: flush the pipes that are having their allocation 3574 * reduced, but overlapping with a previous allocation. 3575 * 3576 * Here as well we need to wait for the vblank to make sure the freed 3577 * space is not used anymore. 3578 */ 3579 for_each_intel_crtc(dev, crtc) { 3580 if (!crtc->active) 3581 continue; 3582 3583 pipe = crtc->pipe; 3584 3585 if (reallocated[pipe]) 3586 continue; 3587 3588 if (skl_ddb_entry_size(&new_ddb->pipe[pipe]) < 3589 skl_ddb_entry_size(&cur_ddb->pipe[pipe])) { 3590 skl_wm_flush_pipe(dev_priv, pipe, 2); 3591 intel_wait_for_vblank(dev, pipe); 3592 reallocated[pipe] = true; 3593 } 3594 } 3595 3596 /* 3597 * Third pass: flush the pipes that got more space allocated. 3598 * 3599 * We don't need to actively wait for the update here, next vblank 3600 * will just get more DDB space with the correct WM values.
3601 */ 3602 for_each_intel_crtc(dev, crtc) { 3603 if (!crtc->active) 3604 continue; 3605 3606 pipe = crtc->pipe; 3607 3608 /* 3609 * At this point, only the pipes with more space than before are 3610 * left to re-allocate. 3611 */ 3612 if (reallocated[pipe]) 3613 continue; 3614 3615 skl_wm_flush_pipe(dev_priv, pipe, 3); 3616 } 3617} 3618 3619static bool skl_update_pipe_wm(struct drm_crtc *crtc, 3620 struct skl_ddb_allocation *ddb, /* out */ 3621 struct skl_pipe_wm *pipe_wm /* out */) 3622{ 3623 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 3624 struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state); 3625 3626 skl_allocate_pipe_ddb(cstate, ddb); 3627 skl_compute_pipe_wm(cstate, ddb, pipe_wm); 3628 3629 if (!memcmp(&intel_crtc->wm.active.skl, pipe_wm, sizeof(*pipe_wm))) 3630 return false; 3631 3632 intel_crtc->wm.active.skl = *pipe_wm; 3633 3634 return true; 3635} 3636 3637static void skl_update_other_pipe_wm(struct drm_device *dev, 3638 struct drm_crtc *crtc, 3639 struct skl_wm_values *r) 3640{ 3641 struct intel_crtc *intel_crtc; 3642 struct intel_crtc *this_crtc = to_intel_crtc(crtc); 3643 3644 /* 3645 * If the WM update hasn't changed the allocation for this_crtc (the 3646 * crtc we are currently computing the new WM values for), other 3647 * enabled crtcs will keep the same allocation and we don't need to 3648 * recompute anything for them. 3649 */ 3650 if (!skl_ddb_allocation_changed(&r->ddb, this_crtc)) 3651 return; 3652 3653 /* 3654 * Otherwise, because of this_crtc being freshly enabled/disabled, the 3655 * other active pipes need new DDB allocation and WM values. 3656 */ 3657 for_each_intel_crtc(dev, intel_crtc) { 3658 struct skl_pipe_wm pipe_wm = {}; 3659 bool wm_changed; 3660 3661 if (this_crtc->pipe == intel_crtc->pipe) 3662 continue; 3663 3664 if (!intel_crtc->active) 3665 continue; 3666 3667 wm_changed = skl_update_pipe_wm(&intel_crtc->base, 3668 &r->ddb, &pipe_wm); 3669 3670 /* 3671 * If we end up re-computing the other pipe WM values, it's 3672 * because it was really needed, so we expect the WM values to 3673 * be different.
3674 */ 3675 WARN_ON(!wm_changed); 3676 3677 skl_compute_wm_results(dev, &pipe_wm, r, intel_crtc); 3678 r->dirty[intel_crtc->pipe] = true; 3679 } 3680} 3681 3682static void skl_clear_wm(struct skl_wm_values *watermarks, enum pipe pipe) 3683{ 3684 watermarks->wm_linetime[pipe] = 0; 3685 memset(watermarks->plane[pipe], 0, 3686 sizeof(uint32_t) * 8 * I915_MAX_PLANES); 3687 memset(watermarks->plane_trans[pipe], 3688 0, sizeof(uint32_t) * I915_MAX_PLANES); 3689 watermarks->plane_trans[pipe][PLANE_CURSOR] = 0; 3690 3691 /* Clear ddb entries for pipe */ 3692 memset(&watermarks->ddb.pipe[pipe], 0, sizeof(struct skl_ddb_entry)); 3693 memset(&watermarks->ddb.plane[pipe], 0, 3694 sizeof(struct skl_ddb_entry) * I915_MAX_PLANES); 3695 memset(&watermarks->ddb.y_plane[pipe], 0, 3696 sizeof(struct skl_ddb_entry) * I915_MAX_PLANES); 3697 memset(&watermarks->ddb.plane[pipe][PLANE_CURSOR], 0, 3698 sizeof(struct skl_ddb_entry)); 3699 3700} 3701 3702static void skl_update_wm(struct drm_crtc *crtc) 3703{ 3704 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 3705 struct drm_device *dev = crtc->dev; 3706 struct drm_i915_private *dev_priv = dev->dev_private; 3707 struct skl_wm_values *results = &dev_priv->wm.skl_results; 3708 struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state); 3709 struct skl_pipe_wm *pipe_wm = &cstate->wm.optimal.skl; 3710 3711 3712 /* Clear all dirty flags */ 3713 memset(results->dirty, 0, sizeof(bool) * I915_MAX_PIPES); 3714 3715 skl_clear_wm(results, intel_crtc->pipe); 3716 3717 if (!skl_update_pipe_wm(crtc, &results->ddb, pipe_wm)) 3718 return; 3719 3720 skl_compute_wm_results(dev, pipe_wm, results, intel_crtc); 3721 results->dirty[intel_crtc->pipe] = true; 3722 3723 skl_update_other_pipe_wm(dev, crtc, results); 3724 skl_write_wm_values(dev_priv, results); 3725 skl_flush_wm_values(dev_priv, results); 3726 3727 /* store the new configuration */ 3728 dev_priv->wm.skl_hw = *results; 3729} 3730 3731static void ilk_compute_wm_config(struct drm_device *dev, 3732 struct intel_wm_config *config) 3733{ 3734 struct intel_crtc *crtc; 3735 3736 /* Compute the currently _active_ config */ 3737 for_each_intel_crtc(dev, crtc) { 3738 const struct intel_pipe_wm *wm = &crtc->wm.active.ilk; 3739 3740 if (!wm->pipe_enabled) 3741 continue; 3742 3743 config->sprites_enabled |= wm->sprites_enabled; 3744 config->sprites_scaled |= wm->sprites_scaled; 3745 config->num_pipes_active++; 3746 } 3747} 3748 3749static void ilk_program_watermarks(struct drm_i915_private *dev_priv) 3750{ 3751 struct drm_device *dev = dev_priv->dev; 3752 struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm; 3753 struct ilk_wm_maximums max; 3754 struct intel_wm_config config = {}; 3755 struct ilk_wm_values results = {}; 3756 enum intel_ddb_partitioning partitioning; 3757 3758 ilk_compute_wm_config(dev, &config); 3759 3760 ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_1_2, &max); 3761 ilk_wm_merge(dev, &config, &max, &lp_wm_1_2); 3762 3763 /* 5/6 split only in single pipe config on IVB+ */ 3764 if (INTEL_INFO(dev)->gen >= 7 && 3765 config.num_pipes_active == 1 && config.sprites_enabled) { 3766 ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_5_6, &max); 3767 ilk_wm_merge(dev, &config, &max, &lp_wm_5_6); 3768 3769 best_lp_wm = ilk_find_best_result(dev, &lp_wm_1_2, &lp_wm_5_6); 3770 } else { 3771 best_lp_wm = &lp_wm_1_2; 3772 } 3773 3774 partitioning = (best_lp_wm == &lp_wm_1_2) ? 
3775 INTEL_DDB_PART_1_2 : INTEL_DDB_PART_5_6; 3776 3777 ilk_compute_wm_results(dev, best_lp_wm, partitioning, &results); 3778 3779 ilk_write_wm_values(dev_priv, &results); 3780} 3781 3782static void ilk_initial_watermarks(struct intel_crtc_state *cstate) 3783{ 3784 struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev); 3785 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); 3786 3787 mutex_lock(&dev_priv->wm.wm_mutex); 3788 intel_crtc->wm.active.ilk = cstate->wm.intermediate; 3789 ilk_program_watermarks(dev_priv); 3790 mutex_unlock(&dev_priv->wm.wm_mutex); 3791} 3792 3793static void ilk_optimize_watermarks(struct intel_crtc_state *cstate) 3794{ 3795 struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev); 3796 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); 3797 3798 mutex_lock(&dev_priv->wm.wm_mutex); 3799 if (cstate->wm.need_postvbl_update) { 3800 intel_crtc->wm.active.ilk = cstate->wm.optimal.ilk; 3801 ilk_program_watermarks(dev_priv); 3802 } 3803 mutex_unlock(&dev_priv->wm.wm_mutex); 3804} 3805 3806static void skl_pipe_wm_active_state(uint32_t val, 3807 struct skl_pipe_wm *active, 3808 bool is_transwm, 3809 bool is_cursor, 3810 int i, 3811 int level) 3812{ 3813 bool is_enabled = (val & PLANE_WM_EN) != 0; 3814 3815 if (!is_transwm) { 3816 if (!is_cursor) { 3817 active->wm[level].plane_en[i] = is_enabled; 3818 active->wm[level].plane_res_b[i] = 3819 val & PLANE_WM_BLOCKS_MASK; 3820 active->wm[level].plane_res_l[i] = 3821 (val >> PLANE_WM_LINES_SHIFT) & 3822 PLANE_WM_LINES_MASK; 3823 } else { 3824 active->wm[level].plane_en[PLANE_CURSOR] = is_enabled; 3825 active->wm[level].plane_res_b[PLANE_CURSOR] = 3826 val & PLANE_WM_BLOCKS_MASK; 3827 active->wm[level].plane_res_l[PLANE_CURSOR] = 3828 (val >> PLANE_WM_LINES_SHIFT) & 3829 PLANE_WM_LINES_MASK; 3830 } 3831 } else { 3832 if (!is_cursor) { 3833 active->trans_wm.plane_en[i] = is_enabled; 3834 active->trans_wm.plane_res_b[i] = 3835 val & PLANE_WM_BLOCKS_MASK; 3836 active->trans_wm.plane_res_l[i] = 3837 (val >> PLANE_WM_LINES_SHIFT) & 3838 PLANE_WM_LINES_MASK; 3839 } else { 3840 active->trans_wm.plane_en[PLANE_CURSOR] = is_enabled; 3841 active->trans_wm.plane_res_b[PLANE_CURSOR] = 3842 val & PLANE_WM_BLOCKS_MASK; 3843 active->trans_wm.plane_res_l[PLANE_CURSOR] = 3844 (val >> PLANE_WM_LINES_SHIFT) & 3845 PLANE_WM_LINES_MASK; 3846 } 3847 } 3848} 3849 3850static void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc) 3851{ 3852 struct drm_device *dev = crtc->dev; 3853 struct drm_i915_private *dev_priv = dev->dev_private; 3854 struct skl_wm_values *hw = &dev_priv->wm.skl_hw; 3855 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 3856 struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state); 3857 struct skl_pipe_wm *active = &cstate->wm.optimal.skl; 3858 enum pipe pipe = intel_crtc->pipe; 3859 int level, i, max_level; 3860 uint32_t temp; 3861 3862 max_level = ilk_wm_max_level(dev); 3863 3864 hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe)); 3865 3866 for (level = 0; level <= max_level; level++) { 3867 for (i = 0; i < intel_num_planes(intel_crtc); i++) 3868 hw->plane[pipe][i][level] = 3869 I915_READ(PLANE_WM(pipe, i, level)); 3870 hw->plane[pipe][PLANE_CURSOR][level] = I915_READ(CUR_WM(pipe, level)); 3871 } 3872 3873 for (i = 0; i < intel_num_planes(intel_crtc); i++) 3874 hw->plane_trans[pipe][i] = I915_READ(PLANE_WM_TRANS(pipe, i)); 3875 hw->plane_trans[pipe][PLANE_CURSOR] = I915_READ(CUR_WM_TRANS(pipe)); 3876 3877 if (!intel_crtc->active) 3878 return; 3879 3880 
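 /* Pipe is active: flag its saved hardware state as dirty and unpack the raw watermark register values read above into the software skl_pipe_wm state below. */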
hw->dirty[pipe] = true; 3881 3882 active->linetime = hw->wm_linetime[pipe]; 3883 3884 for (level = 0; level <= max_level; level++) { 3885 for (i = 0; i < intel_num_planes(intel_crtc); i++) { 3886 temp = hw->plane[pipe][i][level]; 3887 skl_pipe_wm_active_state(temp, active, false, 3888 false, i, level); 3889 } 3890 temp = hw->plane[pipe][PLANE_CURSOR][level]; 3891 skl_pipe_wm_active_state(temp, active, false, true, i, level); 3892 } 3893 3894 for (i = 0; i < intel_num_planes(intel_crtc); i++) { 3895 temp = hw->plane_trans[pipe][i]; 3896 skl_pipe_wm_active_state(temp, active, true, false, i, 0); 3897 } 3898 3899 temp = hw->plane_trans[pipe][PLANE_CURSOR]; 3900 skl_pipe_wm_active_state(temp, active, true, true, i, 0); 3901 3902 intel_crtc->wm.active.skl = *active; 3903} 3904 3905void skl_wm_get_hw_state(struct drm_device *dev) 3906{ 3907 struct drm_i915_private *dev_priv = dev->dev_private; 3908 struct skl_ddb_allocation *ddb = &dev_priv->wm.skl_hw.ddb; 3909 struct drm_crtc *crtc; 3910 3911 skl_ddb_get_hw_state(dev_priv, ddb); 3912 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) 3913 skl_pipe_wm_get_hw_state(crtc); 3914} 3915 3916static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc) 3917{ 3918 struct drm_device *dev = crtc->dev; 3919 struct drm_i915_private *dev_priv = dev->dev_private; 3920 struct ilk_wm_values *hw = &dev_priv->wm.hw; 3921 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 3922 struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state); 3923 struct intel_pipe_wm *active = &cstate->wm.optimal.ilk; 3924 enum pipe pipe = intel_crtc->pipe; 3925 static const i915_reg_t wm0_pipe_reg[] = { 3926 [PIPE_A] = WM0_PIPEA_ILK, 3927 [PIPE_B] = WM0_PIPEB_ILK, 3928 [PIPE_C] = WM0_PIPEC_IVB, 3929 }; 3930 3931 hw->wm_pipe[pipe] = I915_READ(wm0_pipe_reg[pipe]); 3932 if (IS_HASWELL(dev) || IS_BROADWELL(dev)) 3933 hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe)); 3934 3935 memset(active, 0, sizeof(*active)); 3936 3937 active->pipe_enabled = intel_crtc->active; 3938 3939 if (active->pipe_enabled) { 3940 u32 tmp = hw->wm_pipe[pipe]; 3941 3942 /* 3943 * For active pipes LP0 watermark is marked as 3944 * enabled, and LP1+ watermarks as disabled since 3945 * we can't really reverse compute them in case 3946 * multiple pipes are active. 3947 */ 3948 active->wm[0].enable = true; 3949 active->wm[0].pri_val = (tmp & WM0_PIPE_PLANE_MASK) >> WM0_PIPE_PLANE_SHIFT; 3950 active->wm[0].spr_val = (tmp & WM0_PIPE_SPRITE_MASK) >> WM0_PIPE_SPRITE_SHIFT; 3951 active->wm[0].cur_val = tmp & WM0_PIPE_CURSOR_MASK; 3952 active->linetime = hw->wm_linetime[pipe]; 3953 } else { 3954 int level, max_level = ilk_wm_max_level(dev); 3955 3956 /* 3957 * For inactive pipes, all watermark levels 3958 * should be marked as enabled but zeroed, 3959 * which is what we'd compute them to.
3960 */ 3961 for (level = 0; level <= max_level; level++) 3962 active->wm[level].enable = true; 3963 } 3964 3965 intel_crtc->wm.active.ilk = *active; 3966} 3967 3968#define _FW_WM(value, plane) \ 3969 (((value) & DSPFW_ ## plane ## _MASK) >> DSPFW_ ## plane ## _SHIFT) 3970#define _FW_WM_VLV(value, plane) \ 3971 (((value) & DSPFW_ ## plane ## _MASK_VLV) >> DSPFW_ ## plane ## _SHIFT) 3972 3973static void vlv_read_wm_values(struct drm_i915_private *dev_priv, 3974 struct vlv_wm_values *wm) 3975{ 3976 enum pipe pipe; 3977 uint32_t tmp; 3978 3979 for_each_pipe(dev_priv, pipe) { 3980 tmp = I915_READ(VLV_DDL(pipe)); 3981 3982 wm->ddl[pipe].primary = 3983 (tmp >> DDL_PLANE_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK); 3984 wm->ddl[pipe].cursor = 3985 (tmp >> DDL_CURSOR_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK); 3986 wm->ddl[pipe].sprite[0] = 3987 (tmp >> DDL_SPRITE_SHIFT(0)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK); 3988 wm->ddl[pipe].sprite[1] = 3989 (tmp >> DDL_SPRITE_SHIFT(1)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK); 3990 } 3991 3992 tmp = I915_READ(DSPFW1); 3993 wm->sr.plane = _FW_WM(tmp, SR); 3994 wm->pipe[PIPE_B].cursor = _FW_WM(tmp, CURSORB); 3995 wm->pipe[PIPE_B].primary = _FW_WM_VLV(tmp, PLANEB); 3996 wm->pipe[PIPE_A].primary = _FW_WM_VLV(tmp, PLANEA); 3997 3998 tmp = I915_READ(DSPFW2); 3999 wm->pipe[PIPE_A].sprite[1] = _FW_WM_VLV(tmp, SPRITEB); 4000 wm->pipe[PIPE_A].cursor = _FW_WM(tmp, CURSORA); 4001 wm->pipe[PIPE_A].sprite[0] = _FW_WM_VLV(tmp, SPRITEA); 4002 4003 tmp = I915_READ(DSPFW3); 4004 wm->sr.cursor = _FW_WM(tmp, CURSOR_SR); 4005 4006 if (IS_CHERRYVIEW(dev_priv)) { 4007 tmp = I915_READ(DSPFW7_CHV); 4008 wm->pipe[PIPE_B].sprite[1] = _FW_WM_VLV(tmp, SPRITED); 4009 wm->pipe[PIPE_B].sprite[0] = _FW_WM_VLV(tmp, SPRITEC); 4010 4011 tmp = I915_READ(DSPFW8_CHV); 4012 wm->pipe[PIPE_C].sprite[1] = _FW_WM_VLV(tmp, SPRITEF); 4013 wm->pipe[PIPE_C].sprite[0] = _FW_WM_VLV(tmp, SPRITEE); 4014 4015 tmp = I915_READ(DSPFW9_CHV); 4016 wm->pipe[PIPE_C].primary = _FW_WM_VLV(tmp, PLANEC); 4017 wm->pipe[PIPE_C].cursor = _FW_WM(tmp, CURSORC); 4018 4019 tmp = I915_READ(DSPHOWM); 4020 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9; 4021 wm->pipe[PIPE_C].sprite[1] |= _FW_WM(tmp, SPRITEF_HI) << 8; 4022 wm->pipe[PIPE_C].sprite[0] |= _FW_WM(tmp, SPRITEE_HI) << 8; 4023 wm->pipe[PIPE_C].primary |= _FW_WM(tmp, PLANEC_HI) << 8; 4024 wm->pipe[PIPE_B].sprite[1] |= _FW_WM(tmp, SPRITED_HI) << 8; 4025 wm->pipe[PIPE_B].sprite[0] |= _FW_WM(tmp, SPRITEC_HI) << 8; 4026 wm->pipe[PIPE_B].primary |= _FW_WM(tmp, PLANEB_HI) << 8; 4027 wm->pipe[PIPE_A].sprite[1] |= _FW_WM(tmp, SPRITEB_HI) << 8; 4028 wm->pipe[PIPE_A].sprite[0] |= _FW_WM(tmp, SPRITEA_HI) << 8; 4029 wm->pipe[PIPE_A].primary |= _FW_WM(tmp, PLANEA_HI) << 8; 4030 } else { 4031 tmp = I915_READ(DSPFW7); 4032 wm->pipe[PIPE_B].sprite[1] = _FW_WM_VLV(tmp, SPRITED); 4033 wm->pipe[PIPE_B].sprite[0] = _FW_WM_VLV(tmp, SPRITEC); 4034 4035 tmp = I915_READ(DSPHOWM); 4036 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9; 4037 wm->pipe[PIPE_B].sprite[1] |= _FW_WM(tmp, SPRITED_HI) << 8; 4038 wm->pipe[PIPE_B].sprite[0] |= _FW_WM(tmp, SPRITEC_HI) << 8; 4039 wm->pipe[PIPE_B].primary |= _FW_WM(tmp, PLANEB_HI) << 8; 4040 wm->pipe[PIPE_A].sprite[1] |= _FW_WM(tmp, SPRITEB_HI) << 8; 4041 wm->pipe[PIPE_A].sprite[0] |= _FW_WM(tmp, SPRITEA_HI) << 8; 4042 wm->pipe[PIPE_A].primary |= _FW_WM(tmp, PLANEA_HI) << 8; 4043 } 4044} 4045 4046#undef _FW_WM 4047#undef _FW_WM_VLV 4048 4049void vlv_wm_get_hw_state(struct drm_device *dev) 4050{ 4051 struct drm_i915_private *dev_priv = to_i915(dev); 
4052 struct vlv_wm_values *wm = &dev_priv->wm.vlv; 4053 struct intel_plane *plane; 4054 enum pipe pipe; 4055 u32 val; 4056 4057 vlv_read_wm_values(dev_priv, wm); 4058 4059 for_each_intel_plane(dev, plane) { 4060 switch (plane->base.type) { 4061 int sprite; 4062 case DRM_PLANE_TYPE_CURSOR: 4063 plane->wm.fifo_size = 63; 4064 break; 4065 case DRM_PLANE_TYPE_PRIMARY: 4066 plane->wm.fifo_size = vlv_get_fifo_size(dev, plane->pipe, 0); 4067 break; 4068 case DRM_PLANE_TYPE_OVERLAY: 4069 sprite = plane->plane; 4070 plane->wm.fifo_size = vlv_get_fifo_size(dev, plane->pipe, sprite + 1); 4071 break; 4072 } 4073 } 4074 4075 wm->cxsr = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN; 4076 wm->level = VLV_WM_LEVEL_PM2; 4077 4078 if (IS_CHERRYVIEW(dev_priv)) { 4079 mutex_lock(&dev_priv->rps.hw_lock); 4080 4081 val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); 4082 if (val & DSP_MAXFIFO_PM5_ENABLE) 4083 wm->level = VLV_WM_LEVEL_PM5; 4084 4085 /* 4086 * If DDR DVFS is disabled in the BIOS, Punit 4087 * will never ack the request. So if that happens 4088 * assume we don't have to enable/disable DDR DVFS 4089 * dynamically. To test that just set the REQ_ACK 4090 * bit to poke the Punit, but don't change the 4091 * HIGH/LOW bits so that we don't actually change 4092 * the current state. 4093 */ 4094 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2); 4095 val |= FORCE_DDR_FREQ_REQ_ACK; 4096 vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val); 4097 4098 if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) & 4099 FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) { 4100 DRM_DEBUG_KMS("Punit not acking DDR DVFS request, " 4101 "assuming DDR DVFS is disabled\n"); 4102 dev_priv->wm.max_level = VLV_WM_LEVEL_PM5; 4103 } else { 4104 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2); 4105 if ((val & FORCE_DDR_HIGH_FREQ) == 0) 4106 wm->level = VLV_WM_LEVEL_DDR_DVFS; 4107 } 4108 4109 mutex_unlock(&dev_priv->rps.hw_lock); 4110 } 4111 4112 for_each_pipe(dev_priv, pipe) 4113 DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite0=%d, sprite1=%d\n", 4114 pipe_name(pipe), wm->pipe[pipe].primary, wm->pipe[pipe].cursor, 4115 wm->pipe[pipe].sprite[0], wm->pipe[pipe].sprite[1]); 4116 4117 DRM_DEBUG_KMS("Initial watermarks: SR plane=%d, SR cursor=%d level=%d cxsr=%d\n", 4118 wm->sr.plane, wm->sr.cursor, wm->level, wm->cxsr); 4119} 4120 4121void ilk_wm_get_hw_state(struct drm_device *dev) 4122{ 4123 struct drm_i915_private *dev_priv = dev->dev_private; 4124 struct ilk_wm_values *hw = &dev_priv->wm.hw; 4125 struct drm_crtc *crtc; 4126 4127 for_each_crtc(dev, crtc) 4128 ilk_pipe_wm_get_hw_state(crtc); 4129 4130 hw->wm_lp[0] = I915_READ(WM1_LP_ILK); 4131 hw->wm_lp[1] = I915_READ(WM2_LP_ILK); 4132 hw->wm_lp[2] = I915_READ(WM3_LP_ILK); 4133 4134 hw->wm_lp_spr[0] = I915_READ(WM1S_LP_ILK); 4135 if (INTEL_INFO(dev)->gen >= 7) { 4136 hw->wm_lp_spr[1] = I915_READ(WM2S_LP_IVB); 4137 hw->wm_lp_spr[2] = I915_READ(WM3S_LP_IVB); 4138 } 4139 4140 if (IS_HASWELL(dev) || IS_BROADWELL(dev)) 4141 hw->partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ? 4142 INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2; 4143 else if (IS_IVYBRIDGE(dev)) 4144 hw->partitioning = (I915_READ(DISP_ARB_CTL2) & DISP_DATA_PARTITION_5_6) ? 
4145 INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2; 4146 4147 hw->enable_fbc_wm = 4148 !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS); 4149} 4150 4151/** 4152 * intel_update_watermarks - update FIFO watermark values based on current modes 4153 * 4154 * Calculate watermark values for the various WM regs based on current mode 4155 * and plane configuration. 4156 * 4157 * There are several cases to deal with here: 4158 * - normal (i.e. non-self-refresh) 4159 * - self-refresh (SR) mode 4160 * - lines are large relative to FIFO size (buffer can hold up to 2) 4161 * - lines are small relative to FIFO size (buffer can hold more than 2 4162 * lines), so need to account for TLB latency 4163 * 4164 * The normal calculation is: 4165 * watermark = dotclock * bytes per pixel * latency 4166 * where latency is platform & configuration dependent (we assume pessimal 4167 * values here). 4168 * 4169 * The SR calculation is: 4170 * watermark = (trunc(latency/line time)+1) * surface width * 4171 * bytes per pixel 4172 * where 4173 * line time = htotal / dotclock 4174 * surface width = hdisplay for normal plane and 64 for cursor 4175 * and latency is assumed to be high, as above. 4176 * 4177 * The final value programmed to the register should always be rounded up, 4178 * and include an extra 2 entries to account for clock crossings. 4179 * 4180 * We don't use the sprite, so we can ignore that. And on Crestline we have 4181 * to set the non-SR watermarks to 8. 4182 */ 4183void intel_update_watermarks(struct drm_crtc *crtc) 4184{ 4185 struct drm_i915_private *dev_priv = crtc->dev->dev_private; 4186 4187 if (dev_priv->display.update_wm) 4188 dev_priv->display.update_wm(crtc); 4189} 4190 4191/* 4192 * Lock protecting IPS related data structures 4193 */ 4194DEFINE_SPINLOCK(mchdev_lock); 4195 4196/* Global for IPS driver to get at the current i915 device. Protected by 4197 * mchdev_lock. 
*/ 4198static struct drm_i915_private *i915_mch_dev; 4199 4200bool ironlake_set_drps(struct drm_device *dev, u8 val) 4201{ 4202 struct drm_i915_private *dev_priv = dev->dev_private; 4203 u16 rgvswctl; 4204 4205 assert_spin_locked(&mchdev_lock); 4206 4207 rgvswctl = I915_READ16(MEMSWCTL); 4208 if (rgvswctl & MEMCTL_CMD_STS) { 4209 DRM_DEBUG("gpu busy, RCS change rejected\n"); 4210 return false; /* still busy with another command */ 4211 } 4212 4213 rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) | 4214 (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM; 4215 I915_WRITE16(MEMSWCTL, rgvswctl); 4216 POSTING_READ16(MEMSWCTL); 4217 4218 rgvswctl |= MEMCTL_CMD_STS; 4219 I915_WRITE16(MEMSWCTL, rgvswctl); 4220 4221 return true; 4222} 4223 4224static void ironlake_enable_drps(struct drm_device *dev) 4225{ 4226 struct drm_i915_private *dev_priv = dev->dev_private; 4227 u32 rgvmodectl; 4228 u8 fmax, fmin, fstart, vstart; 4229 4230 spin_lock_irq(&mchdev_lock); 4231 4232 rgvmodectl = I915_READ(MEMMODECTL); 4233 4234 /* Enable temp reporting */ 4235 I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN); 4236 I915_WRITE16(TSC1, I915_READ(TSC1) | TSE); 4237 4238 /* 100ms RC evaluation intervals */ 4239 I915_WRITE(RCUPEI, 100000); 4240 I915_WRITE(RCDNEI, 100000); 4241 4242 /* Set max/min thresholds to 90ms and 80ms respectively */ 4243 I915_WRITE(RCBMAXAVG, 90000); 4244 I915_WRITE(RCBMINAVG, 80000); 4245 4246 I915_WRITE(MEMIHYST, 1); 4247 4248 /* Set up min, max, and cur for interrupt handling */ 4249 fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT; 4250 fmin = (rgvmodectl & MEMMODE_FMIN_MASK); 4251 fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> 4252 MEMMODE_FSTART_SHIFT; 4253 4254 vstart = (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >> 4255 PXVFREQ_PX_SHIFT; 4256 4257 dev_priv->ips.fmax = fmax; /* IPS callback will increase this */ 4258 dev_priv->ips.fstart = fstart; 4259 4260 dev_priv->ips.max_delay = fstart; 4261 dev_priv->ips.min_delay = fmin; 4262 dev_priv->ips.cur_delay = fstart; 4263 4264 DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n", 4265 fmax, fmin, fstart); 4266 4267 I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN); 4268 4269 /* 4270 * Interrupts will be enabled in ironlake_irq_postinstall 4271 */ 4272 4273 I915_WRITE(VIDSTART, vstart); 4274 POSTING_READ(VIDSTART); 4275 4276 rgvmodectl |= MEMMODE_SWMODE_EN; 4277 I915_WRITE(MEMMODECTL, rgvmodectl); 4278 4279 if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10)) 4280 DRM_ERROR("stuck trying to change perf mode\n"); 4281 mdelay(1); 4282 4283 ironlake_set_drps(dev, fstart); 4284 4285 dev_priv->ips.last_count1 = I915_READ(DMIEC) + 4286 I915_READ(DDREC) + I915_READ(CSIEC); 4287 dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies); 4288 dev_priv->ips.last_count2 = I915_READ(GFXEC); 4289 dev_priv->ips.last_time2 = ktime_get_raw_ns(); 4290 4291 spin_unlock_irq(&mchdev_lock); 4292} 4293 4294static void ironlake_disable_drps(struct drm_device *dev) 4295{ 4296 struct drm_i915_private *dev_priv = dev->dev_private; 4297 u16 rgvswctl; 4298 4299 spin_lock_irq(&mchdev_lock); 4300 4301 rgvswctl = I915_READ16(MEMSWCTL); 4302 4303 /* Ack interrupts, disable EFC interrupt */ 4304 I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN); 4305 I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG); 4306 I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT); 4307 I915_WRITE(DEIIR, DE_PCU_EVENT); 4308 I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT); 4309 4310 /* Go back to the starting frequency */ 4311 ironlake_set_drps(dev, 
dev_priv->ips.fstart); 4312 mdelay(1); 4313 rgvswctl |= MEMCTL_CMD_STS; 4314 I915_WRITE(MEMSWCTL, rgvswctl); 4315 mdelay(1); 4316 4317 spin_unlock_irq(&mchdev_lock); 4318} 4319 4320/* There's a funny hw issue where the hw returns all 0 when reading from 4321 * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value 4322 * ourselves, instead of doing a rmw cycle (which might result in us clearing 4323 * all limits and the gpu stuck at whatever frequency it is at atm). 4324 */ 4325static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val) 4326{ 4327 u32 limits; 4328 4329 /* Only set the down limit when we've reached the lowest level to avoid 4330 * getting more interrupts, otherwise leave this clear. This prevents a 4331 * race in the hw when coming out of rc6: There's a tiny window where 4332 * the hw runs at the minimal clock before selecting the desired 4333 * frequency, if the down threshold expires in that window we will not 4334 * receive a down interrupt. */ 4335 if (IS_GEN9(dev_priv)) { 4336 limits = (dev_priv->rps.max_freq_softlimit) << 23; 4337 if (val <= dev_priv->rps.min_freq_softlimit) 4338 limits |= (dev_priv->rps.min_freq_softlimit) << 14; 4339 } else { 4340 limits = dev_priv->rps.max_freq_softlimit << 24; 4341 if (val <= dev_priv->rps.min_freq_softlimit) 4342 limits |= dev_priv->rps.min_freq_softlimit << 16; 4343 } 4344 4345 return limits; 4346} 4347 4348static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) 4349{ 4350 int new_power; 4351 u32 threshold_up = 0, threshold_down = 0; /* in % */ 4352 u32 ei_up = 0, ei_down = 0; 4353 4354 new_power = dev_priv->rps.power; 4355 switch (dev_priv->rps.power) { 4356 case LOW_POWER: 4357 if (val > dev_priv->rps.efficient_freq + 1 && val > dev_priv->rps.cur_freq) 4358 new_power = BETWEEN; 4359 break; 4360 4361 case BETWEEN: 4362 if (val <= dev_priv->rps.efficient_freq && val < dev_priv->rps.cur_freq) 4363 new_power = LOW_POWER; 4364 else if (val >= dev_priv->rps.rp0_freq && val > dev_priv->rps.cur_freq) 4365 new_power = HIGH_POWER; 4366 break; 4367 4368 case HIGH_POWER: 4369 if (val < (dev_priv->rps.rp1_freq + dev_priv->rps.rp0_freq) >> 1 && val < dev_priv->rps.cur_freq) 4370 new_power = BETWEEN; 4371 break; 4372 } 4373 /* Max/min bins are special */ 4374 if (val <= dev_priv->rps.min_freq_softlimit) 4375 new_power = LOW_POWER; 4376 if (val >= dev_priv->rps.max_freq_softlimit) 4377 new_power = HIGH_POWER; 4378 if (new_power == dev_priv->rps.power) 4379 return; 4380 4381 /* Note the units here are not exactly 1us, but 1280ns. 
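 * For example, the 16000us ei_up programmed below for LOW_POWER works out to 16000000ns / 1280ns = 12500 hardware units; GT_INTERVAL_FROM_US() does that conversion for us (the arithmetic is illustrative, and gen9 parts use slightly different 1.33us units).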
*/ 4382 switch (new_power) { 4383 case LOW_POWER: 4384 /* Upclock if more than 95% busy over 16ms */ 4385 ei_up = 16000; 4386 threshold_up = 95; 4387 4388 /* Downclock if less than 85% busy over 32ms */ 4389 ei_down = 32000; 4390 threshold_down = 85; 4391 break; 4392 4393 case BETWEEN: 4394 /* Upclock if more than 90% busy over 13ms */ 4395 ei_up = 13000; 4396 threshold_up = 90; 4397 4398 /* Downclock if less than 75% busy over 32ms */ 4399 ei_down = 32000; 4400 threshold_down = 75; 4401 break; 4402 4403 case HIGH_POWER: 4404 /* Upclock if more than 85% busy over 10ms */ 4405 ei_up = 10000; 4406 threshold_up = 85; 4407 4408 /* Downclock if less than 60% busy over 32ms */ 4409 ei_down = 32000; 4410 threshold_down = 60; 4411 break; 4412 } 4413 4414 I915_WRITE(GEN6_RP_UP_EI, 4415 GT_INTERVAL_FROM_US(dev_priv, ei_up)); 4416 I915_WRITE(GEN6_RP_UP_THRESHOLD, 4417 GT_INTERVAL_FROM_US(dev_priv, (ei_up * threshold_up / 100))); 4418 4419 I915_WRITE(GEN6_RP_DOWN_EI, 4420 GT_INTERVAL_FROM_US(dev_priv, ei_down)); 4421 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 4422 GT_INTERVAL_FROM_US(dev_priv, (ei_down * threshold_down / 100))); 4423 4424 I915_WRITE(GEN6_RP_CONTROL, 4425 GEN6_RP_MEDIA_TURBO | 4426 GEN6_RP_MEDIA_HW_NORMAL_MODE | 4427 GEN6_RP_MEDIA_IS_GFX | 4428 GEN6_RP_ENABLE | 4429 GEN6_RP_UP_BUSY_AVG | 4430 GEN6_RP_DOWN_IDLE_AVG); 4431 4432 dev_priv->rps.power = new_power; 4433 dev_priv->rps.up_threshold = threshold_up; 4434 dev_priv->rps.down_threshold = threshold_down; 4435 dev_priv->rps.last_adj = 0; 4436} 4437 4438static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) 4439{ 4440 u32 mask = 0; 4441 4442 if (val > dev_priv->rps.min_freq_softlimit) 4443 mask |= GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT; 4444 if (val < dev_priv->rps.max_freq_softlimit) 4445 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD; 4446 4447 mask &= dev_priv->pm_rps_events; 4448 4449 return gen6_sanitize_rps_pm_mask(dev_priv, ~mask); 4450} 4451 4452/* gen6_set_rps is called to update the frequency request, but should also be 4453 * called when the range (min_delay and max_delay) is modified so that we can 4454 * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */ 4455static void gen6_set_rps(struct drm_device *dev, u8 val) 4456{ 4457 struct drm_i915_private *dev_priv = dev->dev_private; 4458 4459 /* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */ 4460 if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) 4461 return; 4462 4463 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 4464 WARN_ON(val > dev_priv->rps.max_freq); 4465 WARN_ON(val < dev_priv->rps.min_freq); 4466 4467 /* min/max delay may still have been modified so be sure to 4468 * write the limits value. 4469 */ 4470 if (val != dev_priv->rps.cur_freq) { 4471 gen6_set_rps_thresholds(dev_priv, val); 4472 4473 if (IS_GEN9(dev)) 4474 I915_WRITE(GEN6_RPNSWREQ, 4475 GEN9_FREQUENCY(val)); 4476 else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) 4477 I915_WRITE(GEN6_RPNSWREQ, 4478 HSW_FREQUENCY(val)); 4479 else 4480 I915_WRITE(GEN6_RPNSWREQ, 4481 GEN6_FREQUENCY(val) | 4482 GEN6_OFFSET(0) | 4483 GEN6_AGGRESSIVE_TURBO); 4484 } 4485 4486 /* Make sure we continue to get interrupts 4487 * until we hit the minimum or maximum frequencies. 
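 * (gen6_rps_pm_mask() keeps the down-interrupt bits only while we are above the minimum softlimit and the up-interrupt bits only while we are below the maximum softlimit.)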
4488 */ 4489 I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val)); 4490 I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); 4491 4492 POSTING_READ(GEN6_RPNSWREQ); 4493 4494 dev_priv->rps.cur_freq = val; 4495 trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); 4496} 4497 4498static void valleyview_set_rps(struct drm_device *dev, u8 val) 4499{ 4500 struct drm_i915_private *dev_priv = dev->dev_private; 4501 4502 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 4503 WARN_ON(val > dev_priv->rps.max_freq); 4504 WARN_ON(val < dev_priv->rps.min_freq); 4505 4506 if (WARN_ONCE(IS_CHERRYVIEW(dev) && (val & 1), 4507 "Odd GPU freq value\n")) 4508 val &= ~1; 4509 4510 I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); 4511 4512 if (val != dev_priv->rps.cur_freq) { 4513 vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); 4514 if (!IS_CHERRYVIEW(dev_priv)) 4515 gen6_set_rps_thresholds(dev_priv, val); 4516 } 4517 4518 dev_priv->rps.cur_freq = val; 4519 trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); 4520} 4521 4522/* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down 4523 * 4524 * * If Gfx is Idle, then 4525 * 1. Forcewake Media well. 4526 * 2. Request idle freq. 4527 * 3. Release Forcewake of Media well. 4528*/ 4529static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) 4530{ 4531 u32 val = dev_priv->rps.idle_freq; 4532 4533 if (dev_priv->rps.cur_freq <= val) 4534 return; 4535 4536 /* Wake up the media well, as that takes a lot less 4537 * power than the Render well. */ 4538 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA); 4539 valleyview_set_rps(dev_priv->dev, val); 4540 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA); 4541} 4542 4543void gen6_rps_busy(struct drm_i915_private *dev_priv) 4544{ 4545 mutex_lock(&dev_priv->rps.hw_lock); 4546 if (dev_priv->rps.enabled) { 4547 if (dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED)) 4548 gen6_rps_reset_ei(dev_priv); 4549 I915_WRITE(GEN6_PMINTRMSK, 4550 gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq)); 4551 } 4552 mutex_unlock(&dev_priv->rps.hw_lock); 4553} 4554 4555void gen6_rps_idle(struct drm_i915_private *dev_priv) 4556{ 4557 struct drm_device *dev = dev_priv->dev; 4558 4559 mutex_lock(&dev_priv->rps.hw_lock); 4560 if (dev_priv->rps.enabled) { 4561 if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) 4562 vlv_set_rps_idle(dev_priv); 4563 else 4564 gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq); 4565 dev_priv->rps.last_adj = 0; 4566 I915_WRITE(GEN6_PMINTRMSK, 0xffffffff); 4567 } 4568 mutex_unlock(&dev_priv->rps.hw_lock); 4569 4570 spin_lock(&dev_priv->rps.client_lock); 4571 while (!list_empty(&dev_priv->rps.clients)) 4572 list_del_init(dev_priv->rps.clients.next); 4573 spin_unlock(&dev_priv->rps.client_lock); 4574} 4575 4576void gen6_rps_boost(struct drm_i915_private *dev_priv, 4577 struct intel_rps_client *rps, 4578 unsigned long submitted) 4579{ 4580 /* This is intentionally racy! We peek at the state here, then 4581 * validate inside the RPS worker. 4582 */ 4583 if (!(dev_priv->mm.busy && 4584 dev_priv->rps.enabled && 4585 dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit)) 4586 return; 4587 4588 /* Force a RPS boost (and don't count it against the client) if 4589 * the GPU is severely congested. 
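 * "Severely congested" here means the request was submitted more than DRM_I915_THROTTLE_JIFFIES ago and is still pending; such a boost is accounted to the device (dev_priv->rps.boosts) rather than to the client.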
4590 */ 4591 if (rps && time_after(jiffies, submitted + DRM_I915_THROTTLE_JIFFIES)) 4592 rps = NULL; 4593 4594 spin_lock(&dev_priv->rps.client_lock); 4595 if (rps == NULL || list_empty(&rps->link)) { 4596 spin_lock_irq(&dev_priv->irq_lock); 4597 if (dev_priv->rps.interrupts_enabled) { 4598 dev_priv->rps.client_boost = true; 4599 queue_work(dev_priv->wq, &dev_priv->rps.work); 4600 } 4601 spin_unlock_irq(&dev_priv->irq_lock); 4602 4603 if (rps != NULL) { 4604 list_add(&rps->link, &dev_priv->rps.clients); 4605 rps->boosts++; 4606 } else 4607 dev_priv->rps.boosts++; 4608 } 4609 spin_unlock(&dev_priv->rps.client_lock); 4610} 4611 4612void intel_set_rps(struct drm_device *dev, u8 val) 4613{ 4614 if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) 4615 valleyview_set_rps(dev, val); 4616 else 4617 gen6_set_rps(dev, val); 4618} 4619 4620static void gen9_disable_rc6(struct drm_device *dev) 4621{ 4622 struct drm_i915_private *dev_priv = dev->dev_private; 4623 4624 I915_WRITE(GEN6_RC_CONTROL, 0); 4625 I915_WRITE(GEN9_PG_ENABLE, 0); 4626} 4627 4628static void gen9_disable_rps(struct drm_device *dev) 4629{ 4630 struct drm_i915_private *dev_priv = dev->dev_private; 4631 4632 I915_WRITE(GEN6_RP_CONTROL, 0); 4633} 4634 4635static void gen6_disable_rps(struct drm_device *dev) 4636{ 4637 struct drm_i915_private *dev_priv = dev->dev_private; 4638 4639 I915_WRITE(GEN6_RC_CONTROL, 0); 4640 I915_WRITE(GEN6_RPNSWREQ, 1 << 31); 4641 I915_WRITE(GEN6_RP_CONTROL, 0); 4642} 4643 4644static void cherryview_disable_rps(struct drm_device *dev) 4645{ 4646 struct drm_i915_private *dev_priv = dev->dev_private; 4647 4648 I915_WRITE(GEN6_RC_CONTROL, 0); 4649} 4650 4651static void valleyview_disable_rps(struct drm_device *dev) 4652{ 4653 struct drm_i915_private *dev_priv = dev->dev_private; 4654 4655 /* We do forcewake before disabling RC6, 4656 * this is what the BIOS expects when going into suspend */ 4657 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 4658 4659 I915_WRITE(GEN6_RC_CONTROL, 0); 4660 4661 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 4662} 4663 4664static void intel_print_rc6_info(struct drm_device *dev, u32 mode) 4665{ 4666 if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) { 4667 if (mode & (GEN7_RC_CTL_TO_MODE | GEN6_RC_CTL_EI_MODE(1))) 4668 mode = GEN6_RC_CTL_RC6_ENABLE; 4669 else 4670 mode = 0; 4671 } 4672 if (HAS_RC6p(dev)) 4673 DRM_DEBUG_KMS("Enabling RC6 states: RC6 %s RC6p %s RC6pp %s\n", 4674 onoff(mode & GEN6_RC_CTL_RC6_ENABLE), 4675 onoff(mode & GEN6_RC_CTL_RC6p_ENABLE), 4676 onoff(mode & GEN6_RC_CTL_RC6pp_ENABLE)); 4677 4678 else 4679 DRM_DEBUG_KMS("Enabling RC6 states: RC6 %s\n", 4680 onoff(mode & GEN6_RC_CTL_RC6_ENABLE)); 4681} 4682 4683static bool bxt_check_bios_rc6_setup(const struct drm_device *dev) 4684{ 4685 struct drm_i915_private *dev_priv = to_i915(dev); 4686 struct i915_ggtt *ggtt = &dev_priv->ggtt; 4687 bool enable_rc6 = true; 4688 unsigned long rc6_ctx_base; 4689 4690 if (!(I915_READ(RC6_LOCATION) & RC6_CTX_IN_DRAM)) { 4691 DRM_DEBUG_KMS("RC6 Base location not set properly.\n"); 4692 enable_rc6 = false; 4693 } 4694 4695 /* 4696 * The exact context size is not known for BXT, so assume a page size 4697 * for this check.
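 * In other words, require the page [rc6_ctx_base, rc6_ctx_base + PAGE_SIZE) to sit entirely inside the stolen reserved region checked below.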
4698 */ 4699 rc6_ctx_base = I915_READ(RC6_CTX_BASE) & RC6_CTX_BASE_MASK; 4700 if (!((rc6_ctx_base >= ggtt->stolen_reserved_base) && 4701 (rc6_ctx_base + PAGE_SIZE <= ggtt->stolen_reserved_base + 4702 ggtt->stolen_reserved_size))) { 4703 DRM_DEBUG_KMS("RC6 Base address not as expected.\n"); 4704 enable_rc6 = false; 4705 } 4706 4707 if (!(((I915_READ(PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1) && 4708 ((I915_READ(PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1) && 4709 ((I915_READ(PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1) && 4710 ((I915_READ(PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1))) { 4711 DRM_DEBUG_KMS("Engine Idle wait time not set properly.\n"); 4712 enable_rc6 = false; 4713 } 4714 4715 if (!(I915_READ(GEN6_RC_CONTROL) & (GEN6_RC_CTL_RC6_ENABLE | 4716 GEN6_RC_CTL_HW_ENABLE)) && 4717 ((I915_READ(GEN6_RC_CONTROL) & GEN6_RC_CTL_HW_ENABLE) || 4718 !(I915_READ(GEN6_RC_STATE) & RC6_STATE))) { 4719 DRM_DEBUG_KMS("HW/SW RC6 is not enabled by BIOS.\n"); 4720 enable_rc6 = false; 4721 } 4722 4723 return enable_rc6; 4724} 4725 4726int sanitize_rc6_option(const struct drm_device *dev, int enable_rc6) 4727{ 4728 /* No RC6 before Ironlake and code is gone for ilk. */ 4729 if (INTEL_INFO(dev)->gen < 6) 4730 return 0; 4731 4732 if (!enable_rc6) 4733 return 0; 4734 4735 if (IS_BROXTON(dev) && !bxt_check_bios_rc6_setup(dev)) { 4736 DRM_INFO("RC6 disabled by BIOS\n"); 4737 return 0; 4738 } 4739 4740 /* Respect the kernel parameter if it is set */ 4741 if (enable_rc6 >= 0) { 4742 int mask; 4743 4744 if (HAS_RC6p(dev)) 4745 mask = INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE | 4746 INTEL_RC6pp_ENABLE; 4747 else 4748 mask = INTEL_RC6_ENABLE; 4749 4750 if ((enable_rc6 & mask) != enable_rc6) 4751 DRM_DEBUG_KMS("Adjusting RC6 mask to %d (requested %d, valid %d)\n", 4752 enable_rc6 & mask, enable_rc6, mask); 4753 4754 return enable_rc6 & mask; 4755 } 4756 4757 if (IS_IVYBRIDGE(dev)) 4758 return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE); 4759 4760 return INTEL_RC6_ENABLE; 4761} 4762 4763int intel_enable_rc6(const struct drm_device *dev) 4764{ 4765 return i915.enable_rc6; 4766} 4767 4768static void gen6_init_rps_frequencies(struct drm_device *dev) 4769{ 4770 struct drm_i915_private *dev_priv = dev->dev_private; 4771 uint32_t rp_state_cap; 4772 u32 ddcc_status = 0; 4773 int ret; 4774 4775 /* All of these values are in units of 50MHz */ 4776 dev_priv->rps.cur_freq = 0; 4777 /* static values from HW: RP0 > RP1 > RPn (min_freq) */ 4778 if (IS_BROXTON(dev)) { 4779 rp_state_cap = I915_READ(BXT_RP_STATE_CAP); 4780 dev_priv->rps.rp0_freq = (rp_state_cap >> 16) & 0xff; 4781 dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff; 4782 dev_priv->rps.min_freq = (rp_state_cap >> 0) & 0xff; 4783 } else { 4784 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); 4785 dev_priv->rps.rp0_freq = (rp_state_cap >> 0) & 0xff; 4786 dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff; 4787 dev_priv->rps.min_freq = (rp_state_cap >> 16) & 0xff; 4788 } 4789 4790 /* hw_max = RP0 until we check for overclocking */ 4791 dev_priv->rps.max_freq = dev_priv->rps.rp0_freq; 4792 4793 dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq; 4794 if (IS_HASWELL(dev) || IS_BROADWELL(dev) || 4795 IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) { 4796 ret = sandybridge_pcode_read(dev_priv, 4797 HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL, 4798 &ddcc_status); 4799 if (0 == ret) 4800 dev_priv->rps.efficient_freq = 4801 clamp_t(u8, 4802 ((ddcc_status >> 8) & 0xff), 4803 dev_priv->rps.min_freq, 4804 dev_priv->rps.max_freq); 4805 } 4806 4807 if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) { 4808 /* 
Store the frequency values in 16.66 MHz units, which is 4809 the natural hardware unit for SKL */ 4810 dev_priv->rps.rp0_freq *= GEN9_FREQ_SCALER; 4811 dev_priv->rps.rp1_freq *= GEN9_FREQ_SCALER; 4812 dev_priv->rps.min_freq *= GEN9_FREQ_SCALER; 4813 dev_priv->rps.max_freq *= GEN9_FREQ_SCALER; 4814 dev_priv->rps.efficient_freq *= GEN9_FREQ_SCALER; 4815 } 4816 4817 dev_priv->rps.idle_freq = dev_priv->rps.min_freq; 4818 4819 /* Preserve min/max settings in case of re-init */ 4820 if (dev_priv->rps.max_freq_softlimit == 0) 4821 dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; 4822 4823 if (dev_priv->rps.min_freq_softlimit == 0) { 4824 if (IS_HASWELL(dev) || IS_BROADWELL(dev)) 4825 dev_priv->rps.min_freq_softlimit = 4826 max_t(int, dev_priv->rps.efficient_freq, 4827 intel_freq_opcode(dev_priv, 450)); 4828 else 4829 dev_priv->rps.min_freq_softlimit = 4830 dev_priv->rps.min_freq; 4831 } 4832} 4833 4834/* See the Gen9_GT_PM_Programming_Guide doc for the below */ 4835static void gen9_enable_rps(struct drm_device *dev) 4836{ 4837 struct drm_i915_private *dev_priv = dev->dev_private; 4838 4839 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 4840 4841 gen6_init_rps_frequencies(dev); 4842 4843 /* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */ 4844 if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) { 4845 /* 4846 * BIOS could leave the Hw Turbo enabled, so need to explicitly 4847 * clear out the Control register just to avoid inconsistency 4848 * with debugfs interface, which will show Turbo as enabled 4849 * only and that is not expected by the User after adding the 4850 * WaGsvDisableTurbo. Apart from this there is no problem even 4851 * if the Turbo is left enabled in the Control register, as the 4852 * Up/Down interrupts would remain masked. 4853 */ 4854 gen9_disable_rps(dev); 4855 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 4856 return; 4857 } 4858 4859 /* Program defaults and thresholds for RPS */ 4860 I915_WRITE(GEN6_RC_VIDEO_FREQ, 4861 GEN9_FREQUENCY(dev_priv->rps.rp1_freq)); 4862 4863 /* 1 second timeout */ 4864 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 4865 GT_INTERVAL_FROM_US(dev_priv, 1000000)); 4866 4867 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 0xa); 4868 4869 /* Leaning on the below call to gen6_set_rps to program/setup the 4870 * Up/Down EI & threshold registers, as well as the RP_CONTROL, 4871 * RP_INTERRUPT_LIMITS & RPNSWREQ registers */ 4872 dev_priv->rps.power = HIGH_POWER; /* force a reset */ 4873 gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq); 4874 4875 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 4876} 4877 4878static void gen9_enable_rc6(struct drm_device *dev) 4879{ 4880 struct drm_i915_private *dev_priv = dev->dev_private; 4881 struct intel_engine_cs *engine; 4882 uint32_t rc6_mask = 0; 4883 4884 /* 1a: Software RC state - RC0 */ 4885 I915_WRITE(GEN6_RC_STATE, 0); 4886 4887 /* 1b: Get forcewake during program sequence. Although the driver 4888 * hasn't enabled a state yet where we need forcewake, BIOS may have. */ 4889 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 4890 4891 /* 2a: Disable RC states.
*/ 4892 I915_WRITE(GEN6_RC_CONTROL, 0); 4893 4894 /* 2b: Program RC6 thresholds.*/ 4895 4896 /* WaRsDoubleRc6WrlWithCoarsePowerGating: Doubling WRL only when CPG is enabled */ 4897 if (IS_SKYLAKE(dev)) 4898 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16); 4899 else 4900 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16); 4901 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ 4902 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ 4903 for_each_engine(engine, dev_priv) 4904 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); 4905 4906 if (HAS_GUC_UCODE(dev)) 4907 I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA); 4908 4909 I915_WRITE(GEN6_RC_SLEEP, 0); 4910 4911 /* 2c: Program Coarse Power Gating Policies. */ 4912 I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 25); 4913 I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 25); 4914 4915 /* 3a: Enable RC6 */ 4916 if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE) 4917 rc6_mask = GEN6_RC_CTL_RC6_ENABLE; 4918 DRM_INFO("RC6 %s\n", onoff(rc6_mask & GEN6_RC_CTL_RC6_ENABLE)); 4919 /* WaRsUseTimeoutMode */ 4920 if (IS_SKL_REVID(dev, 0, SKL_REVID_D0) || 4921 IS_BXT_REVID(dev, 0, BXT_REVID_A1)) { 4922 I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us */ 4923 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | 4924 GEN7_RC_CTL_TO_MODE | 4925 rc6_mask); 4926 } else { 4927 I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ 4928 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | 4929 GEN6_RC_CTL_EI_MODE(1) | 4930 rc6_mask); 4931 } 4932 4933 /* 4934 * 3b: Enable Coarse Power Gating only when RC6 is enabled. 4935 * WaRsDisableCoarsePowerGating:skl,bxt - Render/Media PG need to be disabled with RC6. 4936 */ 4937 if (NEEDS_WaRsDisableCoarsePowerGating(dev)) 4938 I915_WRITE(GEN9_PG_ENABLE, 0); 4939 else 4940 I915_WRITE(GEN9_PG_ENABLE, (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ? 4941 (GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE) : 0); 4942 4943 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 4944 4945} 4946 4947static void gen8_enable_rps(struct drm_device *dev) 4948{ 4949 struct drm_i915_private *dev_priv = dev->dev_private; 4950 struct intel_engine_cs *engine; 4951 uint32_t rc6_mask = 0; 4952 4953 /* 1a: Software RC state - RC0 */ 4954 I915_WRITE(GEN6_RC_STATE, 0); 4955 4956 /* 1c & 1d: Get forcewake during program sequence. Although the driver 4957 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ 4958 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 4959 4960 /* 2a: Disable RC states. 
*/ 4961 I915_WRITE(GEN6_RC_CONTROL, 0); 4962 4963 /* Initialize rps frequencies */ 4964 gen6_init_rps_frequencies(dev); 4965 4966 /* 2b: Program RC6 thresholds. */ 4967 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); 4968 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ 4969 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ 4970 for_each_engine(engine, dev_priv) 4971 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); 4972 I915_WRITE(GEN6_RC_SLEEP, 0); 4973 if (IS_BROADWELL(dev)) 4974 I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */ 4975 else 4976 I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ 4977 4978 /* 3: Enable RC6 */ 4979 if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE) 4980 rc6_mask = GEN6_RC_CTL_RC6_ENABLE; 4981 intel_print_rc6_info(dev, rc6_mask); 4982 if (IS_BROADWELL(dev)) 4983 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | 4984 GEN7_RC_CTL_TO_MODE | 4985 rc6_mask); 4986 else 4987 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | 4988 GEN6_RC_CTL_EI_MODE(1) | 4989 rc6_mask); 4990 4991 /* 4: Program defaults and thresholds for RPS */ 4992 I915_WRITE(GEN6_RPNSWREQ, 4993 HSW_FREQUENCY(dev_priv->rps.rp1_freq)); 4994 I915_WRITE(GEN6_RC_VIDEO_FREQ, 4995 HSW_FREQUENCY(dev_priv->rps.rp1_freq)); 4996 /* NB: Docs say 1s, and 1000000 - which aren't equivalent */ 4997 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */ 4998 4999 /* Docs recommend 900MHz, and 300 MHz respectively */ 5000 I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, 5001 dev_priv->rps.max_freq_softlimit << 24 | 5002 dev_priv->rps.min_freq_softlimit << 16); 5003 5004 I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */ 5005 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70% */ 5006 I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */ 5007 I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */ 5008 5009 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); 5010 5011 /* 5: Enable RPS */ 5012 I915_WRITE(GEN6_RP_CONTROL, 5013 GEN6_RP_MEDIA_TURBO | 5014 GEN6_RP_MEDIA_HW_NORMAL_MODE | 5015 GEN6_RP_MEDIA_IS_GFX | 5016 GEN6_RP_ENABLE | 5017 GEN6_RP_UP_BUSY_AVG | 5018 GEN6_RP_DOWN_IDLE_AVG); 5019 5020 /* 6: Ring frequency + overclocking (our driver does this later) */ 5021 5022 dev_priv->rps.power = HIGH_POWER; /* force a reset */ 5023 gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq); 5024 5025 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5026} 5027 5028static void gen6_enable_rps(struct drm_device *dev) 5029{ 5030 struct drm_i915_private *dev_priv = dev->dev_private; 5031 struct intel_engine_cs *engine; 5032 u32 rc6vids, pcu_mbox = 0, rc6_mask = 0; 5033 u32 gtfifodbg; 5034 int rc6_mode; 5035 int ret; 5036 5037 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 5038 5039 /* Here begins a magic sequence of register writes to enable 5040 * auto-downclocking. 5041 * 5042 * Perhaps there might be some value in exposing these to 5043 * userspace...
5044 */ 5045 I915_WRITE(GEN6_RC_STATE, 0); 5046 5047 /* Clear the DBG now so we don't confuse earlier errors */ 5048 gtfifodbg = I915_READ(GTFIFODBG); 5049 if (gtfifodbg) { 5050 DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg); 5051 I915_WRITE(GTFIFODBG, gtfifodbg); 5052 } 5053 5054 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5055 5056 /* Initialize rps frequencies */ 5057 gen6_init_rps_frequencies(dev); 5058 5059 /* disable the counters and set deterministic thresholds */ 5060 I915_WRITE(GEN6_RC_CONTROL, 0); 5061 5062 I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16); 5063 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30); 5064 I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30); 5065 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); 5066 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); 5067 5068 for_each_engine(engine, dev_priv) 5069 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); 5070 5071 I915_WRITE(GEN6_RC_SLEEP, 0); 5072 I915_WRITE(GEN6_RC1e_THRESHOLD, 1000); 5073 if (IS_IVYBRIDGE(dev)) 5074 I915_WRITE(GEN6_RC6_THRESHOLD, 125000); 5075 else 5076 I915_WRITE(GEN6_RC6_THRESHOLD, 50000); 5077 I915_WRITE(GEN6_RC6p_THRESHOLD, 150000); 5078 I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */ 5079 5080 /* Check if we are enabling RC6 */ 5081 rc6_mode = intel_enable_rc6(dev_priv->dev); 5082 if (rc6_mode & INTEL_RC6_ENABLE) 5083 rc6_mask |= GEN6_RC_CTL_RC6_ENABLE; 5084 5085 /* We don't use those on Haswell */ 5086 if (!IS_HASWELL(dev)) { 5087 if (rc6_mode & INTEL_RC6p_ENABLE) 5088 rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE; 5089 5090 if (rc6_mode & INTEL_RC6pp_ENABLE) 5091 rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE; 5092 } 5093 5094 intel_print_rc6_info(dev, rc6_mask); 5095 5096 I915_WRITE(GEN6_RC_CONTROL, 5097 rc6_mask | 5098 GEN6_RC_CTL_EI_MODE(1) | 5099 GEN6_RC_CTL_HW_ENABLE); 5100 5101 /* Power down if completely idle for over 50ms */ 5102 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000); 5103 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); 5104 5105 ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_MIN_FREQ_TABLE, 0); 5106 if (ret) 5107 DRM_DEBUG_DRIVER("Failed to set the min frequency\n"); 5108 5109 ret = sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &pcu_mbox); 5110 if (!ret && (pcu_mbox & (1<<31))) { /* OC supported */ 5111 DRM_DEBUG_DRIVER("Overclocking supported. Max: %dMHz, Overclock max: %dMHz\n", 5112 (dev_priv->rps.max_freq_softlimit & 0xff) * 50, 5113 (pcu_mbox & 0xff) * 50); 5114 dev_priv->rps.max_freq = pcu_mbox & 0xff; 5115 } 5116 5117 dev_priv->rps.power = HIGH_POWER; /* force a reset */ 5118 gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq); 5119 5120 rc6vids = 0; 5121 ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); 5122 if (IS_GEN6(dev) && ret) { 5123 DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n"); 5124 } else if (IS_GEN6(dev) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) { 5125 DRM_DEBUG_DRIVER("You should update your BIOS. 
Correcting minimum rc6 voltage (%dmV->%dmV)\n", 5126 GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450); 5127 rc6vids &= 0xffff00; 5128 rc6vids |= GEN6_ENCODE_RC6_VID(450); 5129 ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids); 5130 if (ret) 5131 DRM_ERROR("Couldn't fix incorrect rc6 voltage\n"); 5132 } 5133 5134 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5135} 5136 5137static void __gen6_update_ring_freq(struct drm_device *dev) 5138{ 5139 struct drm_i915_private *dev_priv = dev->dev_private; 5140 int min_freq = 15; 5141 unsigned int gpu_freq; 5142 unsigned int max_ia_freq, min_ring_freq; 5143 unsigned int max_gpu_freq, min_gpu_freq; 5144 int scaling_factor = 180; 5145 struct cpufreq_policy *policy; 5146 5147 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 5148 5149 policy = cpufreq_cpu_get(0); 5150 if (policy) { 5151 max_ia_freq = policy->cpuinfo.max_freq; 5152 cpufreq_cpu_put(policy); 5153 } else { 5154 /* 5155 * Default to measured freq if none found, PCU will ensure we 5156 * don't go over 5157 */ 5158 max_ia_freq = tsc_khz; 5159 } 5160 5161 /* Convert from kHz to MHz */ 5162 max_ia_freq /= 1000; 5163 5164 min_ring_freq = I915_READ(DCLK) & 0xf; 5165 /* convert DDR frequency from units of 266.6MHz to bandwidth */ 5166 min_ring_freq = mult_frac(min_ring_freq, 8, 3); 5167 5168 if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) { 5169 /* Convert GT frequency to 50 MHz units */ 5170 min_gpu_freq = dev_priv->rps.min_freq / GEN9_FREQ_SCALER; 5171 max_gpu_freq = dev_priv->rps.max_freq / GEN9_FREQ_SCALER; 5172 } else { 5173 min_gpu_freq = dev_priv->rps.min_freq; 5174 max_gpu_freq = dev_priv->rps.max_freq; 5175 } 5176 5177 /* 5178 * For each potential GPU frequency, load a ring frequency we'd like 5179 * to use for memory access. We do this by specifying the IA frequency 5180 * the PCU should use as a reference to determine the ring frequency. 5181 */ 5182 for (gpu_freq = max_gpu_freq; gpu_freq >= min_gpu_freq; gpu_freq--) { 5183 int diff = max_gpu_freq - gpu_freq; 5184 unsigned int ia_freq = 0, ring_freq = 0; 5185 5186 if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) { 5187 /* 5188 * ring_freq = 2 * GT. ring_freq is in 100MHz units 5189 * No floor required for ring frequency on SKL. 5190 */ 5191 ring_freq = gpu_freq; 5192 } else if (INTEL_INFO(dev)->gen >= 8) { 5193 /* max(2 * GT, DDR). NB: GT is 50MHz units */ 5194 ring_freq = max(min_ring_freq, gpu_freq); 5195 } else if (IS_HASWELL(dev)) { 5196 ring_freq = mult_frac(gpu_freq, 5, 4); 5197 ring_freq = max(min_ring_freq, ring_freq); 5198 /* leave ia_freq as the default, chosen by cpufreq */ 5199 } else { 5200 /* On older processors, there is no separate ring 5201 * clock domain, so in order to boost the bandwidth 5202 * of the ring, we need to upclock the CPU (ia_freq). 5203 * 5204 * For GPU frequencies less than 750MHz, 5205 * just use the lowest ring freq.
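 * As an illustration (numbers assumed for the example, not taken from any spec): with max_ia_freq = 3000 MHz, scaling_factor = 180 and diff = 10 bins, ia_freq = 3000 - (10 * 180) / 2 = 2100 MHz, which DIV_ROUND_CLOSEST(ia_freq, 100) encodes as 21 in the 100MHz units the PCU expects.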
5206 */ 5207 if (gpu_freq < min_freq) 5208 ia_freq = 800; 5209 else 5210 ia_freq = max_ia_freq - ((diff * scaling_factor) / 2); 5211 ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100); 5212 } 5213 5214 sandybridge_pcode_write(dev_priv, 5215 GEN6_PCODE_WRITE_MIN_FREQ_TABLE, 5216 ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT | 5217 ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT | 5218 gpu_freq); 5219 } 5220} 5221 5222void gen6_update_ring_freq(struct drm_device *dev) 5223{ 5224 struct drm_i915_private *dev_priv = dev->dev_private; 5225 5226 if (!HAS_CORE_RING_FREQ(dev)) 5227 return; 5228 5229 mutex_lock(&dev_priv->rps.hw_lock); 5230 __gen6_update_ring_freq(dev); 5231 mutex_unlock(&dev_priv->rps.hw_lock); 5232} 5233 5234static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv) 5235{ 5236 struct drm_device *dev = dev_priv->dev; 5237 u32 val, rp0; 5238 5239 val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE); 5240 5241 switch (INTEL_INFO(dev)->eu_total) { 5242 case 8: 5243 /* (2 * 4) config */ 5244 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT); 5245 break; 5246 case 12: 5247 /* (2 * 6) config */ 5248 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT); 5249 break; 5250 case 16: 5251 /* (2 * 8) config */ 5252 default: 5253 /* Setting (2 * 8) Min RP0 for any other combination */ 5254 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT); 5255 break; 5256 } 5257 5258 rp0 = (rp0 & FB_GFX_FREQ_FUSE_MASK); 5259 5260 return rp0; 5261} 5262 5263static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv) 5264{ 5265 u32 val, rpe; 5266 5267 val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG); 5268 rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK; 5269 5270 return rpe; 5271} 5272 5273static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv) 5274{ 5275 u32 val, rp1; 5276 5277 val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE); 5278 rp1 = (val & FB_GFX_FREQ_FUSE_MASK); 5279 5280 return rp1; 5281} 5282 5283static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv) 5284{ 5285 u32 val, rp1; 5286 5287 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE); 5288 5289 rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT; 5290 5291 return rp1; 5292} 5293 5294static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv) 5295{ 5296 u32 val, rp0; 5297 5298 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE); 5299 5300 rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT; 5301 /* Clamp to max */ 5302 rp0 = min_t(u32, rp0, 0xea); 5303 5304 return rp0; 5305} 5306 5307static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv) 5308{ 5309 u32 val, rpe; 5310 5311 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO); 5312 rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT; 5313 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI); 5314 rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5; 5315 5316 return rpe; 5317} 5318 5319static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv) 5320{ 5321 u32 val; 5322 5323 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff; 5324 /* 5325 * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value 5326 * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on 5327 * a BYT-M B0 the above register contains 0xbf. Moreover when setting 5328 * a frequency Punit will not allow values below 0xc0. Clamp it 0xc0 5329 * to make sure it matches what Punit accepts. 
5330 */ 5331 return max_t(u32, val, 0xc0); 5332} 5333 5334/* Check that the pctx buffer wasn't moved under us. */ 5335static void valleyview_check_pctx(struct drm_i915_private *dev_priv) 5336{ 5337 unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095; 5338 5339 WARN_ON(pctx_addr != dev_priv->mm.stolen_base + 5340 dev_priv->vlv_pctx->stolen->start); 5341} 5342 5343 5344/* Check that the pcbr address is not empty. */ 5345static void cherryview_check_pctx(struct drm_i915_private *dev_priv) 5346{ 5347 unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095; 5348 5349 WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0); 5350} 5351 5352static void cherryview_setup_pctx(struct drm_device *dev) 5353{ 5354 struct drm_i915_private *dev_priv = to_i915(dev); 5355 struct i915_ggtt *ggtt = &dev_priv->ggtt; 5356 unsigned long pctx_paddr, paddr; 5357 u32 pcbr; 5358 int pctx_size = 32*1024; 5359 5360 pcbr = I915_READ(VLV_PCBR); 5361 if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) { 5362 DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n"); 5363 paddr = (dev_priv->mm.stolen_base + 5364 (ggtt->stolen_size - pctx_size)); 5365 5366 pctx_paddr = (paddr & (~4095)); 5367 I915_WRITE(VLV_PCBR, pctx_paddr); 5368 } 5369 5370 DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR)); 5371} 5372 5373static void valleyview_setup_pctx(struct drm_device *dev) 5374{ 5375 struct drm_i915_private *dev_priv = dev->dev_private; 5376 struct drm_i915_gem_object *pctx; 5377 unsigned long pctx_paddr; 5378 u32 pcbr; 5379 int pctx_size = 24*1024; 5380 5381 mutex_lock(&dev->struct_mutex); 5382 5383 pcbr = I915_READ(VLV_PCBR); 5384 if (pcbr) { 5385 /* BIOS set it up already, grab the pre-alloc'd space */ 5386 int pcbr_offset; 5387 5388 pcbr_offset = (pcbr & (~4095)) - dev_priv->mm.stolen_base; 5389 pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv->dev, 5390 pcbr_offset, 5391 I915_GTT_OFFSET_NONE, 5392 pctx_size); 5393 goto out; 5394 } 5395 5396 DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n"); 5397 5398 /* 5399 * From the Gunit register HAS: 5400 * The Gfx driver is expected to program this register and ensure 5401 * proper allocation within Gfx stolen memory. For example, this 5402 * register should be programmed such that the PCBR range does not 5403 * overlap with other ranges, such as the frame buffer, protected 5404 * memory, or any other relevant ranges.
5405 */ 5406 pctx = i915_gem_object_create_stolen(dev, pctx_size); 5407 if (!pctx) { 5408 DRM_DEBUG("not enough stolen space for PCTX, disabling\n"); 5409 goto out; 5410 } 5411 5412 pctx_paddr = dev_priv->mm.stolen_base + pctx->stolen->start; 5413 I915_WRITE(VLV_PCBR, pctx_paddr); 5414 5415out: 5416 DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR)); 5417 dev_priv->vlv_pctx = pctx; 5418 mutex_unlock(&dev->struct_mutex); 5419} 5420 5421static void valleyview_cleanup_pctx(struct drm_device *dev) 5422{ 5423 struct drm_i915_private *dev_priv = dev->dev_private; 5424 5425 if (WARN_ON(!dev_priv->vlv_pctx)) 5426 return; 5427 5428 drm_gem_object_unreference_unlocked(&dev_priv->vlv_pctx->base); 5429 dev_priv->vlv_pctx = NULL; 5430} 5431 5432static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv) 5433{ 5434 dev_priv->rps.gpll_ref_freq = 5435 vlv_get_cck_clock(dev_priv, "GPLL ref", 5436 CCK_GPLL_CLOCK_CONTROL, 5437 dev_priv->czclk_freq); 5438 5439 DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n", 5440 dev_priv->rps.gpll_ref_freq); 5441} 5442 5443static void valleyview_init_gt_powersave(struct drm_device *dev) 5444{ 5445 struct drm_i915_private *dev_priv = dev->dev_private; 5446 u32 val; 5447 5448 valleyview_setup_pctx(dev); 5449 5450 vlv_init_gpll_ref_freq(dev_priv); 5451 5452 mutex_lock(&dev_priv->rps.hw_lock); 5453 5454 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); 5455 switch ((val >> 6) & 3) { 5456 case 0: 5457 case 1: 5458 dev_priv->mem_freq = 800; 5459 break; 5460 case 2: 5461 dev_priv->mem_freq = 1066; 5462 break; 5463 case 3: 5464 dev_priv->mem_freq = 1333; 5465 break; 5466 } 5467 DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq); 5468 5469 dev_priv->rps.max_freq = valleyview_rps_max_freq(dev_priv); 5470 dev_priv->rps.rp0_freq = dev_priv->rps.max_freq; 5471 DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", 5472 intel_gpu_freq(dev_priv, dev_priv->rps.max_freq), 5473 dev_priv->rps.max_freq); 5474 5475 dev_priv->rps.efficient_freq = valleyview_rps_rpe_freq(dev_priv); 5476 DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", 5477 intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), 5478 dev_priv->rps.efficient_freq); 5479 5480 dev_priv->rps.rp1_freq = valleyview_rps_guar_freq(dev_priv); 5481 DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n", 5482 intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq), 5483 dev_priv->rps.rp1_freq); 5484 5485 dev_priv->rps.min_freq = valleyview_rps_min_freq(dev_priv); 5486 DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", 5487 intel_gpu_freq(dev_priv, dev_priv->rps.min_freq), 5488 dev_priv->rps.min_freq); 5489 5490 dev_priv->rps.idle_freq = dev_priv->rps.min_freq; 5491 5492 /* Preserve min/max settings in case of re-init */ 5493 if (dev_priv->rps.max_freq_softlimit == 0) 5494 dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; 5495 5496 if (dev_priv->rps.min_freq_softlimit == 0) 5497 dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq; 5498 5499 mutex_unlock(&dev_priv->rps.hw_lock); 5500} 5501 5502static void cherryview_init_gt_powersave(struct drm_device *dev) 5503{ 5504 struct drm_i915_private *dev_priv = dev->dev_private; 5505 u32 val; 5506 5507 cherryview_setup_pctx(dev); 5508 5509 vlv_init_gpll_ref_freq(dev_priv); 5510 5511 mutex_lock(&dev_priv->rps.hw_lock); 5512 5513 mutex_lock(&dev_priv->sb_lock); 5514 val = vlv_cck_read(dev_priv, CCK_FUSE_REG); 5515 mutex_unlock(&dev_priv->sb_lock); 5516 5517 switch ((val >> 2) & 0x7) { 5518 case 3: 5519 dev_priv->mem_freq = 2000; 5520 break; 5521 default: 5522 
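/* any other fuse value maps to 1600 MHz */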
dev_priv->mem_freq = 1600; 5523 break; 5524 } 5525 DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq); 5526 5527 dev_priv->rps.max_freq = cherryview_rps_max_freq(dev_priv); 5528 dev_priv->rps.rp0_freq = dev_priv->rps.max_freq; 5529 DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", 5530 intel_gpu_freq(dev_priv, dev_priv->rps.max_freq), 5531 dev_priv->rps.max_freq); 5532 5533 dev_priv->rps.efficient_freq = cherryview_rps_rpe_freq(dev_priv); 5534 DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", 5535 intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), 5536 dev_priv->rps.efficient_freq); 5537 5538 dev_priv->rps.rp1_freq = cherryview_rps_guar_freq(dev_priv); 5539 DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n", 5540 intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq), 5541 dev_priv->rps.rp1_freq); 5542 5543 /* PUnit validated range is only [RPe, RP0] */ 5544 dev_priv->rps.min_freq = dev_priv->rps.efficient_freq; 5545 DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", 5546 intel_gpu_freq(dev_priv, dev_priv->rps.min_freq), 5547 dev_priv->rps.min_freq); 5548 5549 WARN_ONCE((dev_priv->rps.max_freq | 5550 dev_priv->rps.efficient_freq | 5551 dev_priv->rps.rp1_freq | 5552 dev_priv->rps.min_freq) & 1, 5553 "Odd GPU freq values\n"); 5554 5555 dev_priv->rps.idle_freq = dev_priv->rps.min_freq; 5556 5557 /* Preserve min/max settings in case of re-init */ 5558 if (dev_priv->rps.max_freq_softlimit == 0) 5559 dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; 5560 5561 if (dev_priv->rps.min_freq_softlimit == 0) 5562 dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq; 5563 5564 mutex_unlock(&dev_priv->rps.hw_lock); 5565} 5566 5567static void valleyview_cleanup_gt_powersave(struct drm_device *dev) 5568{ 5569 valleyview_cleanup_pctx(dev); 5570} 5571 5572static void cherryview_enable_rps(struct drm_device *dev) 5573{ 5574 struct drm_i915_private *dev_priv = dev->dev_private; 5575 struct intel_engine_cs *engine; 5576 u32 gtfifodbg, val, rc6_mode = 0, pcbr; 5577 5578 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 5579 5580 gtfifodbg = I915_READ(GTFIFODBG) & ~(GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV | 5581 GT_FIFO_FREE_ENTRIES_CHV); 5582 if (gtfifodbg) { 5583 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n", 5584 gtfifodbg); 5585 I915_WRITE(GTFIFODBG, gtfifodbg); 5586 } 5587 5588 cherryview_check_pctx(dev_priv); 5589 5590 /* 1a & 1b: Get forcewake during program sequence. Although the driver 5591 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ 5592 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5593 5594 /* Disable RC states. 
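 * The selected RC6 mode is written back in step 3 below, once the
 * wake rate limit and idle thresholds have been programmed.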
*/ 5595 I915_WRITE(GEN6_RC_CONTROL, 0); 5596 5597 /* 2a: Program RC6 thresholds.*/ 5598 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); 5599 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ 5600 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ 5601 5602 for_each_engine(engine, dev_priv) 5603 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); 5604 I915_WRITE(GEN6_RC_SLEEP, 0); 5605 5606 /* TO threshold set to 500 us ( 0x186 * 1.28 us) */ 5607 I915_WRITE(GEN6_RC6_THRESHOLD, 0x186); 5608 5609 /* allows RC6 residency counter to work */ 5610 I915_WRITE(VLV_COUNTER_CONTROL, 5611 _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | 5612 VLV_MEDIA_RC6_COUNT_EN | 5613 VLV_RENDER_RC6_COUNT_EN)); 5614 5615 /* For now we assume BIOS is allocating and populating the PCBR */ 5616 pcbr = I915_READ(VLV_PCBR); 5617 5618 /* 3: Enable RC6 */ 5619 if ((intel_enable_rc6(dev) & INTEL_RC6_ENABLE) && 5620 (pcbr >> VLV_PCBR_ADDR_SHIFT)) 5621 rc6_mode = GEN7_RC_CTL_TO_MODE; 5622 5623 I915_WRITE(GEN6_RC_CONTROL, rc6_mode); 5624 5625 /* 4 Program defaults and thresholds for RPS*/ 5626 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000); 5627 I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); 5628 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); 5629 I915_WRITE(GEN6_RP_UP_EI, 66000); 5630 I915_WRITE(GEN6_RP_DOWN_EI, 350000); 5631 5632 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); 5633 5634 /* 5: Enable RPS */ 5635 I915_WRITE(GEN6_RP_CONTROL, 5636 GEN6_RP_MEDIA_HW_NORMAL_MODE | 5637 GEN6_RP_MEDIA_IS_GFX | 5638 GEN6_RP_ENABLE | 5639 GEN6_RP_UP_BUSY_AVG | 5640 GEN6_RP_DOWN_IDLE_AVG); 5641 5642 /* Setting Fixed Bias */ 5643 val = VLV_OVERRIDE_EN | 5644 VLV_SOC_TDP_EN | 5645 CHV_BIAS_CPU_50_SOC_50; 5646 vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val); 5647 5648 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); 5649 5650 /* RPS code assumes GPLL is used */ 5651 WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); 5652 5653 DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE)); 5654 DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); 5655 5656 dev_priv->rps.cur_freq = (val >> 8) & 0xff; 5657 DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n", 5658 intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq), 5659 dev_priv->rps.cur_freq); 5660 5661 DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n", 5662 intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq), 5663 dev_priv->rps.idle_freq); 5664 5665 valleyview_set_rps(dev_priv->dev, dev_priv->rps.idle_freq); 5666 5667 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5668} 5669 5670static void valleyview_enable_rps(struct drm_device *dev) 5671{ 5672 struct drm_i915_private *dev_priv = dev->dev_private; 5673 struct intel_engine_cs *engine; 5674 u32 gtfifodbg, val, rc6_mode = 0; 5675 5676 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 5677 5678 valleyview_check_pctx(dev_priv); 5679 5680 gtfifodbg = I915_READ(GTFIFODBG); 5681 if (gtfifodbg) { 5682 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n", 5683 gtfifodbg); 5684 I915_WRITE(GTFIFODBG, gtfifodbg); 5685 } 5686 5687 /* If VLV, Forcewake all wells, else re-direct to regular path */ 5688 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5689 5690 /* Disable RC states. 
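 * The chosen rc6_mode is written back to GEN6_RC_CONTROL further down,
 * after the RP thresholds and residency counters have been set up.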
*/ 5691 I915_WRITE(GEN6_RC_CONTROL, 0); 5692 5693 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000); 5694 I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); 5695 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); 5696 I915_WRITE(GEN6_RP_UP_EI, 66000); 5697 I915_WRITE(GEN6_RP_DOWN_EI, 350000); 5698 5699 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); 5700 5701 I915_WRITE(GEN6_RP_CONTROL, 5702 GEN6_RP_MEDIA_TURBO | 5703 GEN6_RP_MEDIA_HW_NORMAL_MODE | 5704 GEN6_RP_MEDIA_IS_GFX | 5705 GEN6_RP_ENABLE | 5706 GEN6_RP_UP_BUSY_AVG | 5707 GEN6_RP_DOWN_IDLE_CONT); 5708 5709 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000); 5710 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); 5711 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); 5712 5713 for_each_engine(engine, dev_priv) 5714 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); 5715 5716 I915_WRITE(GEN6_RC6_THRESHOLD, 0x557); 5717 5718 /* allows RC6 residency counter to work */ 5719 I915_WRITE(VLV_COUNTER_CONTROL, 5720 _MASKED_BIT_ENABLE(VLV_MEDIA_RC0_COUNT_EN | 5721 VLV_RENDER_RC0_COUNT_EN | 5722 VLV_MEDIA_RC6_COUNT_EN | 5723 VLV_RENDER_RC6_COUNT_EN)); 5724 5725 if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE) 5726 rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL; 5727 5728 intel_print_rc6_info(dev, rc6_mode); 5729 5730 I915_WRITE(GEN6_RC_CONTROL, rc6_mode); 5731 5732 /* Setting Fixed Bias */ 5733 val = VLV_OVERRIDE_EN | 5734 VLV_SOC_TDP_EN | 5735 VLV_BIAS_CPU_125_SOC_875; 5736 vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val); 5737 5738 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); 5739 5740 /* RPS code assumes GPLL is used */ 5741 WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); 5742 5743 DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE)); 5744 DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); 5745 5746 dev_priv->rps.cur_freq = (val >> 8) & 0xff; 5747 DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n", 5748 intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq), 5749 dev_priv->rps.cur_freq); 5750 5751 DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n", 5752 intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq), 5753 dev_priv->rps.idle_freq); 5754 5755 valleyview_set_rps(dev_priv->dev, dev_priv->rps.idle_freq); 5756 5757 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5758} 5759 5760static unsigned long intel_pxfreq(u32 vidfreq) 5761{ 5762 unsigned long freq; 5763 int div = (vidfreq & 0x3f0000) >> 16; 5764 int post = (vidfreq & 0x3000) >> 12; 5765 int pre = (vidfreq & 0x7); 5766 5767 if (!pre) 5768 return 0; 5769 5770 freq = ((div * 133333) / ((1<<post) * pre)); 5771 5772 return freq; 5773} 5774 5775static const struct cparams { 5776 u16 i; 5777 u16 t; 5778 u16 m; 5779 u16 c; 5780} cparams[] = { 5781 { 1, 1333, 301, 28664 }, 5782 { 1, 1066, 294, 24460 }, 5783 { 1, 800, 294, 25192 }, 5784 { 0, 1333, 276, 27605 }, 5785 { 0, 1066, 276, 27605 }, 5786 { 0, 800, 231, 23784 }, 5787}; 5788 5789static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv) 5790{ 5791 u64 total_count, diff, ret; 5792 u32 count1, count2, count3, m = 0, c = 0; 5793 unsigned long now = jiffies_to_msecs(jiffies), diff1; 5794 int i; 5795 5796 assert_spin_locked(&mchdev_lock); 5797 5798 diff1 = now - dev_priv->ips.last_time1; 5799 5800 /* Prevent division-by-zero if we are asking too fast. 5801 * Also, we don't get interesting results if we are polling 5802 * faster than once in 10ms, so just return the saved value 5803 * in such cases. 
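 * (diff1 is in milliseconds; last_time1 is recorded with
 * jiffies_to_msecs() above.)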
5804 */ 5805 if (diff1 <= 10) 5806 return dev_priv->ips.chipset_power; 5807 5808 count1 = I915_READ(DMIEC); 5809 count2 = I915_READ(DDREC); 5810 count3 = I915_READ(CSIEC); 5811 5812 total_count = count1 + count2 + count3; 5813 5814 /* FIXME: handle per-counter overflow */ 5815 if (total_count < dev_priv->ips.last_count1) { 5816 diff = ~0UL - dev_priv->ips.last_count1; 5817 diff += total_count; 5818 } else { 5819 diff = total_count - dev_priv->ips.last_count1; 5820 } 5821 5822 for (i = 0; i < ARRAY_SIZE(cparams); i++) { 5823 if (cparams[i].i == dev_priv->ips.c_m && 5824 cparams[i].t == dev_priv->ips.r_t) { 5825 m = cparams[i].m; 5826 c = cparams[i].c; 5827 break; 5828 } 5829 } 5830 5831 diff = div_u64(diff, diff1); 5832 ret = ((m * diff) + c); 5833 ret = div_u64(ret, 10); 5834 5835 dev_priv->ips.last_count1 = total_count; 5836 dev_priv->ips.last_time1 = now; 5837 5838 dev_priv->ips.chipset_power = ret; 5839 5840 return ret; 5841} 5842 5843unsigned long i915_chipset_val(struct drm_i915_private *dev_priv) 5844{ 5845 struct drm_device *dev = dev_priv->dev; 5846 unsigned long val; 5847 5848 if (INTEL_INFO(dev)->gen != 5) 5849 return 0; 5850 5851 spin_lock_irq(&mchdev_lock); 5852 5853 val = __i915_chipset_val(dev_priv); 5854 5855 spin_unlock_irq(&mchdev_lock); 5856 5857 return val; 5858} 5859 5860unsigned long i915_mch_val(struct drm_i915_private *dev_priv) 5861{ 5862 unsigned long m, x, b; 5863 u32 tsfs; 5864 5865 tsfs = I915_READ(TSFS); 5866 5867 m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT); 5868 x = I915_READ8(TR1); 5869 5870 b = tsfs & TSFS_INTR_MASK; 5871 5872 return ((m * x) / 127) - b; 5873} 5874 5875static int _pxvid_to_vd(u8 pxvid) 5876{ 5877 if (pxvid == 0) 5878 return 0; 5879 5880 if (pxvid >= 8 && pxvid < 31) 5881 pxvid = 31; 5882 5883 return (pxvid + 2) * 125; 5884} 5885 5886static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid) 5887{ 5888 struct drm_device *dev = dev_priv->dev; 5889 const int vd = _pxvid_to_vd(pxvid); 5890 const int vm = vd - 1125; 5891 5892 if (INTEL_INFO(dev)->is_mobile) 5893 return vm > 0 ? vm : 0; 5894 5895 return vd; 5896} 5897 5898static void __i915_update_gfx_val(struct drm_i915_private *dev_priv) 5899{ 5900 u64 now, diff, diffms; 5901 u32 count; 5902 5903 assert_spin_locked(&mchdev_lock); 5904 5905 now = ktime_get_raw_ns(); 5906 diffms = now - dev_priv->ips.last_time2; 5907 do_div(diffms, NSEC_PER_MSEC); 5908 5909 /* Don't divide by 0 */ 5910 if (!diffms) 5911 return; 5912 5913 count = I915_READ(GFXEC); 5914 5915 if (count < dev_priv->ips.last_count2) { 5916 diff = ~0UL - dev_priv->ips.last_count2; 5917 diff += count; 5918 } else { 5919 diff = count - dev_priv->ips.last_count2; 5920 } 5921 5922 dev_priv->ips.last_count2 = count; 5923 dev_priv->ips.last_time2 = now; 5924 5925 /* More magic constants... 
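 * The counter delta is scaled by 1181/10 and divided by the elapsed
 * milliseconds to produce the gfx_power estimate below.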
*/ 5926 diff = diff * 1181; 5927 diff = div_u64(diff, diffms * 10); 5928 dev_priv->ips.gfx_power = diff; 5929} 5930 5931void i915_update_gfx_val(struct drm_i915_private *dev_priv) 5932{ 5933 struct drm_device *dev = dev_priv->dev; 5934 5935 if (INTEL_INFO(dev)->gen != 5) 5936 return; 5937 5938 spin_lock_irq(&mchdev_lock); 5939 5940 __i915_update_gfx_val(dev_priv); 5941 5942 spin_unlock_irq(&mchdev_lock); 5943} 5944 5945static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv) 5946{ 5947 unsigned long t, corr, state1, corr2, state2; 5948 u32 pxvid, ext_v; 5949 5950 assert_spin_locked(&mchdev_lock); 5951 5952 pxvid = I915_READ(PXVFREQ(dev_priv->rps.cur_freq)); 5953 pxvid = (pxvid >> 24) & 0x7f; 5954 ext_v = pvid_to_extvid(dev_priv, pxvid); 5955 5956 state1 = ext_v; 5957 5958 t = i915_mch_val(dev_priv); 5959 5960 /* Revel in the empirically derived constants */ 5961 5962 /* Correction factor in 1/100000 units */ 5963 if (t > 80) 5964 corr = ((t * 2349) + 135940); 5965 else if (t >= 50) 5966 corr = ((t * 964) + 29317); 5967 else /* < 50 */ 5968 corr = ((t * 301) + 1004); 5969 5970 corr = corr * ((150142 * state1) / 10000 - 78642); 5971 corr /= 100000; 5972 corr2 = (corr * dev_priv->ips.corr); 5973 5974 state2 = (corr2 * state1) / 10000; 5975 state2 /= 100; /* convert to mW */ 5976 5977 __i915_update_gfx_val(dev_priv); 5978 5979 return dev_priv->ips.gfx_power + state2; 5980} 5981 5982unsigned long i915_gfx_val(struct drm_i915_private *dev_priv) 5983{ 5984 struct drm_device *dev = dev_priv->dev; 5985 unsigned long val; 5986 5987 if (INTEL_INFO(dev)->gen != 5) 5988 return 0; 5989 5990 spin_lock_irq(&mchdev_lock); 5991 5992 val = __i915_gfx_val(dev_priv); 5993 5994 spin_unlock_irq(&mchdev_lock); 5995 5996 return val; 5997} 5998 5999/** 6000 * i915_read_mch_val - return value for IPS use 6001 * 6002 * Calculate and return a value for the IPS driver to use when deciding whether 6003 * we have thermal and power headroom to increase CPU or GPU power budget. 6004 */ 6005unsigned long i915_read_mch_val(void) 6006{ 6007 struct drm_i915_private *dev_priv; 6008 unsigned long chipset_val, graphics_val, ret = 0; 6009 6010 spin_lock_irq(&mchdev_lock); 6011 if (!i915_mch_dev) 6012 goto out_unlock; 6013 dev_priv = i915_mch_dev; 6014 6015 chipset_val = __i915_chipset_val(dev_priv); 6016 graphics_val = __i915_gfx_val(dev_priv); 6017 6018 ret = chipset_val + graphics_val; 6019 6020out_unlock: 6021 spin_unlock_irq(&mchdev_lock); 6022 6023 return ret; 6024} 6025EXPORT_SYMBOL_GPL(i915_read_mch_val); 6026 6027/** 6028 * i915_gpu_raise - raise GPU frequency limit 6029 * 6030 * Raise the limit; IPS indicates we have thermal headroom. 6031 */ 6032bool i915_gpu_raise(void) 6033{ 6034 struct drm_i915_private *dev_priv; 6035 bool ret = true; 6036 6037 spin_lock_irq(&mchdev_lock); 6038 if (!i915_mch_dev) { 6039 ret = false; 6040 goto out_unlock; 6041 } 6042 dev_priv = i915_mch_dev; 6043 6044 if (dev_priv->ips.max_delay > dev_priv->ips.fmax) 6045 dev_priv->ips.max_delay--; 6046 6047out_unlock: 6048 spin_unlock_irq(&mchdev_lock); 6049 6050 return ret; 6051} 6052EXPORT_SYMBOL_GPL(i915_gpu_raise); 6053 6054/** 6055 * i915_gpu_lower - lower GPU frequency limit 6056 * 6057 * IPS indicates we're close to a thermal limit, so throttle back the GPU 6058 * frequency maximum. 
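 * On Ironlake the delay value is inverse to frequency (a larger delay
 * means a slower clock), hence lowering the limit increments max_delay.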
6059 */ 6060bool i915_gpu_lower(void) 6061{ 6062 struct drm_i915_private *dev_priv; 6063 bool ret = true; 6064 6065 spin_lock_irq(&mchdev_lock); 6066 if (!i915_mch_dev) { 6067 ret = false; 6068 goto out_unlock; 6069 } 6070 dev_priv = i915_mch_dev; 6071 6072 if (dev_priv->ips.max_delay < dev_priv->ips.min_delay) 6073 dev_priv->ips.max_delay++; 6074 6075out_unlock: 6076 spin_unlock_irq(&mchdev_lock); 6077 6078 return ret; 6079} 6080EXPORT_SYMBOL_GPL(i915_gpu_lower); 6081 6082/** 6083 * i915_gpu_busy - indicate GPU business to IPS 6084 * 6085 * Tell the IPS driver whether or not the GPU is busy. 6086 */ 6087bool i915_gpu_busy(void) 6088{ 6089 struct drm_i915_private *dev_priv; 6090 struct intel_engine_cs *engine; 6091 bool ret = false; 6092 6093 spin_lock_irq(&mchdev_lock); 6094 if (!i915_mch_dev) 6095 goto out_unlock; 6096 dev_priv = i915_mch_dev; 6097 6098 for_each_engine(engine, dev_priv) 6099 ret |= !list_empty(&engine->request_list); 6100 6101out_unlock: 6102 spin_unlock_irq(&mchdev_lock); 6103 6104 return ret; 6105} 6106EXPORT_SYMBOL_GPL(i915_gpu_busy); 6107 6108/** 6109 * i915_gpu_turbo_disable - disable graphics turbo 6110 * 6111 * Disable graphics turbo by resetting the max frequency and setting the 6112 * current frequency to the default. 6113 */ 6114bool i915_gpu_turbo_disable(void) 6115{ 6116 struct drm_i915_private *dev_priv; 6117 bool ret = true; 6118 6119 spin_lock_irq(&mchdev_lock); 6120 if (!i915_mch_dev) { 6121 ret = false; 6122 goto out_unlock; 6123 } 6124 dev_priv = i915_mch_dev; 6125 6126 dev_priv->ips.max_delay = dev_priv->ips.fstart; 6127 6128 if (!ironlake_set_drps(dev_priv->dev, dev_priv->ips.fstart)) 6129 ret = false; 6130 6131out_unlock: 6132 spin_unlock_irq(&mchdev_lock); 6133 6134 return ret; 6135} 6136EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable); 6137 6138/** 6139 * Tells the intel_ips driver that the i915 driver is now loaded, if 6140 * IPS got loaded first. 6141 * 6142 * This awkward dance is so that neither module has to depend on the 6143 * other in order for IPS to do the appropriate communication of 6144 * GPU turbo limits to i915. 6145 */ 6146static void 6147ips_ping_for_i915_load(void) 6148{ 6149 void (*link)(void); 6150 6151 link = symbol_get(ips_link_to_i915_driver); 6152 if (link) { 6153 link(); 6154 symbol_put(ips_link_to_i915_driver); 6155 } 6156} 6157 6158void intel_gpu_ips_init(struct drm_i915_private *dev_priv) 6159{ 6160 /* We only register the i915 ips part with intel-ips once everything is 6161 * set up, to avoid intel-ips sneaking in and reading bogus values. 
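 * Publishing i915_mch_dev under mchdev_lock pairs with the NULL checks
 * in the exported IPS hooks above.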
*/ 6162 spin_lock_irq(&mchdev_lock); 6163 i915_mch_dev = dev_priv; 6164 spin_unlock_irq(&mchdev_lock); 6165 6166 ips_ping_for_i915_load(); 6167} 6168 6169void intel_gpu_ips_teardown(void) 6170{ 6171 spin_lock_irq(&mchdev_lock); 6172 i915_mch_dev = NULL; 6173 spin_unlock_irq(&mchdev_lock); 6174} 6175 6176static void intel_init_emon(struct drm_device *dev) 6177{ 6178 struct drm_i915_private *dev_priv = dev->dev_private; 6179 u32 lcfuse; 6180 u8 pxw[16]; 6181 int i; 6182 6183 /* Disable PMON while we program it */ 6184 I915_WRITE(ECR, 0); 6185 POSTING_READ(ECR); 6186 6187 /* Program energy weights for various events */ 6188 I915_WRITE(SDEW, 0x15040d00); 6189 I915_WRITE(CSIEW0, 0x007f0000); 6190 I915_WRITE(CSIEW1, 0x1e220004); 6191 I915_WRITE(CSIEW2, 0x04000004); 6192 6193 for (i = 0; i < 5; i++) 6194 I915_WRITE(PEW(i), 0); 6195 for (i = 0; i < 3; i++) 6196 I915_WRITE(DEW(i), 0); 6197 6198 /* Program P-state weights to account for frequency power adjustment */ 6199 for (i = 0; i < 16; i++) { 6200 u32 pxvidfreq = I915_READ(PXVFREQ(i)); 6201 unsigned long freq = intel_pxfreq(pxvidfreq); 6202 unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >> 6203 PXVFREQ_PX_SHIFT; 6204 unsigned long val; 6205 6206 val = vid * vid; 6207 val *= (freq / 1000); 6208 val *= 255; 6209 val /= (127*127*900); 6210 if (val > 0xff) 6211 DRM_ERROR("bad pxval: %lu\n", val); 6212 pxw[i] = val; 6213 } 6214 /* Render standby states get 0 weight */ 6215 pxw[14] = 0; 6216 pxw[15] = 0; 6217 6218 for (i = 0; i < 4; i++) { 6219 u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) | 6220 (pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]); 6221 I915_WRITE(PXW(i), val); 6222 } 6223 6224 /* Adjust magic regs to magic values (more experimental results) */ 6225 I915_WRITE(OGW0, 0); 6226 I915_WRITE(OGW1, 0); 6227 I915_WRITE(EG0, 0x00007f00); 6228 I915_WRITE(EG1, 0x0000000e); 6229 I915_WRITE(EG2, 0x000e0000); 6230 I915_WRITE(EG3, 0x68000300); 6231 I915_WRITE(EG4, 0x42000000); 6232 I915_WRITE(EG5, 0x00140031); 6233 I915_WRITE(EG6, 0); 6234 I915_WRITE(EG7, 0); 6235 6236 for (i = 0; i < 8; i++) 6237 I915_WRITE(PXWL(i), 0); 6238 6239 /* Enable PMON + select events */ 6240 I915_WRITE(ECR, 0x80000019); 6241 6242 lcfuse = I915_READ(LCFUSE02); 6243 6244 dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK); 6245} 6246 6247void intel_init_gt_powersave(struct drm_device *dev) 6248{ 6249 struct drm_i915_private *dev_priv = dev->dev_private; 6250 6251 /* 6252 * RPM depends on RC6 to save/restore the GT HW context, so make RC6 a 6253 * requirement.
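 * When the user has disabled RC6 we therefore hold a runtime PM
 * reference below, dropped again in intel_cleanup_gt_powersave().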
6254 */ 6255 if (!i915.enable_rc6) { 6256 DRM_INFO("RC6 disabled, disabling runtime PM support\n"); 6257 intel_runtime_pm_get(dev_priv); 6258 } 6259 6260 if (IS_CHERRYVIEW(dev)) 6261 cherryview_init_gt_powersave(dev); 6262 else if (IS_VALLEYVIEW(dev)) 6263 valleyview_init_gt_powersave(dev); 6264} 6265 6266void intel_cleanup_gt_powersave(struct drm_device *dev) 6267{ 6268 struct drm_i915_private *dev_priv = dev->dev_private; 6269 6270 if (IS_CHERRYVIEW(dev)) 6271 return; 6272 else if (IS_VALLEYVIEW(dev)) 6273 valleyview_cleanup_gt_powersave(dev); 6274 6275 if (!i915.enable_rc6) 6276 intel_runtime_pm_put(dev_priv); 6277} 6278 6279static void gen6_suspend_rps(struct drm_device *dev) 6280{ 6281 struct drm_i915_private *dev_priv = dev->dev_private; 6282 6283 flush_delayed_work(&dev_priv->rps.delayed_resume_work); 6284 6285 gen6_disable_rps_interrupts(dev); 6286} 6287 6288/** 6289 * intel_suspend_gt_powersave - suspend PM work and helper threads 6290 * @dev: drm device 6291 * 6292 * We don't want to disable RC6 or other features here, we just want 6293 * to make sure any work we've queued has finished and won't bother 6294 * us while we're suspended. 6295 */ 6296void intel_suspend_gt_powersave(struct drm_device *dev) 6297{ 6298 struct drm_i915_private *dev_priv = dev->dev_private; 6299 6300 if (INTEL_INFO(dev)->gen < 6) 6301 return; 6302 6303 gen6_suspend_rps(dev); 6304 6305 /* Force GPU to min freq during suspend */ 6306 gen6_rps_idle(dev_priv); 6307} 6308 6309void intel_disable_gt_powersave(struct drm_device *dev) 6310{ 6311 struct drm_i915_private *dev_priv = dev->dev_private; 6312 6313 if (IS_IRONLAKE_M(dev)) { 6314 ironlake_disable_drps(dev); 6315 } else if (INTEL_INFO(dev)->gen >= 6) { 6316 intel_suspend_gt_powersave(dev); 6317 6318 mutex_lock(&dev_priv->rps.hw_lock); 6319 if (INTEL_INFO(dev)->gen >= 9) { 6320 gen9_disable_rc6(dev); 6321 gen9_disable_rps(dev); 6322 } else if (IS_CHERRYVIEW(dev)) 6323 cherryview_disable_rps(dev); 6324 else if (IS_VALLEYVIEW(dev)) 6325 valleyview_disable_rps(dev); 6326 else 6327 gen6_disable_rps(dev); 6328 6329 dev_priv->rps.enabled = false; 6330 mutex_unlock(&dev_priv->rps.hw_lock); 6331 } 6332} 6333 6334static void intel_gen6_powersave_work(struct work_struct *work) 6335{ 6336 struct drm_i915_private *dev_priv = 6337 container_of(work, struct drm_i915_private, 6338 rps.delayed_resume_work.work); 6339 struct drm_device *dev = dev_priv->dev; 6340 6341 mutex_lock(&dev_priv->rps.hw_lock); 6342 6343 gen6_reset_rps_interrupts(dev); 6344 6345 if (IS_CHERRYVIEW(dev)) { 6346 cherryview_enable_rps(dev); 6347 } else if (IS_VALLEYVIEW(dev)) { 6348 valleyview_enable_rps(dev); 6349 } else if (INTEL_INFO(dev)->gen >= 9) { 6350 gen9_enable_rc6(dev); 6351 gen9_enable_rps(dev); 6352 if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) 6353 __gen6_update_ring_freq(dev); 6354 } else if (IS_BROADWELL(dev)) { 6355 gen8_enable_rps(dev); 6356 __gen6_update_ring_freq(dev); 6357 } else { 6358 gen6_enable_rps(dev); 6359 __gen6_update_ring_freq(dev); 6360 } 6361 6362 WARN_ON(dev_priv->rps.max_freq < dev_priv->rps.min_freq); 6363 WARN_ON(dev_priv->rps.idle_freq > dev_priv->rps.max_freq); 6364 6365 WARN_ON(dev_priv->rps.efficient_freq < dev_priv->rps.min_freq); 6366 WARN_ON(dev_priv->rps.efficient_freq > dev_priv->rps.max_freq); 6367 6368 dev_priv->rps.enabled = true; 6369 6370 gen6_enable_rps_interrupts(dev); 6371 6372 mutex_unlock(&dev_priv->rps.hw_lock); 6373 6374 intel_runtime_pm_put(dev_priv); 6375} 6376 6377void intel_enable_gt_powersave(struct drm_device *dev) 6378{ 6379 struct 
drm_i915_private *dev_priv = dev->dev_private; 6380 6381 /* Powersaving is controlled by the host when inside a VM */ 6382 if (intel_vgpu_active(dev)) 6383 return; 6384 6385 if (IS_IRONLAKE_M(dev)) { 6386 ironlake_enable_drps(dev); 6387 mutex_lock(&dev->struct_mutex); 6388 intel_init_emon(dev); 6389 mutex_unlock(&dev->struct_mutex); 6390 } else if (INTEL_INFO(dev)->gen >= 6) { 6391 /* 6392 * PCU communication is slow and this doesn't need to be 6393 * done at any specific time, so do this out of our fast path 6394 * to make resume and init faster. 6395 * 6396 * We depend on the HW RC6 power context save/restore 6397 * mechanism when entering D3 through runtime PM suspend. So 6398 * disable RPM until RPS/RC6 is properly setup. We can only 6399 * get here via the driver load/system resume/runtime resume 6400 * paths, so the _noresume version is enough (and in case of 6401 * runtime resume it's necessary). 6402 */ 6403 if (schedule_delayed_work(&dev_priv->rps.delayed_resume_work, 6404 round_jiffies_up_relative(HZ))) 6405 intel_runtime_pm_get_noresume(dev_priv); 6406 } 6407} 6408 6409void intel_reset_gt_powersave(struct drm_device *dev) 6410{ 6411 struct drm_i915_private *dev_priv = dev->dev_private; 6412 6413 if (INTEL_INFO(dev)->gen < 6) 6414 return; 6415 6416 gen6_suspend_rps(dev); 6417 dev_priv->rps.enabled = false; 6418} 6419 6420static void ibx_init_clock_gating(struct drm_device *dev) 6421{ 6422 struct drm_i915_private *dev_priv = dev->dev_private; 6423 6424 /* 6425 * On Ibex Peak and Cougar Point, we need to disable clock 6426 * gating for the panel power sequencer or it will fail to 6427 * start up when no ports are active. 6428 */ 6429 I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE); 6430} 6431 6432static void g4x_disable_trickle_feed(struct drm_device *dev) 6433{ 6434 struct drm_i915_private *dev_priv = dev->dev_private; 6435 enum pipe pipe; 6436 6437 for_each_pipe(dev_priv, pipe) { 6438 I915_WRITE(DSPCNTR(pipe), 6439 I915_READ(DSPCNTR(pipe)) | 6440 DISPPLANE_TRICKLE_FEED_DISABLE); 6441 6442 I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe))); 6443 POSTING_READ(DSPSURF(pipe)); 6444 } 6445} 6446 6447static void ilk_init_lp_watermarks(struct drm_device *dev) 6448{ 6449 struct drm_i915_private *dev_priv = dev->dev_private; 6450 6451 I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN); 6452 I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN); 6453 I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN); 6454 6455 /* 6456 * Don't touch WM1S_LP_EN here. 6457 * Doing so could cause underruns. 
6458 */ 6459} 6460 6461static void ironlake_init_clock_gating(struct drm_device *dev) 6462{ 6463 struct drm_i915_private *dev_priv = dev->dev_private; 6464 uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; 6465 6466 /* 6467 * Required for FBC 6468 * WaFbcDisableDpfcClockGating:ilk 6469 */ 6470 dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE | 6471 ILK_DPFCUNIT_CLOCK_GATE_DISABLE | 6472 ILK_DPFDUNIT_CLOCK_GATE_ENABLE; 6473 6474 I915_WRITE(PCH_3DCGDIS0, 6475 MARIUNIT_CLOCK_GATE_DISABLE | 6476 SVSMUNIT_CLOCK_GATE_DISABLE); 6477 I915_WRITE(PCH_3DCGDIS1, 6478 VFMUNIT_CLOCK_GATE_DISABLE); 6479 6480 /* 6481 * According to the spec the following bits should be set in 6482 * order to enable memory self-refresh 6483 * The bit 22/21 of 0x42004 6484 * The bit 5 of 0x42020 6485 * The bit 15 of 0x45000 6486 */ 6487 I915_WRITE(ILK_DISPLAY_CHICKEN2, 6488 (I915_READ(ILK_DISPLAY_CHICKEN2) | 6489 ILK_DPARB_GATE | ILK_VSDPFD_FULL)); 6490 dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE; 6491 I915_WRITE(DISP_ARB_CTL, 6492 (I915_READ(DISP_ARB_CTL) | 6493 DISP_FBC_WM_DIS)); 6494 6495 ilk_init_lp_watermarks(dev); 6496 6497 /* 6498 * Based on the document from hardware guys the following bits 6499 * should be set unconditionally in order to enable FBC. 6500 * The bit 22 of 0x42000 6501 * The bit 22 of 0x42004 6502 * The bit 7,8,9 of 0x42020. 6503 */ 6504 if (IS_IRONLAKE_M(dev)) { 6505 /* WaFbcAsynchFlipDisableFbcQueue:ilk */ 6506 I915_WRITE(ILK_DISPLAY_CHICKEN1, 6507 I915_READ(ILK_DISPLAY_CHICKEN1) | 6508 ILK_FBCQ_DIS); 6509 I915_WRITE(ILK_DISPLAY_CHICKEN2, 6510 I915_READ(ILK_DISPLAY_CHICKEN2) | 6511 ILK_DPARB_GATE); 6512 } 6513 6514 I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate); 6515 6516 I915_WRITE(ILK_DISPLAY_CHICKEN2, 6517 I915_READ(ILK_DISPLAY_CHICKEN2) | 6518 ILK_ELPIN_409_SELECT); 6519 I915_WRITE(_3D_CHICKEN2, 6520 _3D_CHICKEN2_WM_READ_PIPELINED << 16 | 6521 _3D_CHICKEN2_WM_READ_PIPELINED); 6522 6523 /* WaDisableRenderCachePipelinedFlush:ilk */ 6524 I915_WRITE(CACHE_MODE_0, 6525 _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE)); 6526 6527 /* WaDisable_RenderCache_OperationalFlush:ilk */ 6528 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 6529 6530 g4x_disable_trickle_feed(dev); 6531 6532 ibx_init_clock_gating(dev); 6533} 6534 6535static void cpt_init_clock_gating(struct drm_device *dev) 6536{ 6537 struct drm_i915_private *dev_priv = dev->dev_private; 6538 int pipe; 6539 uint32_t val; 6540 6541 /* 6542 * On Ibex Peak and Cougar Point, we need to disable clock 6543 * gating for the panel power sequencer or it will fail to 6544 * start up when no ports are active. 6545 */ 6546 I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE | 6547 PCH_DPLUNIT_CLOCK_GATE_DISABLE | 6548 PCH_CPUNIT_CLOCK_GATE_DISABLE); 6549 I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) | 6550 DPLS_EDP_PPS_FIX_DIS); 6551 /* The below fixes the weird display corruption, a few pixels shifted 6552 * downward, on (only) LVDS of some HP laptops with IVY. 
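 * Forcing the transcoder timing override and a zero frame start delay
 * in TRANS_CHICKEN2 below is what cures it.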
6553 */ 6554 for_each_pipe(dev_priv, pipe) { 6555 val = I915_READ(TRANS_CHICKEN2(pipe)); 6556 val |= TRANS_CHICKEN2_TIMING_OVERRIDE; 6557 val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED; 6558 if (dev_priv->vbt.fdi_rx_polarity_inverted) 6559 val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED; 6560 val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK; 6561 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER; 6562 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH; 6563 I915_WRITE(TRANS_CHICKEN2(pipe), val); 6564 } 6565 /* WADP0ClockGatingDisable */ 6566 for_each_pipe(dev_priv, pipe) { 6567 I915_WRITE(TRANS_CHICKEN1(pipe), 6568 TRANS_CHICKEN1_DP0UNIT_GC_DISABLE); 6569 } 6570} 6571 6572static void gen6_check_mch_setup(struct drm_device *dev) 6573{ 6574 struct drm_i915_private *dev_priv = dev->dev_private; 6575 uint32_t tmp; 6576 6577 tmp = I915_READ(MCH_SSKPD); 6578 if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL) 6579 DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x. This can cause underruns.\n", 6580 tmp); 6581} 6582 6583static void gen6_init_clock_gating(struct drm_device *dev) 6584{ 6585 struct drm_i915_private *dev_priv = dev->dev_private; 6586 uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; 6587 6588 I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate); 6589 6590 I915_WRITE(ILK_DISPLAY_CHICKEN2, 6591 I915_READ(ILK_DISPLAY_CHICKEN2) | 6592 ILK_ELPIN_409_SELECT); 6593 6594 /* WaDisableHiZPlanesWhenMSAAEnabled:snb */ 6595 I915_WRITE(_3D_CHICKEN, 6596 _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB)); 6597 6598 /* WaDisable_RenderCache_OperationalFlush:snb */ 6599 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 6600 6601 /* 6602 * BSpec recommends 8x4 when MSAA is used, 6603 * however in practice 16x4 seems fastest. 6604 * 6605 * Note that PS/WM thread counts depend on the WIZ hashing 6606 * disable bit, which we don't touch here, but it's good 6607 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 6608 */ 6609 I915_WRITE(GEN6_GT_MODE, 6610 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); 6611 6612 ilk_init_lp_watermarks(dev); 6613 6614 I915_WRITE(CACHE_MODE_0, 6615 _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB)); 6616 6617 I915_WRITE(GEN6_UCGCTL1, 6618 I915_READ(GEN6_UCGCTL1) | 6619 GEN6_BLBUNIT_CLOCK_GATE_DISABLE | 6620 GEN6_CSUNIT_CLOCK_GATE_DISABLE); 6621 6622 /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock 6623 * gating disable must be set. Failure to set it results in 6624 * flickering pixels due to Z write ordering failures after 6625 * some amount of runtime in the Mesa "fire" demo, and Unigine 6626 * Sanctuary and Tropics, and apparently anything else with 6627 * alpha test or pixel discard. 6628 * 6629 * According to the spec, bit 11 (RCCUNIT) must also be set, 6630 * but we didn't debug actual testcases to find it out. 6631 * 6632 * WaDisableRCCUnitClockGating:snb 6633 * WaDisableRCPBUnitClockGating:snb 6634 */ 6635 I915_WRITE(GEN6_UCGCTL2, 6636 GEN6_RCPBUNIT_CLOCK_GATE_DISABLE | 6637 GEN6_RCCUNIT_CLOCK_GATE_DISABLE); 6638 6639 /* WaStripsFansDisableFastClipPerformanceFix:snb */ 6640 I915_WRITE(_3D_CHICKEN3, 6641 _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL)); 6642 6643 /* 6644 * Bspec says: 6645 * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and 6646 * 3DSTATE_SF number of SF output attributes is more than 16."
6647 */ 6648 I915_WRITE(_3D_CHICKEN3, 6649 _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH)); 6650 6651 /* 6652 * According to the spec the following bits should be 6653 * set in order to enable memory self-refresh and fbc: 6654 * The bit21 and bit22 of 0x42000 6655 * The bit21 and bit22 of 0x42004 6656 * The bit5 and bit7 of 0x42020 6657 * The bit14 of 0x70180 6658 * The bit14 of 0x71180 6659 * 6660 * WaFbcAsynchFlipDisableFbcQueue:snb 6661 */ 6662 I915_WRITE(ILK_DISPLAY_CHICKEN1, 6663 I915_READ(ILK_DISPLAY_CHICKEN1) | 6664 ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS); 6665 I915_WRITE(ILK_DISPLAY_CHICKEN2, 6666 I915_READ(ILK_DISPLAY_CHICKEN2) | 6667 ILK_DPARB_GATE | ILK_VSDPFD_FULL); 6668 I915_WRITE(ILK_DSPCLK_GATE_D, 6669 I915_READ(ILK_DSPCLK_GATE_D) | 6670 ILK_DPARBUNIT_CLOCK_GATE_ENABLE | 6671 ILK_DPFDUNIT_CLOCK_GATE_ENABLE); 6672 6673 g4x_disable_trickle_feed(dev); 6674 6675 cpt_init_clock_gating(dev); 6676 6677 gen6_check_mch_setup(dev); 6678} 6679 6680static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv) 6681{ 6682 uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE); 6683 6684 /* 6685 * WaVSThreadDispatchOverride:ivb,vlv 6686 * 6687 * This actually overrides the dispatch 6688 * mode for all thread types. 6689 */ 6690 reg &= ~GEN7_FF_SCHED_MASK; 6691 reg |= GEN7_FF_TS_SCHED_HW; 6692 reg |= GEN7_FF_VS_SCHED_HW; 6693 reg |= GEN7_FF_DS_SCHED_HW; 6694 6695 I915_WRITE(GEN7_FF_THREAD_MODE, reg); 6696} 6697 6698static void lpt_init_clock_gating(struct drm_device *dev) 6699{ 6700 struct drm_i915_private *dev_priv = dev->dev_private; 6701 6702 /* 6703 * TODO: this bit should only be enabled when really needed, then 6704 * disabled when not needed anymore in order to save power. 6705 */ 6706 if (HAS_PCH_LPT_LP(dev)) 6707 I915_WRITE(SOUTH_DSPCLK_GATE_D, 6708 I915_READ(SOUTH_DSPCLK_GATE_D) | 6709 PCH_LP_PARTITION_LEVEL_DISABLE); 6710 6711 /* WADPOClockGatingDisable:hsw */ 6712 I915_WRITE(TRANS_CHICKEN1(PIPE_A), 6713 I915_READ(TRANS_CHICKEN1(PIPE_A)) | 6714 TRANS_CHICKEN1_DP0UNIT_GC_DISABLE); 6715} 6716 6717static void lpt_suspend_hw(struct drm_device *dev) 6718{ 6719 struct drm_i915_private *dev_priv = dev->dev_private; 6720 6721 if (HAS_PCH_LPT_LP(dev)) { 6722 uint32_t val = I915_READ(SOUTH_DSPCLK_GATE_D); 6723 6724 val &= ~PCH_LP_PARTITION_LEVEL_DISABLE; 6725 I915_WRITE(SOUTH_DSPCLK_GATE_D, val); 6726 } 6727} 6728 6729static void kabylake_init_clock_gating(struct drm_device *dev) 6730{ 6731 struct drm_i915_private *dev_priv = dev->dev_private; 6732 6733 gen9_init_clock_gating(dev); 6734 6735 /* WaDisableSDEUnitClockGating:kbl */ 6736 if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) 6737 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | 6738 GEN8_SDEUNIT_CLOCK_GATE_DISABLE); 6739 6740 /* WaDisableGamClockGating:kbl */ 6741 if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) 6742 I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) | 6743 GEN6_GAMUNIT_CLOCK_GATE_DISABLE); 6744 6745 /* WaFbcNukeOnHostModify:kbl */ 6746 I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) | 6747 ILK_DPFC_NUKE_ON_ANY_MODIFICATION); 6748} 6749 6750static void skylake_init_clock_gating(struct drm_device *dev) 6751{ 6752 struct drm_i915_private *dev_priv = dev->dev_private; 6753 6754 gen9_init_clock_gating(dev); 6755 6756 /* WaFbcNukeOnHostModify:skl */ 6757 I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) | 6758 ILK_DPFC_NUKE_ON_ANY_MODIFICATION); 6759} 6760 6761static void broadwell_init_clock_gating(struct drm_device *dev) 6762{ 6763 struct drm_i915_private *dev_priv = dev->dev_private; 6764 
enum pipe pipe; 6765 uint32_t misccpctl; 6766 6767 ilk_init_lp_watermarks(dev); 6768 6769 /* WaSwitchSolVfFArbitrationPriority:bdw */ 6770 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); 6771 6772 /* WaPsrDPAMaskVBlankInSRD:bdw */ 6773 I915_WRITE(CHICKEN_PAR1_1, 6774 I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD); 6775 6776 /* WaPsrDPRSUnmaskVBlankInSRD:bdw */ 6777 for_each_pipe(dev_priv, pipe) { 6778 I915_WRITE(CHICKEN_PIPESL_1(pipe), 6779 I915_READ(CHICKEN_PIPESL_1(pipe)) | 6780 BDW_DPRS_MASK_VBLANK_SRD); 6781 } 6782 6783 /* WaVSRefCountFullforceMissDisable:bdw */ 6784 /* WaDSRefCountFullforceMissDisable:bdw */ 6785 I915_WRITE(GEN7_FF_THREAD_MODE, 6786 I915_READ(GEN7_FF_THREAD_MODE) & 6787 ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME)); 6788 6789 I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL, 6790 _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE)); 6791 6792 /* WaDisableSDEUnitClockGating:bdw */ 6793 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | 6794 GEN8_SDEUNIT_CLOCK_GATE_DISABLE); 6795 6796 /* 6797 * WaProgramL3SqcReg1Default:bdw 6798 * WaTempDisableDOPClkGating:bdw 6799 */ 6800 misccpctl = I915_READ(GEN7_MISCCPCTL); 6801 I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE); 6802 I915_WRITE(GEN8_L3SQCREG1, BDW_WA_L3SQCREG1_DEFAULT); 6803 /* 6804 * Wait at least 100 clocks before re-enabling clock gating. See 6805 * the definition of L3SQCREG1 in BSpec. 6806 */ 6807 POSTING_READ(GEN8_L3SQCREG1); 6808 udelay(1); 6809 I915_WRITE(GEN7_MISCCPCTL, misccpctl); 6810 6811 /* 6812 * WaGttCachingOffByDefault:bdw 6813 * GTT cache may not work with big pages, so if those 6814 * are ever enabled GTT cache may need to be disabled. 6815 */ 6816 I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL); 6817 6818 lpt_init_clock_gating(dev); 6819} 6820 6821static void haswell_init_clock_gating(struct drm_device *dev) 6822{ 6823 struct drm_i915_private *dev_priv = dev->dev_private; 6824 6825 ilk_init_lp_watermarks(dev); 6826 6827 /* L3 caching of data atomics doesn't work -- disable it. */ 6828 I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE); 6829 I915_WRITE(HSW_ROW_CHICKEN3, 6830 _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE)); 6831 6832 /* This is required by WaCatErrorRejectionIssue:hsw */ 6833 I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, 6834 I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | 6835 GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); 6836 6837 /* WaVSRefCountFullforceMissDisable:hsw */ 6838 I915_WRITE(GEN7_FF_THREAD_MODE, 6839 I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME); 6840 6841 /* WaDisable_RenderCache_OperationalFlush:hsw */ 6842 I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 6843 6844 /* enable HiZ Raw Stall Optimization */ 6845 I915_WRITE(CACHE_MODE_0_GEN7, 6846 _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE)); 6847 6848 /* WaDisable4x2SubspanOptimization:hsw */ 6849 I915_WRITE(CACHE_MODE_1, 6850 _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); 6851 6852 /* 6853 * BSpec recommends 8x4 when MSAA is used, 6854 * however in practice 16x4 seems fastest. 6855 * 6856 * Note that PS/WM thread counts depend on the WIZ hashing 6857 * disable bit, which we don't touch here, but it's good 6858 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 
6859 */ 6860 I915_WRITE(GEN7_GT_MODE, 6861 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); 6862 6863 /* WaSampleCChickenBitEnable:hsw */ 6864 I915_WRITE(HALF_SLICE_CHICKEN3, 6865 _MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE)); 6866 6867 /* WaSwitchSolVfFArbitrationPriority:hsw */ 6868 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); 6869 6870 /* WaRsPkgCStateDisplayPMReq:hsw */ 6871 I915_WRITE(CHICKEN_PAR1_1, 6872 I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES); 6873 6874 lpt_init_clock_gating(dev); 6875} 6876 6877static void ivybridge_init_clock_gating(struct drm_device *dev) 6878{ 6879 struct drm_i915_private *dev_priv = dev->dev_private; 6880 uint32_t snpcr; 6881 6882 ilk_init_lp_watermarks(dev); 6883 6884 I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE); 6885 6886 /* WaDisableEarlyCull:ivb */ 6887 I915_WRITE(_3D_CHICKEN3, 6888 _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL)); 6889 6890 /* WaDisableBackToBackFlipFix:ivb */ 6891 I915_WRITE(IVB_CHICKEN3, 6892 CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE | 6893 CHICKEN3_DGMG_DONE_FIX_DISABLE); 6894 6895 /* WaDisablePSDDualDispatchEnable:ivb */ 6896 if (IS_IVB_GT1(dev)) 6897 I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, 6898 _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE)); 6899 6900 /* WaDisable_RenderCache_OperationalFlush:ivb */ 6901 I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 6902 6903 /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */ 6904 I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1, 6905 GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC); 6906 6907 /* WaApplyL3ControlAndL3ChickenMode:ivb */ 6908 I915_WRITE(GEN7_L3CNTLREG1, 6909 GEN7_WA_FOR_GEN7_L3_CONTROL); 6910 I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER, 6911 GEN7_WA_L3_CHICKEN_MODE); 6912 if (IS_IVB_GT1(dev)) 6913 I915_WRITE(GEN7_ROW_CHICKEN2, 6914 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); 6915 else { 6916 /* must write both registers */ 6917 I915_WRITE(GEN7_ROW_CHICKEN2, 6918 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); 6919 I915_WRITE(GEN7_ROW_CHICKEN2_GT2, 6920 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); 6921 } 6922 6923 /* WaForceL3Serialization:ivb */ 6924 I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) & 6925 ~L3SQ_URB_READ_CAM_MATCH_DISABLE); 6926 6927 /* 6928 * According to the spec, bit 13 (RCZUNIT) must be set on IVB. 6929 * This implements the WaDisableRCZUnitClockGating:ivb workaround. 6930 */ 6931 I915_WRITE(GEN6_UCGCTL2, 6932 GEN6_RCZUNIT_CLOCK_GATE_DISABLE); 6933 6934 /* This is required by WaCatErrorRejectionIssue:ivb */ 6935 I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, 6936 I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | 6937 GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); 6938 6939 g4x_disable_trickle_feed(dev); 6940 6941 gen7_setup_fixed_func_scheduler(dev_priv); 6942 6943 if (0) { /* causes HiZ corruption on ivb:gt1 */ 6944 /* enable HiZ Raw Stall Optimization */ 6945 I915_WRITE(CACHE_MODE_0_GEN7, 6946 _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE)); 6947 } 6948 6949 /* WaDisable4x2SubspanOptimization:ivb */ 6950 I915_WRITE(CACHE_MODE_1, 6951 _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); 6952 6953 /* 6954 * BSpec recommends 8x4 when MSAA is used, 6955 * however in practice 16x4 seems fastest. 6956 * 6957 * Note that PS/WM thread counts depend on the WIZ hashing 6958 * disable bit, which we don't touch here, but it's good 6959 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 
6960 */ 6961 I915_WRITE(GEN7_GT_MODE, 6962 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); 6963 6964 snpcr = I915_READ(GEN6_MBCUNIT_SNPCR); 6965 snpcr &= ~GEN6_MBC_SNPCR_MASK; 6966 snpcr |= GEN6_MBC_SNPCR_MED; 6967 I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr); 6968 6969 if (!HAS_PCH_NOP(dev)) 6970 cpt_init_clock_gating(dev); 6971 6972 gen6_check_mch_setup(dev); 6973} 6974 6975static void valleyview_init_clock_gating(struct drm_device *dev) 6976{ 6977 struct drm_i915_private *dev_priv = dev->dev_private; 6978 6979 /* WaDisableEarlyCull:vlv */ 6980 I915_WRITE(_3D_CHICKEN3, 6981 _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL)); 6982 6983 /* WaDisableBackToBackFlipFix:vlv */ 6984 I915_WRITE(IVB_CHICKEN3, 6985 CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE | 6986 CHICKEN3_DGMG_DONE_FIX_DISABLE); 6987 6988 /* WaPsdDispatchEnable:vlv */ 6989 /* WaDisablePSDDualDispatchEnable:vlv */ 6990 I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, 6991 _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP | 6992 GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE)); 6993 6994 /* WaDisable_RenderCache_OperationalFlush:vlv */ 6995 I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 6996 6997 /* WaForceL3Serialization:vlv */ 6998 I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) & 6999 ~L3SQ_URB_READ_CAM_MATCH_DISABLE); 7000 7001 /* WaDisableDopClockGating:vlv */ 7002 I915_WRITE(GEN7_ROW_CHICKEN2, 7003 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); 7004 7005 /* This is required by WaCatErrorRejectionIssue:vlv */ 7006 I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, 7007 I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | 7008 GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); 7009 7010 gen7_setup_fixed_func_scheduler(dev_priv); 7011 7012 /* 7013 * According to the spec, bit 13 (RCZUNIT) must be set on IVB. 7014 * This implements the WaDisableRCZUnitClockGating:vlv workaround. 7015 */ 7016 I915_WRITE(GEN6_UCGCTL2, 7017 GEN6_RCZUNIT_CLOCK_GATE_DISABLE); 7018 7019 /* WaDisableL3Bank2xClockGate:vlv 7020 * Disabling L3 clock gating - MMIO 940c[25] = 1 7021 * Set bit 25 to disable L3_BANK_2x_CLK_GATING */ 7022 I915_WRITE(GEN7_UCGCTL4, 7023 I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE); 7024 7025 /* 7026 * BSpec says this must be set, even though 7027 * WaDisable4x2SubspanOptimization isn't listed for VLV. 7028 */ 7029 I915_WRITE(CACHE_MODE_1, 7030 _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); 7031 7032 /* 7033 * BSpec recommends 8x4 when MSAA is used, 7034 * however in practice 16x4 seems fastest. 7035 * 7036 * Note that PS/WM thread counts depend on the WIZ hashing 7037 * disable bit, which we don't touch here, but it's good 7038 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 7039 */ 7040 I915_WRITE(GEN7_GT_MODE, 7041 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); 7042 7043 /* 7044 * WaIncreaseL3CreditsForVLVB0:vlv 7045 * This is the hardware default actually. 7046 */ 7047 I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE); 7048 7049 /* 7050 * WaDisableVLVClockGating_VBIIssue:vlv 7051 * Disable clock gating on the GCFG unit to prevent a delay 7052 * in the reporting of vblank events.
7053 */ 7054 I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS); 7055} 7056 7057static void cherryview_init_clock_gating(struct drm_device *dev) 7058{ 7059 struct drm_i915_private *dev_priv = dev->dev_private; 7060 7061 /* WaVSRefCountFullforceMissDisable:chv */ 7062 /* WaDSRefCountFullforceMissDisable:chv */ 7063 I915_WRITE(GEN7_FF_THREAD_MODE, 7064 I915_READ(GEN7_FF_THREAD_MODE) & 7065 ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME)); 7066 7067 /* WaDisableSemaphoreAndSyncFlipWait:chv */ 7068 I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL, 7069 _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE)); 7070 7071 /* WaDisableCSUnitClockGating:chv */ 7072 I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) | 7073 GEN6_CSUNIT_CLOCK_GATE_DISABLE); 7074 7075 /* WaDisableSDEUnitClockGating:chv */ 7076 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | 7077 GEN8_SDEUNIT_CLOCK_GATE_DISABLE); 7078 7079 /* 7080 * GTT cache may not work with big pages, so if those 7081 * are ever enabled GTT cache may need to be disabled. 7082 */ 7083 I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL); 7084} 7085 7086static void g4x_init_clock_gating(struct drm_device *dev) 7087{ 7088 struct drm_i915_private *dev_priv = dev->dev_private; 7089 uint32_t dspclk_gate; 7090 7091 I915_WRITE(RENCLK_GATE_D1, 0); 7092 I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE | 7093 GS_UNIT_CLOCK_GATE_DISABLE | 7094 CL_UNIT_CLOCK_GATE_DISABLE); 7095 I915_WRITE(RAMCLK_GATE_D, 0); 7096 dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE | 7097 OVRUNIT_CLOCK_GATE_DISABLE | 7098 OVCUNIT_CLOCK_GATE_DISABLE; 7099 if (IS_GM45(dev)) 7100 dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE; 7101 I915_WRITE(DSPCLK_GATE_D, dspclk_gate); 7102 7103 /* WaDisableRenderCachePipelinedFlush */ 7104 I915_WRITE(CACHE_MODE_0, 7105 _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE)); 7106 7107 /* WaDisable_RenderCache_OperationalFlush:g4x */ 7108 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 7109 7110 g4x_disable_trickle_feed(dev); 7111} 7112 7113static void crestline_init_clock_gating(struct drm_device *dev) 7114{ 7115 struct drm_i915_private *dev_priv = dev->dev_private; 7116 7117 I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE); 7118 I915_WRITE(RENCLK_GATE_D2, 0); 7119 I915_WRITE(DSPCLK_GATE_D, 0); 7120 I915_WRITE(RAMCLK_GATE_D, 0); 7121 I915_WRITE16(DEUC, 0); 7122 I915_WRITE(MI_ARB_STATE, 7123 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); 7124 7125 /* WaDisable_RenderCache_OperationalFlush:gen4 */ 7126 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 7127} 7128 7129static void broadwater_init_clock_gating(struct drm_device *dev) 7130{ 7131 struct drm_i915_private *dev_priv = dev->dev_private; 7132 7133 I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE | 7134 I965_RCC_CLOCK_GATE_DISABLE | 7135 I965_RCPB_CLOCK_GATE_DISABLE | 7136 I965_ISC_CLOCK_GATE_DISABLE | 7137 I965_FBC_CLOCK_GATE_DISABLE); 7138 I915_WRITE(RENCLK_GATE_D2, 0); 7139 I915_WRITE(MI_ARB_STATE, 7140 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); 7141 7142 /* WaDisable_RenderCache_OperationalFlush:gen4 */ 7143 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 7144} 7145 7146static void gen3_init_clock_gating(struct drm_device *dev) 7147{ 7148 struct drm_i915_private *dev_priv = dev->dev_private; 7149 u32 dstate = I915_READ(D_STATE); 7150 7151 dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING | 7152 DSTATE_DOT_CLOCK_GATING; 7153 I915_WRITE(D_STATE, dstate); 7154 7155 if (IS_PINEVIEW(dev)) 7156 I915_WRITE(ECOSKPD, 
_MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY)); 7157 7158 /* IIR "flip pending" means done if this bit is set */ 7159 I915_WRITE(ECOSKPD, _MASKED_BIT_DISABLE(ECO_FLIP_DONE)); 7160 7161 /* interrupts should cause a wake up from C3 */ 7162 I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_AGPBUSY_INT_EN)); 7163 7164 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ 7165 I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE)); 7166 7167 I915_WRITE(MI_ARB_STATE, 7168 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); 7169} 7170 7171static void i85x_init_clock_gating(struct drm_device *dev) 7172{ 7173 struct drm_i915_private *dev_priv = dev->dev_private; 7174 7175 I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE); 7176 7177 /* interrupts should cause a wake up from C3 */ 7178 I915_WRITE(MI_STATE, _MASKED_BIT_ENABLE(MI_AGPBUSY_INT_EN) | 7179 _MASKED_BIT_DISABLE(MI_AGPBUSY_830_MODE)); 7180 7181 I915_WRITE(MEM_MODE, 7182 _MASKED_BIT_ENABLE(MEM_DISPLAY_TRICKLE_FEED_DISABLE)); 7183} 7184 7185static void i830_init_clock_gating(struct drm_device *dev) 7186{ 7187 struct drm_i915_private *dev_priv = dev->dev_private; 7188 7189 I915_WRITE(DSPCLK_GATE_D, OVRUNIT_CLOCK_GATE_DISABLE); 7190 7191 I915_WRITE(MEM_MODE, 7192 _MASKED_BIT_ENABLE(MEM_DISPLAY_A_TRICKLE_FEED_DISABLE) | 7193 _MASKED_BIT_ENABLE(MEM_DISPLAY_B_TRICKLE_FEED_DISABLE)); 7194} 7195 7196void intel_init_clock_gating(struct drm_device *dev) 7197{ 7198 struct drm_i915_private *dev_priv = dev->dev_private; 7199 7200 dev_priv->display.init_clock_gating(dev); 7201} 7202 7203void intel_suspend_hw(struct drm_device *dev) 7204{ 7205 if (HAS_PCH_LPT(dev)) 7206 lpt_suspend_hw(dev); 7207} 7208 7209static void nop_init_clock_gating(struct drm_device *dev) 7210{ 7211 DRM_DEBUG_KMS("No clock gating settings or workarounds applied.\n"); 7212} 7213 7214/** 7215 * intel_init_clock_gating_hooks - setup the clock gating hooks 7216 * @dev_priv: device private 7217 * 7218 * Setup the hooks that configure which clocks of a given platform can be 7219 * gated and also apply various GT and display specific workarounds for these 7220 * platforms. Note that some GT specific workarounds are applied separately 7221 * when GPU contexts or batchbuffers start their execution. 
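 * Platforms without a match below fall back to nop_init_clock_gating(),
 * which just logs that no settings were applied.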
7222 */ 7223void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv) 7224{ 7225 if (IS_SKYLAKE(dev_priv)) 7226 dev_priv->display.init_clock_gating = skylake_init_clock_gating; 7227 else if (IS_KABYLAKE(dev_priv)) 7228 dev_priv->display.init_clock_gating = kabylake_init_clock_gating; 7229 else if (IS_BROXTON(dev_priv)) 7230 dev_priv->display.init_clock_gating = bxt_init_clock_gating; 7231 else if (IS_BROADWELL(dev_priv)) 7232 dev_priv->display.init_clock_gating = broadwell_init_clock_gating; 7233 else if (IS_CHERRYVIEW(dev_priv)) 7234 dev_priv->display.init_clock_gating = cherryview_init_clock_gating; 7235 else if (IS_HASWELL(dev_priv)) 7236 dev_priv->display.init_clock_gating = haswell_init_clock_gating; 7237 else if (IS_IVYBRIDGE(dev_priv)) 7238 dev_priv->display.init_clock_gating = ivybridge_init_clock_gating; 7239 else if (IS_VALLEYVIEW(dev_priv)) 7240 dev_priv->display.init_clock_gating = valleyview_init_clock_gating; 7241 else if (IS_GEN6(dev_priv)) 7242 dev_priv->display.init_clock_gating = gen6_init_clock_gating; 7243 else if (IS_GEN5(dev_priv)) 7244 dev_priv->display.init_clock_gating = ironlake_init_clock_gating; 7245 else if (IS_G4X(dev_priv)) 7246 dev_priv->display.init_clock_gating = g4x_init_clock_gating; 7247 else if (IS_CRESTLINE(dev_priv)) 7248 dev_priv->display.init_clock_gating = crestline_init_clock_gating; 7249 else if (IS_BROADWATER(dev_priv)) 7250 dev_priv->display.init_clock_gating = broadwater_init_clock_gating; 7251 else if (IS_GEN3(dev_priv)) 7252 dev_priv->display.init_clock_gating = gen3_init_clock_gating; 7253 else if (IS_I85X(dev_priv) || IS_I865G(dev_priv)) 7254 dev_priv->display.init_clock_gating = i85x_init_clock_gating; 7255 else if (IS_GEN2(dev_priv)) 7256 dev_priv->display.init_clock_gating = i830_init_clock_gating; 7257 else { 7258 MISSING_CASE(INTEL_DEVID(dev_priv)); 7259 dev_priv->display.init_clock_gating = nop_init_clock_gating; 7260 } 7261} 7262 7263/* Set up chip specific power management-related functions */ 7264void intel_init_pm(struct drm_device *dev) 7265{ 7266 struct drm_i915_private *dev_priv = dev->dev_private; 7267 7268 intel_fbc_init(dev_priv); 7269 7270 /* For cxsr */ 7271 if (IS_PINEVIEW(dev)) 7272 i915_pineview_get_mem_freq(dev); 7273 else if (IS_GEN5(dev)) 7274 i915_ironlake_get_mem_freq(dev); 7275 7276 /* For FIFO watermark updates */ 7277 if (INTEL_INFO(dev)->gen >= 9) { 7278 skl_setup_wm_latency(dev); 7279 dev_priv->display.update_wm = skl_update_wm; 7280 } else if (HAS_PCH_SPLIT(dev)) { 7281 ilk_setup_wm_latency(dev); 7282 7283 if ((IS_GEN5(dev) && dev_priv->wm.pri_latency[1] && 7284 dev_priv->wm.spr_latency[1] && dev_priv->wm.cur_latency[1]) || 7285 (!IS_GEN5(dev) && dev_priv->wm.pri_latency[0] && 7286 dev_priv->wm.spr_latency[0] && dev_priv->wm.cur_latency[0])) { 7287 dev_priv->display.compute_pipe_wm = ilk_compute_pipe_wm; 7288 dev_priv->display.compute_intermediate_wm = 7289 ilk_compute_intermediate_wm; 7290 dev_priv->display.initial_watermarks = 7291 ilk_initial_watermarks; 7292 dev_priv->display.optimize_watermarks = 7293 ilk_optimize_watermarks; 7294 } else { 7295 DRM_DEBUG_KMS("Failed to read display plane latency. 
" 7296 "Disable CxSR\n"); 7297 } 7298 } else if (IS_CHERRYVIEW(dev)) { 7299 vlv_setup_wm_latency(dev); 7300 dev_priv->display.update_wm = vlv_update_wm; 7301 } else if (IS_VALLEYVIEW(dev)) { 7302 vlv_setup_wm_latency(dev); 7303 dev_priv->display.update_wm = vlv_update_wm; 7304 } else if (IS_PINEVIEW(dev)) { 7305 if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev), 7306 dev_priv->is_ddr3, 7307 dev_priv->fsb_freq, 7308 dev_priv->mem_freq)) { 7309 DRM_INFO("failed to find known CxSR latency " 7310 "(found ddr%s fsb freq %d, mem freq %d), " 7311 "disabling CxSR\n", 7312 (dev_priv->is_ddr3 == 1) ? "3" : "2", 7313 dev_priv->fsb_freq, dev_priv->mem_freq); 7314 /* Disable CxSR and never update its watermark again */ 7315 intel_set_memory_cxsr(dev_priv, false); 7316 dev_priv->display.update_wm = NULL; 7317 } else 7318 dev_priv->display.update_wm = pineview_update_wm; 7319 } else if (IS_G4X(dev)) { 7320 dev_priv->display.update_wm = g4x_update_wm; 7321 } else if (IS_GEN4(dev)) { 7322 dev_priv->display.update_wm = i965_update_wm; 7323 } else if (IS_GEN3(dev)) { 7324 dev_priv->display.update_wm = i9xx_update_wm; 7325 dev_priv->display.get_fifo_size = i9xx_get_fifo_size; 7326 } else if (IS_GEN2(dev)) { 7327 if (INTEL_INFO(dev)->num_pipes == 1) { 7328 dev_priv->display.update_wm = i845_update_wm; 7329 dev_priv->display.get_fifo_size = i845_get_fifo_size; 7330 } else { 7331 dev_priv->display.update_wm = i9xx_update_wm; 7332 dev_priv->display.get_fifo_size = i830_get_fifo_size; 7333 } 7334 } else { 7335 DRM_ERROR("unexpected fall-through in intel_init_pm\n"); 7336 } 7337} 7338 7339int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val) 7340{ 7341 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 7342 7343 if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) { 7344 DRM_DEBUG_DRIVER("warning: pcode (read) mailbox access failed\n"); 7345 return -EAGAIN; 7346 } 7347 7348 I915_WRITE(GEN6_PCODE_DATA, *val); 7349 I915_WRITE(GEN6_PCODE_DATA1, 0); 7350 I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox); 7351 7352 if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0, 7353 500)) { 7354 DRM_ERROR("timeout waiting for pcode read (%d) to finish\n", mbox); 7355 return -ETIMEDOUT; 7356 } 7357 7358 *val = I915_READ(GEN6_PCODE_DATA); 7359 I915_WRITE(GEN6_PCODE_DATA, 0); 7360 7361 return 0; 7362} 7363 7364int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u32 mbox, u32 val) 7365{ 7366 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 7367 7368 if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) { 7369 DRM_DEBUG_DRIVER("warning: pcode (write) mailbox access failed\n"); 7370 return -EAGAIN; 7371 } 7372 7373 I915_WRITE(GEN6_PCODE_DATA, val); 7374 I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox); 7375 7376 if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0, 7377 500)) { 7378 DRM_ERROR("timeout waiting for pcode write (%d) to finish\n", mbox); 7379 return -ETIMEDOUT; 7380 } 7381 7382 I915_WRITE(GEN6_PCODE_DATA, 0); 7383 7384 return 0; 7385} 7386 7387static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val) 7388{ 7389 /* 7390 * N = val - 0xb7 7391 * Slow = Fast = GPLL ref * N 7392 */ 7393 return DIV_ROUND_CLOSEST(dev_priv->rps.gpll_ref_freq * (val - 0xb7), 1000); 7394} 7395 7396static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val) 7397{ 7398 return DIV_ROUND_CLOSEST(1000 * val, dev_priv->rps.gpll_ref_freq) + 0xb7; 7399} 7400 7401static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val) 7402{ 7403 /* 7404 * N = 
static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val)
{
	/*
	 * N = val / 2
	 * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
	 */
	return DIV_ROUND_CLOSEST(dev_priv->rps.gpll_ref_freq * val, 2 * 2 * 1000);
}

static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val)
{
	/* CHV needs even values */
	return DIV_ROUND_CLOSEST(2 * 1000 * val, dev_priv->rps.gpll_ref_freq) * 2;
}

int intel_gpu_freq(struct drm_i915_private *dev_priv, int val)
{
	if (IS_GEN9(dev_priv))
		return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
					 GEN9_FREQ_SCALER);
	else if (IS_CHERRYVIEW(dev_priv))
		return chv_gpu_freq(dev_priv, val);
	else if (IS_VALLEYVIEW(dev_priv))
		return byt_gpu_freq(dev_priv, val);
	else
		return val * GT_FREQUENCY_MULTIPLIER;
}

int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
{
	if (IS_GEN9(dev_priv))
		return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
					 GT_FREQUENCY_MULTIPLIER);
	else if (IS_CHERRYVIEW(dev_priv))
		return chv_freq_opcode(dev_priv, val);
	else if (IS_VALLEYVIEW(dev_priv))
		return byt_freq_opcode(dev_priv, val);
	else
		return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
}

struct request_boost {
	struct work_struct work;
	struct drm_i915_gem_request *req;
};

static void __intel_rps_boost_work(struct work_struct *work)
{
	struct request_boost *boost = container_of(work, struct request_boost, work);
	struct drm_i915_gem_request *req = boost->req;

	if (!i915_gem_request_completed(req, true))
		gen6_rps_boost(to_i915(req->engine->dev), NULL,
			       req->emitted_jiffies);

	i915_gem_request_unreference__unlocked(req);
	kfree(boost);
}

void intel_queue_rps_boost_for_request(struct drm_device *dev,
				       struct drm_i915_gem_request *req)
{
	struct request_boost *boost;

	if (req == NULL || INTEL_INFO(dev)->gen < 6)
		return;

	if (i915_gem_request_completed(req, true))
		return;

	boost = kmalloc(sizeof(*boost), GFP_ATOMIC);
	if (boost == NULL)
		return;

	i915_gem_request_reference(req);
	boost->req = req;

	INIT_WORK(&boost->work, __intel_rps_boost_work);
	queue_work(to_i915(dev)->wq, &boost->work);
}

void intel_pm_setup(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	mutex_init(&dev_priv->rps.hw_lock);
	spin_lock_init(&dev_priv->rps.client_lock);

	INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work,
			  intel_gen6_powersave_work);
	INIT_LIST_HEAD(&dev_priv->rps.clients);
	INIT_LIST_HEAD(&dev_priv->rps.semaphores.link);
	INIT_LIST_HEAD(&dev_priv->rps.mmioflips.link);

	dev_priv->pm.suspended = false;
	atomic_set(&dev_priv->pm.wakeref_count, 0);
	atomic_set(&dev_priv->pm.atomic_seq, 0);
}
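/*
 * Usage sketch for the pcode mailbox helpers defined earlier in this file
 * (illustrative; GEN6_PCODE_WRITE_MIN_FREQ_TABLE is an existing mailbox
 * command the driver already issues). Both helpers assert that rps.hw_lock
 * is held, so a caller looks like:
 *
 *	mutex_lock(&dev_priv->rps.hw_lock);
 *	ret = sandybridge_pcode_write(dev_priv,
 *				      GEN6_PCODE_WRITE_MIN_FREQ_TABLE, data);
 *	mutex_unlock(&dev_priv->rps.hw_lock);
 *
 * A non-zero return (-EAGAIN if the mailbox was still busy, -ETIMEDOUT if
 * the firmware never acknowledged the request) leaves the command unserved
 * and must be handled by the caller.
 */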