Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git, at v3.19
/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eugeni Dodonov <eugeni.dodonov@intel.com>
 *
 */

#include <linux/cpufreq.h>
#include "i915_drv.h"
#include "intel_drv.h"
#include "../../../platform/x86/intel_ips.h"
#include <linux/module.h>

/**
 * RC6 is a special power stage which allows the GPU to enter a very
 * low-voltage mode when idle, using down to 0V while at this stage. This
 * stage is entered automatically when the GPU is idle when RC6 support is
 * enabled, and as soon as a new workload arises the GPU wakes up
 * automatically as well.
 *
 * There are different RC6 modes available in Intel GPUs, which differ from
 * each other in the latency required to enter and leave RC6 and in the
 * voltage consumed by the GPU in the different states.
 *
 * The combination of the following flags defines which states the GPU is
 * allowed to enter, where RC6 is the normal RC6 state, RC6p is the deep RC6,
 * and RC6pp is the deepest RC6. Their support by hardware varies according
 * to the GPU, BIOS, chipset and platform. RC6 is usually the safest one and
 * the one which brings the most power savings; deeper states save more
 * power, but require higher latency to switch to and wake up.
 */
#define INTEL_RC6_ENABLE		(1<<0)
#define INTEL_RC6p_ENABLE		(1<<1)
#define INTEL_RC6pp_ENABLE		(1<<2)
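/*
 * Illustration (not part of the original file): how the flags above combine
 * into a single mode mask. A hypothetical helper for clarity only; the
 * driver's actual RC6 policy lives elsewhere in i915.
 */
#if 0	/* example only */
static int example_rc6_mask(int allow_deep, int allow_deepest)
{
	int mask = INTEL_RC6_ENABLE;		/* plain RC6 is the safe baseline */

	if (allow_deep)
		mask |= INTEL_RC6p_ENABLE;	/* also allow deep RC6 */
	if (allow_deepest)
		mask |= INTEL_RC6pp_ENABLE;	/* also allow deepest RC6 */

	return mask;	/* e.g. allow_deep=1 -> (1<<0)|(1<<1) = 0x3 */
}
#endif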
/* FBC, or Frame Buffer Compression, is a technique employed to compress the
 * framebuffer contents in-memory, aiming at reducing the required bandwidth
 * during in-memory transfers and, therefore, reducing power consumption.
 *
 * The benefits of FBC are mostly visible with solid backgrounds and
 * variation-less patterns.
 *
 * FBC-related functionality can be enabled by means of the
 * i915.enable_fbc parameter
 */

static void gen9_init_clock_gating(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	/*
	 * WaDisableSDEUnitClockGating:skl
	 * This seems to be a pre-production w/a.
	 */
	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);

	/*
	 * WaDisableDgMirrorFixInHalfSliceChicken5:skl
	 * This is a pre-production w/a.
	 */
	I915_WRITE(GEN9_HALF_SLICE_CHICKEN5,
		   I915_READ(GEN9_HALF_SLICE_CHICKEN5) &
		   ~GEN9_DG_MIRROR_FIX_ENABLE);

	/* Wa4x4STCOptimizationDisable:skl */
	I915_WRITE(CACHE_MODE_1,
		   _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));
}
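/*
 * Aside (illustration, not part of the original file): registers written with
 * _MASKED_BIT_ENABLE()/_MASKED_BIT_DISABLE(), such as CACHE_MODE_1 above, are
 * "masked" registers: the upper 16 bits of the written value select which of
 * the lower 16 bits the write actually changes. A standalone sketch of the
 * idiom, with EXAMPLE_* names standing in for the i915 macros:
 */
#if 0	/* example only */
#include <stdint.h>

#define EXAMPLE_MASKED_ENABLE(a)	(((a) << 16) | (a))
#define EXAMPLE_MASKED_DISABLE(a)	((a) << 16)

/* what the hardware conceptually does with a masked write */
static uint32_t example_masked_write(uint32_t reg, uint32_t val)
{
	uint32_t mask = val >> 16;	/* which bits the write may touch */

	return (reg & ~mask) | (val & mask);
}
/* example_masked_write(0x0, EXAMPLE_MASKED_ENABLE(1 << 2)) == 0x4 */
#endif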
static void i8xx_disable_fbc(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	u32 fbc_ctl;

	dev_priv->fbc.enabled = false;

	/* Disable compression */
	fbc_ctl = I915_READ(FBC_CONTROL);
	if ((fbc_ctl & FBC_CTL_EN) == 0)
		return;

	fbc_ctl &= ~FBC_CTL_EN;
	I915_WRITE(FBC_CONTROL, fbc_ctl);

	/* Wait for compressing bit to clear */
	if (wait_for((I915_READ(FBC_STATUS) & FBC_STAT_COMPRESSING) == 0, 10)) {
		DRM_DEBUG_KMS("FBC idle timed out\n");
		return;
	}

	DRM_DEBUG_KMS("disabled FBC\n");
}

static void i8xx_enable_fbc(struct drm_crtc *crtc)
{
	struct drm_device *dev = crtc->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_framebuffer *fb = crtc->primary->fb;
	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
	int cfb_pitch;
	int i;
	u32 fbc_ctl;

	dev_priv->fbc.enabled = true;

	cfb_pitch = dev_priv->fbc.size / FBC_LL_SIZE;
	if (fb->pitches[0] < cfb_pitch)
		cfb_pitch = fb->pitches[0];

	/* FBC_CTL wants 32B or 64B units */
	if (IS_GEN2(dev))
		cfb_pitch = (cfb_pitch / 32) - 1;
	else
		cfb_pitch = (cfb_pitch / 64) - 1;

	/* Clear old tags */
	for (i = 0; i < (FBC_LL_SIZE / 32) + 1; i++)
		I915_WRITE(FBC_TAG + (i * 4), 0);

	if (IS_GEN4(dev)) {
		u32 fbc_ctl2;

		/* Set it up... */
		fbc_ctl2 = FBC_CTL_FENCE_DBL | FBC_CTL_IDLE_IMM | FBC_CTL_CPU_FENCE;
		fbc_ctl2 |= FBC_CTL_PLANE(intel_crtc->plane);
		I915_WRITE(FBC_CONTROL2, fbc_ctl2);
		I915_WRITE(FBC_FENCE_OFF, crtc->y);
	}

	/* enable it... */
	fbc_ctl = I915_READ(FBC_CONTROL);
	fbc_ctl &= 0x3fff << FBC_CTL_INTERVAL_SHIFT;
	fbc_ctl |= FBC_CTL_EN | FBC_CTL_PERIODIC;
	if (IS_I945GM(dev))
		fbc_ctl |= FBC_CTL_C3_IDLE; /* 945 needs special SR handling */
	fbc_ctl |= (cfb_pitch & 0xff) << FBC_CTL_STRIDE_SHIFT;
	fbc_ctl |= obj->fence_reg;
	I915_WRITE(FBC_CONTROL, fbc_ctl);

	DRM_DEBUG_KMS("enabled FBC, pitch %d, yoff %d, plane %c\n",
		      cfb_pitch, crtc->y, plane_name(intel_crtc->plane));
}

static bool i8xx_fbc_enabled(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	return I915_READ(FBC_CONTROL) & FBC_CTL_EN;
}

static void g4x_enable_fbc(struct drm_crtc *crtc)
{
	struct drm_device *dev = crtc->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_framebuffer *fb = crtc->primary->fb;
	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
	u32 dpfc_ctl;

	dev_priv->fbc.enabled = true;

	dpfc_ctl = DPFC_CTL_PLANE(intel_crtc->plane) | DPFC_SR_EN;
	if (drm_format_plane_cpp(fb->pixel_format, 0) == 2)
		dpfc_ctl |= DPFC_CTL_LIMIT_2X;
	else
		dpfc_ctl |= DPFC_CTL_LIMIT_1X;
	dpfc_ctl |= DPFC_CTL_FENCE_EN | obj->fence_reg;

	I915_WRITE(DPFC_FENCE_YOFF, crtc->y);

	/* enable it... */
	I915_WRITE(DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN);

	DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(intel_crtc->plane));
}

static void g4x_disable_fbc(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	u32 dpfc_ctl;

	dev_priv->fbc.enabled = false;

	/* Disable compression */
	dpfc_ctl = I915_READ(DPFC_CONTROL);
	if (dpfc_ctl & DPFC_CTL_EN) {
		dpfc_ctl &= ~DPFC_CTL_EN;
		I915_WRITE(DPFC_CONTROL, dpfc_ctl);

		DRM_DEBUG_KMS("disabled FBC\n");
	}
}

static bool g4x_fbc_enabled(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	return I915_READ(DPFC_CONTROL) & DPFC_CTL_EN;
}

static void sandybridge_blit_fbc_update(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	u32 blt_ecoskpd;

	/* Make sure blitter notifies FBC of writes */

	/* Blitter is part of the Media powerwell on VLV. This parameter has
	 * no impact on other platforms for now */
	gen6_gt_force_wake_get(dev_priv, FORCEWAKE_MEDIA);

	blt_ecoskpd = I915_READ(GEN6_BLITTER_ECOSKPD);
	blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY <<
		GEN6_BLITTER_LOCK_SHIFT;
	I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
	blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY;
	I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
	blt_ecoskpd &= ~(GEN6_BLITTER_FBC_NOTIFY <<
			 GEN6_BLITTER_LOCK_SHIFT);
	I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
	POSTING_READ(GEN6_BLITTER_ECOSKPD);

	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_MEDIA);
}

static void ironlake_enable_fbc(struct drm_crtc *crtc)
{
	struct drm_device *dev = crtc->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_framebuffer *fb = crtc->primary->fb;
	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
	u32 dpfc_ctl;

	dev_priv->fbc.enabled = true;

	dpfc_ctl = DPFC_CTL_PLANE(intel_crtc->plane);
	if (drm_format_plane_cpp(fb->pixel_format, 0) == 2)
		dev_priv->fbc.threshold++;

	switch (dev_priv->fbc.threshold) {
	case 4:
	case 3:
		dpfc_ctl |= DPFC_CTL_LIMIT_4X;
		break;
	case 2:
		dpfc_ctl |= DPFC_CTL_LIMIT_2X;
		break;
	case 1:
		dpfc_ctl |= DPFC_CTL_LIMIT_1X;
		break;
	}
	dpfc_ctl |= DPFC_CTL_FENCE_EN;
	if (IS_GEN5(dev))
		dpfc_ctl |= obj->fence_reg;

	I915_WRITE(ILK_DPFC_FENCE_YOFF, crtc->y);
	I915_WRITE(ILK_FBC_RT_BASE, i915_gem_obj_ggtt_offset(obj) | ILK_FBC_RT_VALID);
	/* enable it... */
	I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN);

	if (IS_GEN6(dev)) {
		I915_WRITE(SNB_DPFC_CTL_SA,
			   SNB_CPU_FENCE_ENABLE | obj->fence_reg);
		I915_WRITE(DPFC_CPU_FENCE_OFFSET, crtc->y);
		sandybridge_blit_fbc_update(dev);
	}

	DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(intel_crtc->plane));
}

static void ironlake_disable_fbc(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	u32 dpfc_ctl;

	dev_priv->fbc.enabled = false;

	/* Disable compression */
	dpfc_ctl = I915_READ(ILK_DPFC_CONTROL);
	if (dpfc_ctl & DPFC_CTL_EN) {
		dpfc_ctl &= ~DPFC_CTL_EN;
		I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl);

		DRM_DEBUG_KMS("disabled FBC\n");
	}
}

static bool ironlake_fbc_enabled(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	return I915_READ(ILK_DPFC_CONTROL) & DPFC_CTL_EN;
}

static void gen7_enable_fbc(struct drm_crtc *crtc)
{
	struct drm_device *dev = crtc->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_framebuffer *fb = crtc->primary->fb;
	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
	u32 dpfc_ctl;

	dev_priv->fbc.enabled = true;

	dpfc_ctl = IVB_DPFC_CTL_PLANE(intel_crtc->plane);
	if (drm_format_plane_cpp(fb->pixel_format, 0) == 2)
		dev_priv->fbc.threshold++;

	switch (dev_priv->fbc.threshold) {
	case 4:
	case 3:
		dpfc_ctl |= DPFC_CTL_LIMIT_4X;
		break;
	case 2:
		dpfc_ctl |= DPFC_CTL_LIMIT_2X;
		break;
	case 1:
		dpfc_ctl |= DPFC_CTL_LIMIT_1X;
		break;
	}

	dpfc_ctl |= IVB_DPFC_CTL_FENCE_EN;

	if (dev_priv->fbc.false_color)
		dpfc_ctl |= FBC_CTL_FALSE_COLOR;

	I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN);

	if (IS_IVYBRIDGE(dev)) {
		/* WaFbcAsynchFlipDisableFbcQueue:ivb */
		I915_WRITE(ILK_DISPLAY_CHICKEN1,
			   I915_READ(ILK_DISPLAY_CHICKEN1) |
			   ILK_FBCQ_DIS);
	} else {
		/* WaFbcAsynchFlipDisableFbcQueue:hsw,bdw */
		I915_WRITE(CHICKEN_PIPESL_1(intel_crtc->pipe),
			   I915_READ(CHICKEN_PIPESL_1(intel_crtc->pipe)) |
			   HSW_FBCQ_DIS);
	}

	I915_WRITE(SNB_DPFC_CTL_SA,
		   SNB_CPU_FENCE_ENABLE | obj->fence_reg);
	I915_WRITE(DPFC_CPU_FENCE_OFFSET, crtc->y);

	sandybridge_blit_fbc_update(dev);

	DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(intel_crtc->plane));
}

bool intel_fbc_enabled(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	return dev_priv->fbc.enabled;
}

void bdw_fbc_sw_flush(struct drm_device *dev, u32 value)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (!IS_GEN8(dev))
		return;

	if (!intel_fbc_enabled(dev))
		return;

	I915_WRITE(MSG_FBC_REND_STATE, value);
}

static void intel_fbc_work_fn(struct work_struct *__work)
{
	struct intel_fbc_work *work =
		container_of(to_delayed_work(__work),
			     struct intel_fbc_work, work);
	struct drm_device *dev = work->crtc->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	mutex_lock(&dev->struct_mutex);
	if (work == dev_priv->fbc.fbc_work) {
		/* Double check that we haven't switched fb without cancelling
		 * the prior work.
		 */
		if (work->crtc->primary->fb == work->fb) {
			dev_priv->display.enable_fbc(work->crtc);

			dev_priv->fbc.plane = to_intel_crtc(work->crtc)->plane;
			dev_priv->fbc.fb_id = work->crtc->primary->fb->base.id;
			dev_priv->fbc.y = work->crtc->y;
		}

		dev_priv->fbc.fbc_work = NULL;
	}
	mutex_unlock(&dev->struct_mutex);

	kfree(work);
}

static void intel_cancel_fbc_work(struct drm_i915_private *dev_priv)
{
	if (dev_priv->fbc.fbc_work == NULL)
		return;

	DRM_DEBUG_KMS("cancelling pending FBC enable\n");

	/* Synchronisation is provided by struct_mutex and checking of
	 * dev_priv->fbc.fbc_work, so we can perform the cancellation
	 * entirely asynchronously.
	 */
	if (cancel_delayed_work(&dev_priv->fbc.fbc_work->work))
		/* tasklet was killed before being run, clean up */
		kfree(dev_priv->fbc.fbc_work);

	/* Mark the work as no longer wanted so that if it does
	 * wake up (because the work was already running and waiting
	 * for our mutex), it will discover that it is no longer
	 * necessary to run.
	 */
	dev_priv->fbc.fbc_work = NULL;
}

static void intel_enable_fbc(struct drm_crtc *crtc)
{
	struct intel_fbc_work *work;
	struct drm_device *dev = crtc->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (!dev_priv->display.enable_fbc)
		return;

	intel_cancel_fbc_work(dev_priv);

	work = kzalloc(sizeof(*work), GFP_KERNEL);
	if (work == NULL) {
		DRM_ERROR("Failed to allocate FBC work structure\n");
		dev_priv->display.enable_fbc(crtc);
		return;
	}

	work->crtc = crtc;
	work->fb = crtc->primary->fb;
	INIT_DELAYED_WORK(&work->work, intel_fbc_work_fn);

	dev_priv->fbc.fbc_work = work;

	/* Delay the actual enabling to let pageflipping cease and the
	 * display to settle before starting the compression. Note that
	 * this delay also serves a second purpose: it allows for a
	 * vblank to pass after disabling the FBC before we attempt
	 * to modify the control registers.
	 *
	 * A more complicated solution would involve tracking vblanks
	 * following the termination of the page-flipping sequence
	 * and indeed performing the enable as a co-routine and not
	 * waiting synchronously upon the vblank.
	 *
	 * WaFbcWaitForVBlankBeforeEnable:ilk,snb
	 */
	schedule_delayed_work(&work->work, msecs_to_jiffies(50));
}

void intel_disable_fbc(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	intel_cancel_fbc_work(dev_priv);

	if (!dev_priv->display.disable_fbc)
		return;

	dev_priv->display.disable_fbc(dev);
	dev_priv->fbc.plane = -1;
}

static bool set_no_fbc_reason(struct drm_i915_private *dev_priv,
			      enum no_fbc_reason reason)
{
	if (dev_priv->fbc.no_fbc_reason == reason)
		return false;

	dev_priv->fbc.no_fbc_reason = reason;
	return true;
}

/**
 * intel_update_fbc - enable/disable FBC as needed
 * @dev: the drm_device
 *
 * Set up the framebuffer compression hardware at mode set time. We
 * enable it if possible:
 *   - plane A only (on pre-965)
 *   - no pixel multiply/line duplication
 *   - no alpha buffer discard
 *   - no dual wide
 *   - framebuffer <= max_hdisplay in width, max_vdisplay in height
 *
 * We can't assume that any compression will take place (worst case),
 * so the compressed buffer has to be the same size as the uncompressed
 * one. It also must reside (along with the line length buffer) in
 * stolen memory.
 *
 * We need to enable/disable FBC on a global basis.
 */
void intel_update_fbc(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_crtc *crtc = NULL, *tmp_crtc;
	struct intel_crtc *intel_crtc;
	struct drm_framebuffer *fb;
	struct drm_i915_gem_object *obj;
	const struct drm_display_mode *adjusted_mode;
	unsigned int max_width, max_height;

	if (!HAS_FBC(dev)) {
		set_no_fbc_reason(dev_priv, FBC_UNSUPPORTED);
		return;
	}

	if (!i915.powersave) {
		if (set_no_fbc_reason(dev_priv, FBC_MODULE_PARAM))
			DRM_DEBUG_KMS("fbc disabled per module param\n");
		return;
	}

	/*
	 * If FBC is already on, we just have to verify that we can
	 * keep it that way...
	 * Need to disable if:
	 *   - more than one pipe is active
	 *   - changing FBC params (stride, fence, mode)
	 *   - new fb is too large to fit in compressed buffer
	 *   - going to an unsupported config (interlace, pixel multiply, etc.)
	 */
	for_each_crtc(dev, tmp_crtc) {
		if (intel_crtc_active(tmp_crtc) &&
		    to_intel_crtc(tmp_crtc)->primary_enabled) {
			if (crtc) {
				if (set_no_fbc_reason(dev_priv, FBC_MULTIPLE_PIPES))
					DRM_DEBUG_KMS("more than one pipe active, disabling compression\n");
				goto out_disable;
			}
			crtc = tmp_crtc;
		}
	}

	if (!crtc || crtc->primary->fb == NULL) {
		if (set_no_fbc_reason(dev_priv, FBC_NO_OUTPUT))
			DRM_DEBUG_KMS("no output, disabling\n");
		goto out_disable;
	}

	intel_crtc = to_intel_crtc(crtc);
	fb = crtc->primary->fb;
	obj = intel_fb_obj(fb);
	adjusted_mode = &intel_crtc->config.adjusted_mode;

	if (i915.enable_fbc < 0) {
		if (set_no_fbc_reason(dev_priv, FBC_CHIP_DEFAULT))
			DRM_DEBUG_KMS("disabled per chip default\n");
		goto out_disable;
	}
	if (!i915.enable_fbc) {
		if (set_no_fbc_reason(dev_priv, FBC_MODULE_PARAM))
			DRM_DEBUG_KMS("fbc disabled per module param\n");
		goto out_disable;
	}
	if ((adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) ||
	    (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)) {
		if (set_no_fbc_reason(dev_priv, FBC_UNSUPPORTED_MODE))
			DRM_DEBUG_KMS("mode incompatible with compression, "
				      "disabling\n");
		goto out_disable;
	}

	if (INTEL_INFO(dev)->gen >= 8 || IS_HASWELL(dev)) {
		max_width = 4096;
		max_height = 4096;
	} else if (IS_G4X(dev) || INTEL_INFO(dev)->gen >= 5) {
		max_width = 4096;
		max_height = 2048;
	} else {
		max_width = 2048;
		max_height = 1536;
	}
	if (intel_crtc->config.pipe_src_w > max_width ||
	    intel_crtc->config.pipe_src_h > max_height) {
		if (set_no_fbc_reason(dev_priv, FBC_MODE_TOO_LARGE))
			DRM_DEBUG_KMS("mode too large for compression, disabling\n");
		goto out_disable;
	}
	if ((INTEL_INFO(dev)->gen < 4 || HAS_DDI(dev)) &&
	    intel_crtc->plane != PLANE_A) {
		if (set_no_fbc_reason(dev_priv, FBC_BAD_PLANE))
			DRM_DEBUG_KMS("plane not A, disabling compression\n");
		goto out_disable;
	}

	/* The use of a CPU fence is mandatory in order to detect writes
	 * by the CPU to the scanout and trigger updates to the FBC.
	 */
	if (obj->tiling_mode != I915_TILING_X ||
	    obj->fence_reg == I915_FENCE_REG_NONE) {
		if (set_no_fbc_reason(dev_priv, FBC_NOT_TILED))
			DRM_DEBUG_KMS("framebuffer not tiled or fenced, disabling compression\n");
		goto out_disable;
	}
	if (INTEL_INFO(dev)->gen <= 4 && !IS_G4X(dev) &&
	    to_intel_plane(crtc->primary)->rotation != BIT(DRM_ROTATE_0)) {
		if (set_no_fbc_reason(dev_priv, FBC_UNSUPPORTED_MODE))
			DRM_DEBUG_KMS("Rotation unsupported, disabling\n");
		goto out_disable;
	}

	/* If the kernel debugger is active, always disable compression */
	if (in_dbg_master())
		goto out_disable;

	if (i915_gem_stolen_setup_compression(dev, obj->base.size,
					      drm_format_plane_cpp(fb->pixel_format, 0))) {
		if (set_no_fbc_reason(dev_priv, FBC_STOLEN_TOO_SMALL))
			DRM_DEBUG_KMS("framebuffer too large, disabling compression\n");
		goto out_disable;
	}

	/* If the scanout has not changed, don't modify the FBC settings.
	 * Note that we make the fundamental assumption that the fb->obj
	 * cannot be unpinned (and have its GTT offset and fence revoked)
	 * without first being decoupled from the scanout and FBC disabled.
	 */
	if (dev_priv->fbc.plane == intel_crtc->plane &&
	    dev_priv->fbc.fb_id == fb->base.id &&
	    dev_priv->fbc.y == crtc->y)
		return;

	if (intel_fbc_enabled(dev)) {
		/* We update FBC along two paths, after changing fb/crtc
		 * configuration (modeswitching) and after page-flipping
		 * finishes. For the latter, we know that not only did
		 * we disable the FBC at the start of the page-flip
		 * sequence, but also more than one vblank has passed.
		 *
		 * For the former case of modeswitching, it is possible
		 * to switch between two FBC valid configurations
		 * instantaneously so we do need to disable the FBC
		 * before we can modify its control registers. We also
		 * have to wait for the next vblank for that to take
		 * effect. However, since we delay enabling FBC we can
		 * assume that a vblank has passed since disabling and
		 * that we can safely alter the registers in the deferred
		 * callback.
		 *
		 * In the scenario that we go from a valid to invalid
		 * and then back to valid FBC configuration we have
		 * no strict enforcement that a vblank occurred since
		 * disabling the FBC. However, along all current pipe
		 * disabling paths we do need to wait for a vblank at
		 * some point. And we wait before enabling FBC anyway.
		 */
		DRM_DEBUG_KMS("disabling active FBC for update\n");
		intel_disable_fbc(dev);
	}

	intel_enable_fbc(crtc);
	dev_priv->fbc.no_fbc_reason = FBC_OK;
	return;

out_disable:
	/* Multiple disables should be harmless */
	if (intel_fbc_enabled(dev)) {
		DRM_DEBUG_KMS("unsupported config, disabling FBC\n");
		intel_disable_fbc(dev);
	}
	i915_gem_stolen_cleanup_compression(dev);
}

static void i915_pineview_get_mem_freq(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	u32 tmp;

	tmp = I915_READ(CLKCFG);

	switch (tmp & CLKCFG_FSB_MASK) {
	case CLKCFG_FSB_533:
		dev_priv->fsb_freq = 533; /* 133*4 */
		break;
	case CLKCFG_FSB_800:
		dev_priv->fsb_freq = 800; /* 200*4 */
		break;
	case CLKCFG_FSB_667:
		dev_priv->fsb_freq = 667; /* 167*4 */
		break;
	case CLKCFG_FSB_400:
		dev_priv->fsb_freq = 400; /* 100*4 */
		break;
	}

	switch (tmp & CLKCFG_MEM_MASK) {
	case CLKCFG_MEM_533:
		dev_priv->mem_freq = 533;
		break;
	case CLKCFG_MEM_667:
		dev_priv->mem_freq = 667;
		break;
	case CLKCFG_MEM_800:
		dev_priv->mem_freq = 800;
		break;
	}

	/* detect pineview DDR3 setting */
	tmp = I915_READ(CSHRDDR3CTL);
	dev_priv->is_ddr3 = (tmp & CSHRDDR3CTL_DDR3) ? 1 : 0;
}

static void i915_ironlake_get_mem_freq(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	u16 ddrpll, csipll;

	ddrpll = I915_READ16(DDRMPLL1);
	csipll = I915_READ16(CSIPLL0);

	switch (ddrpll & 0xff) {
	case 0xc:
		dev_priv->mem_freq = 800;
		break;
	case 0x10:
		dev_priv->mem_freq = 1066;
		break;
	case 0x14:
		dev_priv->mem_freq = 1333;
		break;
	case 0x18:
		dev_priv->mem_freq = 1600;
		break;
	default:
		DRM_DEBUG_DRIVER("unknown memory frequency 0x%02x\n",
				 ddrpll & 0xff);
		dev_priv->mem_freq = 0;
		break;
	}

	dev_priv->ips.r_t = dev_priv->mem_freq;

	switch (csipll & 0x3ff) {
	case 0x00c:
		dev_priv->fsb_freq = 3200;
		break;
	case 0x00e:
		dev_priv->fsb_freq = 3733;
		break;
	case 0x010:
		dev_priv->fsb_freq = 4266;
		break;
	case 0x012:
		dev_priv->fsb_freq = 4800;
		break;
	case 0x014:
		dev_priv->fsb_freq = 5333;
		break;
	case 0x016:
		dev_priv->fsb_freq = 5866;
		break;
	case 0x018:
		dev_priv->fsb_freq = 6400;
		break;
	default:
		DRM_DEBUG_DRIVER("unknown fsb frequency 0x%04x\n",
				 csipll & 0x3ff);
		dev_priv->fsb_freq = 0;
		break;
	}

	if (dev_priv->fsb_freq == 3200) {
		dev_priv->ips.c_m = 0;
	} else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) {
		dev_priv->ips.c_m = 1;
	} else {
		dev_priv->ips.c_m = 2;
	}
}

static const struct cxsr_latency cxsr_latency_table[] = {
	{1, 0, 800, 400, 3382, 33382, 3983, 33983},    /* DDR2-400 SC */
	{1, 0, 800, 667, 3354, 33354, 3807, 33807},    /* DDR2-667 SC */
	{1, 0, 800, 800, 3347, 33347, 3763, 33763},    /* DDR2-800 SC */
	{1, 1, 800, 667, 6420, 36420, 6873, 36873},    /* DDR3-667 SC */
	{1, 1, 800, 800, 5902, 35902, 6318, 36318},    /* DDR3-800 SC */

	{1, 0, 667, 400, 3400, 33400, 4021, 34021},    /* DDR2-400 SC */
	{1, 0, 667, 667, 3372, 33372, 3845, 33845},    /* DDR2-667 SC */
	{1, 0, 667, 800, 3386, 33386, 3822, 33822},    /* DDR2-800 SC */
	{1, 1, 667, 667, 6438, 36438, 6911, 36911},    /* DDR3-667 SC */
	{1, 1, 667, 800, 5941, 35941, 6377, 36377},    /* DDR3-800 SC */
	{1, 0, 400, 400, 3472, 33472, 4173, 34173},    /* DDR2-400 SC */
	{1, 0, 400, 667, 3443, 33443, 3996, 33996},    /* DDR2-667 SC */
	{1, 0, 400, 800, 3430, 33430, 3946, 33946},    /* DDR2-800 SC */
	{1, 1, 400, 667, 6509, 36509, 7062, 37062},    /* DDR3-667 SC */
	{1, 1, 400, 800, 5985, 35985, 6501, 36501},    /* DDR3-800 SC */

	{0, 0, 800, 400, 3438, 33438, 4065, 34065},    /* DDR2-400 SC */
	{0, 0, 800, 667, 3410, 33410, 3889, 33889},    /* DDR2-667 SC */
	{0, 0, 800, 800, 3403, 33403, 3845, 33845},    /* DDR2-800 SC */
	{0, 1, 800, 667, 6476, 36476, 6955, 36955},    /* DDR3-667 SC */
	{0, 1, 800, 800, 5958, 35958, 6400, 36400},    /* DDR3-800 SC */

	{0, 0, 667, 400, 3456, 33456, 4103, 34106},    /* DDR2-400 SC */
	{0, 0, 667, 667, 3428, 33428, 3927, 33927},    /* DDR2-667 SC */
	{0, 0, 667, 800, 3443, 33443, 3905, 33905},    /* DDR2-800 SC */
	{0, 1, 667, 667, 6494, 36494, 6993, 36993},    /* DDR3-667 SC */
	{0, 1, 667, 800, 5998, 35998, 6460, 36460},    /* DDR3-800 SC */

	{0, 0, 400, 400, 3528, 33528, 4255, 34255},    /* DDR2-400 SC */
	{0, 0, 400, 667, 3500, 33500, 4079, 34079},    /* DDR2-667 SC */
	{0, 0, 400, 800, 3487, 33487, 4029, 34029},    /* DDR2-800 SC */
	{0, 1, 400, 667, 6566, 36566, 7145, 37145},    /* DDR3-667 SC */
	{0, 1, 400, 800, 6042, 36042, 6584, 36584},    /* DDR3-800 SC */
};

static const struct cxsr_latency *intel_get_cxsr_latency(int is_desktop,
							 int is_ddr3,
							 int fsb,
							 int mem)
{
	const struct cxsr_latency *latency;
	int i;

	if (fsb == 0 || mem == 0)
		return NULL;

	for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) {
		latency = &cxsr_latency_table[i];
		if (is_desktop == latency->is_desktop &&
		    is_ddr3 == latency->is_ddr3 &&
		    fsb == latency->fsb_freq && mem == latency->mem_freq)
			return latency;
	}

	DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");

	return NULL;
}

void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
{
	struct drm_device *dev = dev_priv->dev;
	u32 val;

	if (IS_VALLEYVIEW(dev)) {
		I915_WRITE(FW_BLC_SELF_VLV, enable ? FW_CSPWRDWNEN : 0);
	} else if (IS_G4X(dev) || IS_CRESTLINE(dev)) {
		I915_WRITE(FW_BLC_SELF, enable ? FW_BLC_SELF_EN : 0);
	} else if (IS_PINEVIEW(dev)) {
		val = I915_READ(DSPFW3) & ~PINEVIEW_SELF_REFRESH_EN;
		val |= enable ? PINEVIEW_SELF_REFRESH_EN : 0;
		I915_WRITE(DSPFW3, val);
	} else if (IS_I945G(dev) || IS_I945GM(dev)) {
		val = enable ? _MASKED_BIT_ENABLE(FW_BLC_SELF_EN) :
			       _MASKED_BIT_DISABLE(FW_BLC_SELF_EN);
		I915_WRITE(FW_BLC_SELF, val);
	} else if (IS_I915GM(dev)) {
		val = enable ? _MASKED_BIT_ENABLE(INSTPM_SELF_EN) :
			       _MASKED_BIT_DISABLE(INSTPM_SELF_EN);
		I915_WRITE(INSTPM, val);
	} else {
		return;
	}

	DRM_DEBUG_KMS("memory self-refresh is %s\n",
		      enable ? "enabled" : "disabled");
}

/*
 * Latency for FIFO fetches is dependent on several factors:
 *   - memory configuration (speed, channels)
 *   - chipset
 *   - current MCH state
 * It can be fairly high in some situations, so here we assume a fairly
 * pessimal value. It's a tradeoff between extra memory fetches (if we
 * set this value too high, the FIFO will fetch frequently to stay full)
 * and power consumption (set it too low to save power and we might see
 * FIFO underruns and display "flicker").
 *
 * A value of 5us seems to be a good balance; safe for very low end
 * platforms but not overly aggressive on lower latency configs.
 */
static const int pessimal_latency_ns = 5000;

static int i9xx_get_fifo_size(struct drm_device *dev, int plane)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	uint32_t dsparb = I915_READ(DSPARB);
	int size;

	size = dsparb & 0x7f;
	if (plane)
		size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - size;

	DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
		      plane ? "B" : "A", size);

	return size;
}

static int i830_get_fifo_size(struct drm_device *dev, int plane)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	uint32_t dsparb = I915_READ(DSPARB);
	int size;

	size = dsparb & 0x1ff;
	if (plane)
		size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - size;
	size >>= 1; /* Convert to cachelines */

	DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
		      plane ? "B" : "A", size);

	return size;
}

static int i845_get_fifo_size(struct drm_device *dev, int plane)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	uint32_t dsparb = I915_READ(DSPARB);
	int size;

	size = dsparb & 0x7f;
	size >>= 2; /* Convert to cachelines */

	DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
		      plane ? "B" : "A",
		      size);

	return size;
}
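/*
 * Aside (illustration, not part of the original file): DSPARB encodes where
 * the shared display FIFO is split between planes, so plane A's share is the
 * split point itself and plane B's share is the distance from that split
 * point to the next one. A standalone sketch of the i9xx decoding above,
 * with a made-up register value; EXAMPLE_DSPARB_CSTART_SHIFT mirrors the
 * driver's DSPARB_CSTART_SHIFT:
 */
#if 0	/* example only */
#include <stdint.h>
#include <assert.h>

#define EXAMPLE_DSPARB_CSTART_SHIFT	7

static void example_dsparb_split(void)
{
	uint32_t dsparb = (96 << EXAMPLE_DSPARB_CSTART_SHIFT) | 64;
	int size_a = dsparb & 0x7f;	/* plane A: up to the split -> 64 */
	int size_b = ((dsparb >> EXAMPLE_DSPARB_CSTART_SHIFT) & 0x7f) - size_a;

	assert(size_a == 64 && size_b == 32);	/* B gets 96 - 64 = 32 */
}
#endif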
/* Pineview has different values for various configs */
static const struct intel_watermark_params pineview_display_wm = {
	.fifo_size = PINEVIEW_DISPLAY_FIFO,
	.max_wm = PINEVIEW_MAX_WM,
	.default_wm = PINEVIEW_DFT_WM,
	.guard_size = PINEVIEW_GUARD_WM,
	.cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params pineview_display_hplloff_wm = {
	.fifo_size = PINEVIEW_DISPLAY_FIFO,
	.max_wm = PINEVIEW_MAX_WM,
	.default_wm = PINEVIEW_DFT_HPLLOFF_WM,
	.guard_size = PINEVIEW_GUARD_WM,
	.cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params pineview_cursor_wm = {
	.fifo_size = PINEVIEW_CURSOR_FIFO,
	.max_wm = PINEVIEW_CURSOR_MAX_WM,
	.default_wm = PINEVIEW_CURSOR_DFT_WM,
	.guard_size = PINEVIEW_CURSOR_GUARD_WM,
	.cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params pineview_cursor_hplloff_wm = {
	.fifo_size = PINEVIEW_CURSOR_FIFO,
	.max_wm = PINEVIEW_CURSOR_MAX_WM,
	.default_wm = PINEVIEW_CURSOR_DFT_WM,
	.guard_size = PINEVIEW_CURSOR_GUARD_WM,
	.cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params g4x_wm_info = {
	.fifo_size = G4X_FIFO_SIZE,
	.max_wm = G4X_MAX_WM,
	.default_wm = G4X_MAX_WM,
	.guard_size = 2,
	.cacheline_size = G4X_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params g4x_cursor_wm_info = {
	.fifo_size = I965_CURSOR_FIFO,
	.max_wm = I965_CURSOR_MAX_WM,
	.default_wm = I965_CURSOR_DFT_WM,
	.guard_size = 2,
	.cacheline_size = G4X_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params valleyview_wm_info = {
	.fifo_size = VALLEYVIEW_FIFO_SIZE,
	.max_wm = VALLEYVIEW_MAX_WM,
	.default_wm = VALLEYVIEW_MAX_WM,
	.guard_size = 2,
	.cacheline_size = G4X_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params valleyview_cursor_wm_info = {
	.fifo_size = I965_CURSOR_FIFO,
	.max_wm = VALLEYVIEW_CURSOR_MAX_WM,
	.default_wm = I965_CURSOR_DFT_WM,
	.guard_size = 2,
	.cacheline_size = G4X_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i965_cursor_wm_info = {
	.fifo_size = I965_CURSOR_FIFO,
	.max_wm = I965_CURSOR_MAX_WM,
	.default_wm = I965_CURSOR_DFT_WM,
	.guard_size = 2,
	.cacheline_size = I915_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i945_wm_info = {
	.fifo_size = I945_FIFO_SIZE,
	.max_wm = I915_MAX_WM,
	.default_wm = 1,
	.guard_size = 2,
	.cacheline_size = I915_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i915_wm_info = {
	.fifo_size = I915_FIFO_SIZE,
	.max_wm = I915_MAX_WM,
	.default_wm = 1,
	.guard_size = 2,
	.cacheline_size = I915_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i830_a_wm_info = {
	.fifo_size = I855GM_FIFO_SIZE,
	.max_wm = I915_MAX_WM,
	.default_wm = 1,
	.guard_size = 2,
	.cacheline_size = I830_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i830_bc_wm_info = {
	.fifo_size = I855GM_FIFO_SIZE,
	.max_wm = I915_MAX_WM/2,
	.default_wm = 1,
	.guard_size = 2,
	.cacheline_size = I830_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i845_wm_info = {
	.fifo_size = I830_FIFO_SIZE,
	.max_wm = I915_MAX_WM,
	.default_wm = 1,
	.guard_size = 2,
	.cacheline_size = I830_FIFO_LINE_SIZE,
};

/**
 * intel_calculate_wm - calculate watermark level
 * @clock_in_khz: pixel clock
 * @wm: chip FIFO params
 * @pixel_size: display pixel size
 * @latency_ns: memory latency for the platform
 *
 * Calculate the watermark level (the level at which the display plane will
 * start fetching from memory again). Each chip has a different display
 * FIFO size and allocation, so the caller needs to figure that out and pass
 * in the correct intel_watermark_params structure.
 *
 * As the pixel clock runs, the FIFO will be drained at a rate that depends
 * on the pixel size. When it reaches the watermark level, it'll start
 * fetching FIFO-line-sized chunks from memory until the FIFO fills past the
 * watermark point. If the FIFO drains completely, a FIFO underrun will
 * occur, and a display engine hang could result.
 */
static unsigned long intel_calculate_wm(unsigned long clock_in_khz,
					const struct intel_watermark_params *wm,
					int fifo_size,
					int pixel_size,
					unsigned long latency_ns)
{
	long entries_required, wm_size;

	/*
	 * Note: we need to make sure we don't overflow for various clock &
	 * latency values.
	 * clocks go from a few thousand to several hundred thousand.
	 * latency is usually a few thousand.
	 */
	entries_required = ((clock_in_khz / 1000) * pixel_size * latency_ns) /
		1000;
	entries_required = DIV_ROUND_UP(entries_required, wm->cacheline_size);

	DRM_DEBUG_KMS("FIFO entries required for mode: %ld\n", entries_required);

	wm_size = fifo_size - (entries_required + wm->guard_size);

	DRM_DEBUG_KMS("FIFO watermark level: %ld\n", wm_size);

	/* Don't promote wm_size to unsigned... */
	if (wm_size > (long)wm->max_wm)
		wm_size = wm->max_wm;
	if (wm_size <= 0)
		wm_size = wm->default_wm;

	/*
	 * Bspec seems to indicate that the value shouldn't be lower than
	 * 'burst size + 1'. Certainly 830 is quite unhappy with low values.
	 * Let's go for 8, which is the burst size, since certain platforms
	 * already use a hardcoded 8 (which is what the spec says should be
	 * done).
	 */
	if (wm_size <= 8)
		wm_size = 8;

	return wm_size;
}
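/*
 * Worked example (not part of the original file): with a 148500 kHz pixel
 * clock, 4 bytes per pixel and the 5000 ns pessimal latency above, the FIFO
 * drains (148500/1000) * 4 * 5000 / 1000 = 2960 bytes (integer math) during
 * one latency window. With a 64-byte cacheline that rounds up to 47 entries,
 * so a hypothetical 96-entry FIFO with a guard of 2 yields a watermark of
 * 96 - (47 + 2) = 47. Illustrative numbers, not taken from any platform
 * table.
 */
#if 0	/* example only */
static void example_calculate_wm(void)
{
	long entries = ((148500L / 1000) * 4 * 5000) / 1000;	/* 2960 bytes */

	entries = (entries + 63) / 64;	/* DIV_ROUND_UP -> 47 cachelines */
	/* wm_size = fifo_size - (entries + guard) = 96 - 49 = 47 */
}
#endif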
static struct drm_crtc *single_enabled_crtc(struct drm_device *dev)
{
	struct drm_crtc *crtc, *enabled = NULL;

	for_each_crtc(dev, crtc) {
		if (intel_crtc_active(crtc)) {
			if (enabled)
				return NULL;
			enabled = crtc;
		}
	}

	return enabled;
}

static void pineview_update_wm(struct drm_crtc *unused_crtc)
{
	struct drm_device *dev = unused_crtc->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_crtc *crtc;
	const struct cxsr_latency *latency;
	u32 reg;
	unsigned long wm;

	latency = intel_get_cxsr_latency(IS_PINEVIEW_G(dev), dev_priv->is_ddr3,
					 dev_priv->fsb_freq, dev_priv->mem_freq);
	if (!latency) {
		DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
		intel_set_memory_cxsr(dev_priv, false);
		return;
	}

	crtc = single_enabled_crtc(dev);
	if (crtc) {
		const struct drm_display_mode *adjusted_mode;
		int pixel_size = crtc->primary->fb->bits_per_pixel / 8;
		int clock;

		adjusted_mode = &to_intel_crtc(crtc)->config.adjusted_mode;
		clock = adjusted_mode->crtc_clock;

		/* Display SR */
		wm = intel_calculate_wm(clock, &pineview_display_wm,
					pineview_display_wm.fifo_size,
					pixel_size, latency->display_sr);
		reg = I915_READ(DSPFW1);
		reg &= ~DSPFW_SR_MASK;
		reg |= wm << DSPFW_SR_SHIFT;
		I915_WRITE(DSPFW1, reg);
		DRM_DEBUG_KMS("DSPFW1 register is %x\n", reg);

		/* cursor SR */
		wm = intel_calculate_wm(clock, &pineview_cursor_wm,
					pineview_display_wm.fifo_size,
					pixel_size, latency->cursor_sr);
		reg = I915_READ(DSPFW3);
		reg &= ~DSPFW_CURSOR_SR_MASK;
		reg |= (wm & 0x3f) << DSPFW_CURSOR_SR_SHIFT;
		I915_WRITE(DSPFW3, reg);

		/* Display HPLL off SR */
		wm = intel_calculate_wm(clock, &pineview_display_hplloff_wm,
					pineview_display_hplloff_wm.fifo_size,
					pixel_size, latency->display_hpll_disable);
		reg = I915_READ(DSPFW3);
		reg &= ~DSPFW_HPLL_SR_MASK;
		reg |= wm & DSPFW_HPLL_SR_MASK;
		I915_WRITE(DSPFW3, reg);

		/* cursor HPLL off SR */
		wm = intel_calculate_wm(clock, &pineview_cursor_hplloff_wm,
					pineview_display_hplloff_wm.fifo_size,
					pixel_size, latency->cursor_hpll_disable);
		reg = I915_READ(DSPFW3);
		reg &= ~DSPFW_HPLL_CURSOR_MASK;
		reg |= (wm & 0x3f) << DSPFW_HPLL_CURSOR_SHIFT;
		I915_WRITE(DSPFW3, reg);
		DRM_DEBUG_KMS("DSPFW3 register is %x\n", reg);

		intel_set_memory_cxsr(dev_priv, true);
	} else {
		intel_set_memory_cxsr(dev_priv, false);
	}
}

static bool g4x_compute_wm0(struct drm_device *dev,
			    int plane,
			    const struct intel_watermark_params *display,
			    int display_latency_ns,
			    const struct intel_watermark_params *cursor,
			    int cursor_latency_ns,
			    int *plane_wm,
			    int *cursor_wm)
{
	struct drm_crtc *crtc;
	const struct drm_display_mode *adjusted_mode;
	int htotal, hdisplay, clock, pixel_size;
	int line_time_us, line_count;
	int entries, tlb_miss;

	crtc = intel_get_crtc_for_plane(dev, plane);
	if (!intel_crtc_active(crtc)) {
		*cursor_wm = cursor->guard_size;
		*plane_wm = display->guard_size;
		return false;
	}

	adjusted_mode = &to_intel_crtc(crtc)->config.adjusted_mode;
	clock = adjusted_mode->crtc_clock;
	htotal = adjusted_mode->crtc_htotal;
	hdisplay = to_intel_crtc(crtc)->config.pipe_src_w;
	pixel_size = crtc->primary->fb->bits_per_pixel / 8;

	/* Use the small buffer method to calculate plane watermark */
	entries = ((clock * pixel_size / 1000) * display_latency_ns) / 1000;
	tlb_miss = display->fifo_size*display->cacheline_size - hdisplay * 8;
	if (tlb_miss > 0)
		entries += tlb_miss;
	entries = DIV_ROUND_UP(entries, display->cacheline_size);
	*plane_wm = entries + display->guard_size;
	if (*plane_wm > (int)display->max_wm)
		*plane_wm = display->max_wm;

	/* Use the large buffer method to calculate cursor watermark */
	line_time_us = max(htotal * 1000 / clock, 1);
	line_count = (cursor_latency_ns / line_time_us + 1000) / 1000;
	entries = line_count * to_intel_crtc(crtc)->cursor_width * pixel_size;
	tlb_miss = cursor->fifo_size*cursor->cacheline_size - hdisplay * 8;
	if (tlb_miss > 0)
		entries += tlb_miss;
	entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
	*cursor_wm = entries + cursor->guard_size;
	if (*cursor_wm > (int)cursor->max_wm)
		*cursor_wm = (int)cursor->max_wm;

	return true;
}

/*
 * Check the wm result.
 *
 * If any calculated watermark value is larger than the maximum value that
 * can be programmed into the associated watermark register, that watermark
 * must be disabled.
 */
static bool g4x_check_srwm(struct drm_device *dev,
			   int display_wm, int cursor_wm,
			   const struct intel_watermark_params *display,
			   const struct intel_watermark_params *cursor)
{
	DRM_DEBUG_KMS("SR watermark: display plane %d, cursor %d\n",
		      display_wm, cursor_wm);

	if (display_wm > display->max_wm) {
		DRM_DEBUG_KMS("display watermark is too large(%d/%ld), disabling\n",
			      display_wm, display->max_wm);
		return false;
	}

	if (cursor_wm > cursor->max_wm) {
		DRM_DEBUG_KMS("cursor watermark is too large(%d/%ld), disabling\n",
			      cursor_wm, cursor->max_wm);
		return false;
	}

	if (!(display_wm || cursor_wm)) {
		DRM_DEBUG_KMS("SR latency is 0, disabling\n");
		return false;
	}

	return true;
}
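/*
 * Aside (illustration, not part of the original file): the "small buffer"
 * method above sizes the watermark by the bytes drained during the latency
 * window, while the "large buffer" method rounds the latency up to whole
 * scanlines first. A standalone sketch with illustrative numbers:
 */
#if 0	/* example only */
static void example_small_vs_large(void)
{
	int clock = 148500;		/* pixel clock, kHz */
	int pixel_size = 4;		/* bytes per pixel */
	int latency_ns = 5000;
	int htotal = 2200, width = 64;	/* e.g. a 64-pixel-wide cursor */

	/* small buffer: bytes fetched during the latency window */
	int small = ((clock * pixel_size / 1000) * latency_ns) / 1000;	/* 2970 */

	/* large buffer: whole scanlines covering the latency window */
	int line_time_us = htotal * 1000 / clock;			/* 14 us */
	int line_count = (latency_ns / line_time_us + 1000) / 1000;	/* 1 line */
	int large = line_count * width * pixel_size;			/* 256 bytes */
}
#endif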
static bool g4x_compute_srwm(struct drm_device *dev,
			     int plane,
			     int latency_ns,
			     const struct intel_watermark_params *display,
			     const struct intel_watermark_params *cursor,
			     int *display_wm, int *cursor_wm)
{
	struct drm_crtc *crtc;
	const struct drm_display_mode *adjusted_mode;
	int hdisplay, htotal, pixel_size, clock;
	unsigned long line_time_us;
	int line_count, line_size;
	int small, large;
	int entries;

	if (!latency_ns) {
		*display_wm = *cursor_wm = 0;
		return false;
	}

	crtc = intel_get_crtc_for_plane(dev, plane);
	adjusted_mode = &to_intel_crtc(crtc)->config.adjusted_mode;
	clock = adjusted_mode->crtc_clock;
	htotal = adjusted_mode->crtc_htotal;
	hdisplay = to_intel_crtc(crtc)->config.pipe_src_w;
	pixel_size = crtc->primary->fb->bits_per_pixel / 8;

	line_time_us = max(htotal * 1000 / clock, 1);
	line_count = (latency_ns / line_time_us + 1000) / 1000;
	line_size = hdisplay * pixel_size;

	/* Use the minimum of the small and large buffer method for primary */
	small = ((clock * pixel_size / 1000) * latency_ns) / 1000;
	large = line_count * line_size;

	entries = DIV_ROUND_UP(min(small, large), display->cacheline_size);
	*display_wm = entries + display->guard_size;

	/* calculate the self-refresh watermark for display cursor */
	entries = line_count * pixel_size * to_intel_crtc(crtc)->cursor_width;
	entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
	*cursor_wm = entries + cursor->guard_size;

	return g4x_check_srwm(dev,
			      *display_wm, *cursor_wm,
			      display, cursor);
}

static bool vlv_compute_drain_latency(struct drm_crtc *crtc,
				      int pixel_size,
				      int *prec_mult,
				      int *drain_latency)
{
	struct drm_device *dev = crtc->dev;
	int entries;
	int clock = to_intel_crtc(crtc)->config.adjusted_mode.crtc_clock;

	if (WARN(clock == 0, "Pixel clock is zero!\n"))
		return false;

	if (WARN(pixel_size == 0, "Pixel size is zero!\n"))
		return false;

	entries = DIV_ROUND_UP(clock, 1000) * pixel_size;
	if (IS_CHERRYVIEW(dev))
		*prec_mult = (entries > 128) ? DRAIN_LATENCY_PRECISION_32 :
					       DRAIN_LATENCY_PRECISION_16;
	else
		*prec_mult = (entries > 128) ? DRAIN_LATENCY_PRECISION_64 :
					       DRAIN_LATENCY_PRECISION_32;
	*drain_latency = (64 * (*prec_mult) * 4) / entries;

	if (*drain_latency > DRAIN_LATENCY_MASK)
		*drain_latency = DRAIN_LATENCY_MASK;

	return true;
}

/*
 * Update drain latency registers of memory arbiter
 *
 * Valleyview SoC has a new memory arbiter and needs drain latency registers
 * to be programmed. Each plane has a drain latency multiplier and a drain
 * latency value.
 */
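/*
 * Worked example (not part of the original file), following
 * vlv_compute_drain_latency() above: a 148500 kHz pixel clock at 4 bytes per
 * pixel drains ceil(148500/1000) * 4 = 596 FIFO entries per microsecond, so
 * on VLV the higher precision multiplier is chosen and the drain latency
 * becomes (64 * 64 * 4) / 596 = 27. Plain integers stand in for the
 * DRAIN_LATENCY_PRECISION_* constants; illustrative numbers only.
 */
#if 0	/* example only */
static void example_drain_latency(void)
{
	int clock = 148500, pixel_size = 4;
	int entries = ((clock + 999) / 1000) * pixel_size;	/* 596 */
	int prec_mult = (entries > 128) ? 64 : 32;		/* VLV case */
	int drain_latency = (64 * prec_mult * 4) / entries;	/* 27 */
}
#endif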
static void vlv_update_drain_latency(struct drm_crtc *crtc)
{
	struct drm_device *dev = crtc->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
	int pixel_size;
	int drain_latency;
	enum pipe pipe = intel_crtc->pipe;
	int plane_prec, prec_mult, plane_dl;
	const int high_precision = IS_CHERRYVIEW(dev) ?
		DRAIN_LATENCY_PRECISION_32 : DRAIN_LATENCY_PRECISION_64;

	plane_dl = I915_READ(VLV_DDL(pipe)) & ~(DDL_PLANE_PRECISION_HIGH |
		   DRAIN_LATENCY_MASK | DDL_CURSOR_PRECISION_HIGH |
		   (DRAIN_LATENCY_MASK << DDL_CURSOR_SHIFT));

	if (!intel_crtc_active(crtc)) {
		I915_WRITE(VLV_DDL(pipe), plane_dl);
		return;
	}

	/* Primary plane Drain Latency */
	pixel_size = crtc->primary->fb->bits_per_pixel / 8;	/* BPP */
	if (vlv_compute_drain_latency(crtc, pixel_size, &prec_mult, &drain_latency)) {
		plane_prec = (prec_mult == high_precision) ?
					   DDL_PLANE_PRECISION_HIGH :
					   DDL_PLANE_PRECISION_LOW;
		plane_dl |= plane_prec | drain_latency;
	}

	/* Cursor Drain Latency
	 * BPP is always 4 for cursor
	 */
	pixel_size = 4;

	/* Program cursor DL only if it is enabled */
	if (intel_crtc->cursor_base &&
	    vlv_compute_drain_latency(crtc, pixel_size, &prec_mult, &drain_latency)) {
		plane_prec = (prec_mult == high_precision) ?
					   DDL_CURSOR_PRECISION_HIGH :
					   DDL_CURSOR_PRECISION_LOW;
		plane_dl |= plane_prec | (drain_latency << DDL_CURSOR_SHIFT);
	}

	I915_WRITE(VLV_DDL(pipe), plane_dl);
}

#define single_plane_enabled(mask) is_power_of_2(mask)

static void valleyview_update_wm(struct drm_crtc *crtc)
{
	struct drm_device *dev = crtc->dev;
	static const int sr_latency_ns = 12000;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int planea_wm, planeb_wm, cursora_wm, cursorb_wm;
	int plane_sr, cursor_sr;
	int ignore_plane_sr, ignore_cursor_sr;
	unsigned int enabled = 0;
	bool cxsr_enabled;

	vlv_update_drain_latency(crtc);

	if (g4x_compute_wm0(dev, PIPE_A,
			    &valleyview_wm_info, pessimal_latency_ns,
			    &valleyview_cursor_wm_info, pessimal_latency_ns,
			    &planea_wm, &cursora_wm))
		enabled |= 1 << PIPE_A;

	if (g4x_compute_wm0(dev, PIPE_B,
			    &valleyview_wm_info, pessimal_latency_ns,
			    &valleyview_cursor_wm_info, pessimal_latency_ns,
			    &planeb_wm, &cursorb_wm))
		enabled |= 1 << PIPE_B;

	if (single_plane_enabled(enabled) &&
	    g4x_compute_srwm(dev, ffs(enabled) - 1,
			     sr_latency_ns,
			     &valleyview_wm_info,
			     &valleyview_cursor_wm_info,
			     &plane_sr, &ignore_cursor_sr) &&
	    g4x_compute_srwm(dev, ffs(enabled) - 1,
			     2*sr_latency_ns,
			     &valleyview_wm_info,
			     &valleyview_cursor_wm_info,
			     &ignore_plane_sr, &cursor_sr)) {
		cxsr_enabled = true;
	} else {
		cxsr_enabled = false;
		intel_set_memory_cxsr(dev_priv, false);
		plane_sr = cursor_sr = 0;
	}

	DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, "
		      "B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n",
		      planea_wm, cursora_wm,
		      planeb_wm, cursorb_wm,
		      plane_sr, cursor_sr);

	I915_WRITE(DSPFW1,
		   (plane_sr << DSPFW_SR_SHIFT) |
		   (cursorb_wm << DSPFW_CURSORB_SHIFT) |
		   (planeb_wm << DSPFW_PLANEB_SHIFT) |
		   (planea_wm << DSPFW_PLANEA_SHIFT));
	I915_WRITE(DSPFW2,
		   (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
		   (cursora_wm << DSPFW_CURSORA_SHIFT));
	I915_WRITE(DSPFW3,
		   (I915_READ(DSPFW3) & ~DSPFW_CURSOR_SR_MASK) |
		   (cursor_sr << DSPFW_CURSOR_SR_SHIFT));

	if (cxsr_enabled)
		intel_set_memory_cxsr(dev_priv, true);
}
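/*
 * Aside (illustration, not part of the original file): self-refresh
 * watermarks are only computed when exactly one pipe is enabled, which the
 * single_plane_enabled() macro above tests by checking whether the enabled
 * mask is a power of two (exactly one bit set):
 *
 *	enabled = 1 << PIPE_A;		-> 0x1, power of 2, SR computed
 *	enabled |= 1 << PIPE_B;		-> 0x3, not a power of 2, SR skipped
 *
 * ffs(enabled) - 1 then recovers the pipe index of that single set bit.
 */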
static void cherryview_update_wm(struct drm_crtc *crtc)
{
	struct drm_device *dev = crtc->dev;
	static const int sr_latency_ns = 12000;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int planea_wm, planeb_wm, planec_wm;
	int cursora_wm, cursorb_wm, cursorc_wm;
	int plane_sr, cursor_sr;
	int ignore_plane_sr, ignore_cursor_sr;
	unsigned int enabled = 0;
	bool cxsr_enabled;

	vlv_update_drain_latency(crtc);

	if (g4x_compute_wm0(dev, PIPE_A,
			    &valleyview_wm_info, pessimal_latency_ns,
			    &valleyview_cursor_wm_info, pessimal_latency_ns,
			    &planea_wm, &cursora_wm))
		enabled |= 1 << PIPE_A;

	if (g4x_compute_wm0(dev, PIPE_B,
			    &valleyview_wm_info, pessimal_latency_ns,
			    &valleyview_cursor_wm_info, pessimal_latency_ns,
			    &planeb_wm, &cursorb_wm))
		enabled |= 1 << PIPE_B;

	if (g4x_compute_wm0(dev, PIPE_C,
			    &valleyview_wm_info, pessimal_latency_ns,
			    &valleyview_cursor_wm_info, pessimal_latency_ns,
			    &planec_wm, &cursorc_wm))
		enabled |= 1 << PIPE_C;

	if (single_plane_enabled(enabled) &&
	    g4x_compute_srwm(dev, ffs(enabled) - 1,
			     sr_latency_ns,
			     &valleyview_wm_info,
			     &valleyview_cursor_wm_info,
			     &plane_sr, &ignore_cursor_sr) &&
	    g4x_compute_srwm(dev, ffs(enabled) - 1,
			     2*sr_latency_ns,
			     &valleyview_wm_info,
			     &valleyview_cursor_wm_info,
			     &ignore_plane_sr, &cursor_sr)) {
		cxsr_enabled = true;
	} else {
		cxsr_enabled = false;
		intel_set_memory_cxsr(dev_priv, false);
		plane_sr = cursor_sr = 0;
	}

	DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, "
		      "B: plane=%d, cursor=%d, C: plane=%d, cursor=%d, "
		      "SR: plane=%d, cursor=%d\n",
		      planea_wm, cursora_wm,
		      planeb_wm, cursorb_wm,
		      planec_wm, cursorc_wm,
		      plane_sr, cursor_sr);

	I915_WRITE(DSPFW1,
		   (plane_sr << DSPFW_SR_SHIFT) |
		   (cursorb_wm << DSPFW_CURSORB_SHIFT) |
		   (planeb_wm << DSPFW_PLANEB_SHIFT) |
		   (planea_wm << DSPFW_PLANEA_SHIFT));
	I915_WRITE(DSPFW2,
		   (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
		   (cursora_wm << DSPFW_CURSORA_SHIFT));
	I915_WRITE(DSPFW3,
		   (I915_READ(DSPFW3) & ~DSPFW_CURSOR_SR_MASK) |
		   (cursor_sr << DSPFW_CURSOR_SR_SHIFT));
	I915_WRITE(DSPFW9_CHV,
		   (I915_READ(DSPFW9_CHV) & ~(DSPFW_PLANEC_MASK |
					      DSPFW_CURSORC_MASK)) |
		   (planec_wm << DSPFW_PLANEC_SHIFT) |
		   (cursorc_wm << DSPFW_CURSORC_SHIFT));

	if (cxsr_enabled)
		intel_set_memory_cxsr(dev_priv, true);
}

static void valleyview_update_sprite_wm(struct drm_plane *plane,
					struct drm_crtc *crtc,
					uint32_t sprite_width,
					uint32_t sprite_height,
					int pixel_size,
					bool enabled, bool scaled)
{
	struct drm_device *dev = crtc->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int pipe = to_intel_plane(plane)->pipe;
	int sprite = to_intel_plane(plane)->plane;
	int drain_latency;
	int plane_prec;
	int sprite_dl;
	int prec_mult;
	const int high_precision = IS_CHERRYVIEW(dev) ?
		DRAIN_LATENCY_PRECISION_32 : DRAIN_LATENCY_PRECISION_64;

	sprite_dl = I915_READ(VLV_DDL(pipe)) & ~(DDL_SPRITE_PRECISION_HIGH(sprite) |
		    (DRAIN_LATENCY_MASK << DDL_SPRITE_SHIFT(sprite)));

	if (enabled && vlv_compute_drain_latency(crtc, pixel_size, &prec_mult,
						 &drain_latency)) {
		plane_prec = (prec_mult == high_precision) ?
					   DDL_SPRITE_PRECISION_HIGH(sprite) :
					   DDL_SPRITE_PRECISION_LOW(sprite);
		sprite_dl |= plane_prec |
			     (drain_latency << DDL_SPRITE_SHIFT(sprite));
	}

	I915_WRITE(VLV_DDL(pipe), sprite_dl);
}
static void g4x_update_wm(struct drm_crtc *crtc)
{
	struct drm_device *dev = crtc->dev;
	static const int sr_latency_ns = 12000;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int planea_wm, planeb_wm, cursora_wm, cursorb_wm;
	int plane_sr, cursor_sr;
	unsigned int enabled = 0;
	bool cxsr_enabled;

	if (g4x_compute_wm0(dev, PIPE_A,
			    &g4x_wm_info, pessimal_latency_ns,
			    &g4x_cursor_wm_info, pessimal_latency_ns,
			    &planea_wm, &cursora_wm))
		enabled |= 1 << PIPE_A;

	if (g4x_compute_wm0(dev, PIPE_B,
			    &g4x_wm_info, pessimal_latency_ns,
			    &g4x_cursor_wm_info, pessimal_latency_ns,
			    &planeb_wm, &cursorb_wm))
		enabled |= 1 << PIPE_B;

	if (single_plane_enabled(enabled) &&
	    g4x_compute_srwm(dev, ffs(enabled) - 1,
			     sr_latency_ns,
			     &g4x_wm_info,
			     &g4x_cursor_wm_info,
			     &plane_sr, &cursor_sr)) {
		cxsr_enabled = true;
	} else {
		cxsr_enabled = false;
		intel_set_memory_cxsr(dev_priv, false);
		plane_sr = cursor_sr = 0;
	}

	DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, "
		      "B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n",
		      planea_wm, cursora_wm,
		      planeb_wm, cursorb_wm,
		      plane_sr, cursor_sr);

	I915_WRITE(DSPFW1,
		   (plane_sr << DSPFW_SR_SHIFT) |
		   (cursorb_wm << DSPFW_CURSORB_SHIFT) |
		   (planeb_wm << DSPFW_PLANEB_SHIFT) |
		   (planea_wm << DSPFW_PLANEA_SHIFT));
	I915_WRITE(DSPFW2,
		   (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
		   (cursora_wm << DSPFW_CURSORA_SHIFT));
	/* HPLL off in SR has some issues on G4x... disable it */
	I915_WRITE(DSPFW3,
		   (I915_READ(DSPFW3) & ~(DSPFW_HPLL_SR_EN | DSPFW_CURSOR_SR_MASK)) |
		   (cursor_sr << DSPFW_CURSOR_SR_SHIFT));

	if (cxsr_enabled)
		intel_set_memory_cxsr(dev_priv, true);
}

static void i965_update_wm(struct drm_crtc *unused_crtc)
{
	struct drm_device *dev = unused_crtc->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_crtc *crtc;
	int srwm = 1;
	int cursor_sr = 16;
	bool cxsr_enabled;

	/* Calc sr entries for one plane configs */
	crtc = single_enabled_crtc(dev);
	if (crtc) {
		/* self-refresh has much higher latency */
		static const int sr_latency_ns = 12000;
		const struct drm_display_mode *adjusted_mode =
			&to_intel_crtc(crtc)->config.adjusted_mode;
		int clock = adjusted_mode->crtc_clock;
		int htotal = adjusted_mode->crtc_htotal;
		int hdisplay = to_intel_crtc(crtc)->config.pipe_src_w;
		int pixel_size = crtc->primary->fb->bits_per_pixel / 8;
		unsigned long line_time_us;
		int entries;

		line_time_us = max(htotal * 1000 / clock, 1);

		/* Use ns/us then divide to preserve precision */
		entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
			pixel_size * hdisplay;
		entries = DIV_ROUND_UP(entries, I915_FIFO_LINE_SIZE);
		srwm = I965_FIFO_SIZE - entries;
		if (srwm < 0)
			srwm = 1;
		srwm &= 0x1ff;
		DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n",
			      entries, srwm);

		entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
			pixel_size * to_intel_crtc(crtc)->cursor_width;
		entries = DIV_ROUND_UP(entries,
				       i965_cursor_wm_info.cacheline_size);
		cursor_sr = i965_cursor_wm_info.fifo_size -
			    (entries + i965_cursor_wm_info.guard_size);

		if (cursor_sr > i965_cursor_wm_info.max_wm)
			cursor_sr = i965_cursor_wm_info.max_wm;

		DRM_DEBUG_KMS("self-refresh watermark: display plane %d "
			      "cursor %d\n", srwm, cursor_sr);

		cxsr_enabled = true;
	} else {
		cxsr_enabled = false;
		/* Turn off self refresh if both pipes are enabled */
		intel_set_memory_cxsr(dev_priv, false);
	}

	DRM_DEBUG_KMS("Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n",
		      srwm);

	/* 965 has limitations... */
	I915_WRITE(DSPFW1, (srwm << DSPFW_SR_SHIFT) |
		   (8 << DSPFW_CURSORB_SHIFT) |
		   (8 << DSPFW_PLANEB_SHIFT) |
		   (8 << DSPFW_PLANEA_SHIFT));
	I915_WRITE(DSPFW2, (8 << DSPFW_CURSORA_SHIFT) |
		   (8 << DSPFW_PLANEC_SHIFT_OLD));
	/* update cursor SR watermark */
	I915_WRITE(DSPFW3, (cursor_sr << DSPFW_CURSOR_SR_SHIFT));

	if (cxsr_enabled)
		intel_set_memory_cxsr(dev_priv, true);
}
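/*
 * Aside (illustration, not part of the original file): the "use ns/us then
 * divide to preserve precision" comment above refers to rounding the latency
 * up to whole scanlines without floating point. With sr_latency_ns = 12000
 * and line_time_us = 14, (12000/14 + 1000)/1000 = 1857/1000 = 1 line,
 * whereas dividing in microseconds first (12/14 = 0) would round the latency
 * down to zero lines. Illustrative numbers only.
 */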
static void i9xx_update_wm(struct drm_crtc *unused_crtc)
{
	struct drm_device *dev = unused_crtc->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	const struct intel_watermark_params *wm_info;
	uint32_t fwater_lo;
	uint32_t fwater_hi;
	int cwm, srwm = 1;
	int fifo_size;
	int planea_wm, planeb_wm;
	struct drm_crtc *crtc, *enabled = NULL;

	if (IS_I945GM(dev))
		wm_info = &i945_wm_info;
	else if (!IS_GEN2(dev))
		wm_info = &i915_wm_info;
	else
		wm_info = &i830_a_wm_info;

	fifo_size = dev_priv->display.get_fifo_size(dev, 0);
	crtc = intel_get_crtc_for_plane(dev, 0);
	if (intel_crtc_active(crtc)) {
		const struct drm_display_mode *adjusted_mode;
		int cpp = crtc->primary->fb->bits_per_pixel / 8;
		if (IS_GEN2(dev))
			cpp = 4;

		adjusted_mode = &to_intel_crtc(crtc)->config.adjusted_mode;
		planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
					       wm_info, fifo_size, cpp,
					       pessimal_latency_ns);
		enabled = crtc;
	} else {
		planea_wm = fifo_size - wm_info->guard_size;
		if (planea_wm > (long)wm_info->max_wm)
			planea_wm = wm_info->max_wm;
	}

	if (IS_GEN2(dev))
		wm_info = &i830_bc_wm_info;

	fifo_size = dev_priv->display.get_fifo_size(dev, 1);
	crtc = intel_get_crtc_for_plane(dev, 1);
	if (intel_crtc_active(crtc)) {
		const struct drm_display_mode *adjusted_mode;
		int cpp = crtc->primary->fb->bits_per_pixel / 8;
		if (IS_GEN2(dev))
			cpp = 4;

		adjusted_mode = &to_intel_crtc(crtc)->config.adjusted_mode;
		planeb_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
					       wm_info, fifo_size, cpp,
					       pessimal_latency_ns);
		if (enabled == NULL)
			enabled = crtc;
		else
			enabled = NULL;
	} else {
		planeb_wm = fifo_size - wm_info->guard_size;
		if (planeb_wm > (long)wm_info->max_wm)
			planeb_wm = wm_info->max_wm;
	}

	DRM_DEBUG_KMS("FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm);

	if (IS_I915GM(dev) && enabled) {
		struct drm_i915_gem_object *obj;

		obj = intel_fb_obj(enabled->primary->fb);

		/* self-refresh seems busted with untiled */
		if (obj->tiling_mode == I915_TILING_NONE)
			enabled = NULL;
	}

	/*
	 * Overlay gets an aggressive default since video jitter is bad.
	 */
	cwm = 2;

	/* Play safe and disable self-refresh before adjusting watermarks. */
*/ 1819 intel_set_memory_cxsr(dev_priv, false); 1820 1821 /* Calc sr entries for one plane configs */ 1822 if (HAS_FW_BLC(dev) && enabled) { 1823 /* self-refresh has much higher latency */ 1824 static const int sr_latency_ns = 6000; 1825 const struct drm_display_mode *adjusted_mode = 1826 &to_intel_crtc(enabled)->config.adjusted_mode; 1827 int clock = adjusted_mode->crtc_clock; 1828 int htotal = adjusted_mode->crtc_htotal; 1829 int hdisplay = to_intel_crtc(enabled)->config.pipe_src_w; 1830 int pixel_size = enabled->primary->fb->bits_per_pixel / 8; 1831 unsigned long line_time_us; 1832 int entries; 1833 1834 line_time_us = max(htotal * 1000 / clock, 1); 1835 1836 /* Use ns/us then divide to preserve precision */ 1837 entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) * 1838 pixel_size * hdisplay; 1839 entries = DIV_ROUND_UP(entries, wm_info->cacheline_size); 1840 DRM_DEBUG_KMS("self-refresh entries: %d\n", entries); 1841 srwm = wm_info->fifo_size - entries; 1842 if (srwm < 0) 1843 srwm = 1; 1844 1845 if (IS_I945G(dev) || IS_I945GM(dev)) 1846 I915_WRITE(FW_BLC_SELF, 1847 FW_BLC_SELF_FIFO_MASK | (srwm & 0xff)); 1848 else if (IS_I915GM(dev)) 1849 I915_WRITE(FW_BLC_SELF, srwm & 0x3f); 1850 } 1851 1852 DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n", 1853 planea_wm, planeb_wm, cwm, srwm); 1854 1855 fwater_lo = ((planeb_wm & 0x3f) << 16) | (planea_wm & 0x3f); 1856 fwater_hi = (cwm & 0x1f); 1857 1858 /* Set request length to 8 cachelines per fetch */ 1859 fwater_lo = fwater_lo | (1 << 24) | (1 << 8); 1860 fwater_hi = fwater_hi | (1 << 8); 1861 1862 I915_WRITE(FW_BLC, fwater_lo); 1863 I915_WRITE(FW_BLC2, fwater_hi); 1864 1865 if (enabled) 1866 intel_set_memory_cxsr(dev_priv, true); 1867} 1868 1869static void i845_update_wm(struct drm_crtc *unused_crtc) 1870{ 1871 struct drm_device *dev = unused_crtc->dev; 1872 struct drm_i915_private *dev_priv = dev->dev_private; 1873 struct drm_crtc *crtc; 1874 const struct drm_display_mode *adjusted_mode; 1875 uint32_t fwater_lo; 1876 int planea_wm; 1877 1878 crtc = single_enabled_crtc(dev); 1879 if (crtc == NULL) 1880 return; 1881 1882 adjusted_mode = &to_intel_crtc(crtc)->config.adjusted_mode; 1883 planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock, 1884 &i845_wm_info, 1885 dev_priv->display.get_fifo_size(dev, 0), 1886 4, pessimal_latency_ns); 1887 fwater_lo = I915_READ(FW_BLC) & ~0xfff; 1888 fwater_lo |= (3<<8) | planea_wm; 1889 1890 DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d\n", planea_wm); 1891 1892 I915_WRITE(FW_BLC, fwater_lo); 1893} 1894 1895static uint32_t ilk_pipe_pixel_rate(struct drm_device *dev, 1896 struct drm_crtc *crtc) 1897{ 1898 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 1899 uint32_t pixel_rate; 1900 1901 pixel_rate = intel_crtc->config.adjusted_mode.crtc_clock; 1902 1903 /* We only use IF-ID interlacing. If we ever use PF-ID we'll need to 1904 * adjust the pixel_rate here. */ 1905 1906 if (intel_crtc->config.pch_pfit.enabled) { 1907 uint64_t pipe_w, pipe_h, pfit_w, pfit_h; 1908 uint32_t pfit_size = intel_crtc->config.pch_pfit.size; 1909 1910 pipe_w = intel_crtc->config.pipe_src_w; 1911 pipe_h = intel_crtc->config.pipe_src_h; 1912 pfit_w = (pfit_size >> 16) & 0xFFFF; 1913 pfit_h = pfit_size & 0xFFFF; 1914 if (pipe_w < pfit_w) 1915 pipe_w = pfit_w; 1916 if (pipe_h < pfit_h) 1917 pipe_h = pfit_h; 1918 1919 pixel_rate = div_u64((uint64_t) pixel_rate * pipe_w * pipe_h, 1920 pfit_w * pfit_h); 1921 } 1922 1923 return pixel_rate; 1924} 1925 1926/* latency must be in 0.1us units. 
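 *
 * (Editor's note, a worked example that is not in the original source:
 * with pixel_rate = 148500 (kHz), bytes_per_pixel = 4 and latency = 50,
 * i.e. 5 usec, ilk_wm_method1() below computes
 *
 *	ret = 148500 * 4 * 50				= 29700000
 *	ret = DIV_ROUND_UP_ULL(ret, 64 * 10000) + 2	= 47 + 2 = 49
 * )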
*/ 1927static uint32_t ilk_wm_method1(uint32_t pixel_rate, uint8_t bytes_per_pixel, 1928 uint32_t latency) 1929{ 1930 uint64_t ret; 1931 1932 if (WARN(latency == 0, "Latency value missing\n")) 1933 return UINT_MAX; 1934 1935 ret = (uint64_t) pixel_rate * bytes_per_pixel * latency; 1936 ret = DIV_ROUND_UP_ULL(ret, 64 * 10000) + 2; 1937 1938 return ret; 1939} 1940 1941/* latency must be in 0.1us units. */ 1942static uint32_t ilk_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal, 1943 uint32_t horiz_pixels, uint8_t bytes_per_pixel, 1944 uint32_t latency) 1945{ 1946 uint32_t ret; 1947 1948 if (WARN(latency == 0, "Latency value missing\n")) 1949 return UINT_MAX; 1950 1951 ret = (latency * pixel_rate) / (pipe_htotal * 10000); 1952 ret = (ret + 1) * horiz_pixels * bytes_per_pixel; 1953 ret = DIV_ROUND_UP(ret, 64) + 2; 1954 return ret; 1955} 1956 1957static uint32_t ilk_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels, 1958 uint8_t bytes_per_pixel) 1959{ 1960 return DIV_ROUND_UP(pri_val * 64, horiz_pixels * bytes_per_pixel) + 2; 1961} 1962 1963struct skl_pipe_wm_parameters { 1964 bool active; 1965 uint32_t pipe_htotal; 1966 uint32_t pixel_rate; /* in KHz */ 1967 struct intel_plane_wm_parameters plane[I915_MAX_PLANES]; 1968 struct intel_plane_wm_parameters cursor; 1969}; 1970 1971struct ilk_pipe_wm_parameters { 1972 bool active; 1973 uint32_t pipe_htotal; 1974 uint32_t pixel_rate; 1975 struct intel_plane_wm_parameters pri; 1976 struct intel_plane_wm_parameters spr; 1977 struct intel_plane_wm_parameters cur; 1978}; 1979 1980struct ilk_wm_maximums { 1981 uint16_t pri; 1982 uint16_t spr; 1983 uint16_t cur; 1984 uint16_t fbc; 1985}; 1986 1987/* used in computing the new watermarks state */ 1988struct intel_wm_config { 1989 unsigned int num_pipes_active; 1990 bool sprites_enabled; 1991 bool sprites_scaled; 1992}; 1993 1994/* 1995 * For both WM_PIPE and WM_LP. 1996 * mem_value must be in 0.1us units. 1997 */ 1998static uint32_t ilk_compute_pri_wm(const struct ilk_pipe_wm_parameters *params, 1999 uint32_t mem_value, 2000 bool is_lp) 2001{ 2002 uint32_t method1, method2; 2003 2004 if (!params->active || !params->pri.enabled) 2005 return 0; 2006 2007 method1 = ilk_wm_method1(params->pixel_rate, 2008 params->pri.bytes_per_pixel, 2009 mem_value); 2010 2011 if (!is_lp) 2012 return method1; 2013 2014 method2 = ilk_wm_method2(params->pixel_rate, 2015 params->pipe_htotal, 2016 params->pri.horiz_pixels, 2017 params->pri.bytes_per_pixel, 2018 mem_value); 2019 2020 return min(method1, method2); 2021} 2022 2023/* 2024 * For both WM_PIPE and WM_LP. 2025 * mem_value must be in 0.1us units. 2026 */ 2027static uint32_t ilk_compute_spr_wm(const struct ilk_pipe_wm_parameters *params, 2028 uint32_t mem_value) 2029{ 2030 uint32_t method1, method2; 2031 2032 if (!params->active || !params->spr.enabled) 2033 return 0; 2034 2035 method1 = ilk_wm_method1(params->pixel_rate, 2036 params->spr.bytes_per_pixel, 2037 mem_value); 2038 method2 = ilk_wm_method2(params->pixel_rate, 2039 params->pipe_htotal, 2040 params->spr.horiz_pixels, 2041 params->spr.bytes_per_pixel, 2042 mem_value); 2043 return min(method1, method2); 2044} 2045 2046/* 2047 * For both WM_PIPE and WM_LP. 2048 * mem_value must be in 0.1us units. 
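 *
 * (Editor's note, a worked example that is not in the original source:
 * the cursor path below uses ilk_wm_method2() alone. With pixel_rate =
 * 148500, pipe_htotal = 2200, a 64 pixel wide 4-byte cursor and
 * mem_value = 50:
 *
 *	ret = (50 * 148500) / (2200 * 10000)	= 0
 *	ret = (0 + 1) * 64 * 4			= 256
 *	ret = DIV_ROUND_UP(256, 64) + 2		= 6
 * )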
2049 */ 2050static uint32_t ilk_compute_cur_wm(const struct ilk_pipe_wm_parameters *params, 2051 uint32_t mem_value) 2052{ 2053 if (!params->active || !params->cur.enabled) 2054 return 0; 2055 2056 return ilk_wm_method2(params->pixel_rate, 2057 params->pipe_htotal, 2058 params->cur.horiz_pixels, 2059 params->cur.bytes_per_pixel, 2060 mem_value); 2061} 2062 2063/* Only for WM_LP. */ 2064static uint32_t ilk_compute_fbc_wm(const struct ilk_pipe_wm_parameters *params, 2065 uint32_t pri_val) 2066{ 2067 if (!params->active || !params->pri.enabled) 2068 return 0; 2069 2070 return ilk_wm_fbc(pri_val, 2071 params->pri.horiz_pixels, 2072 params->pri.bytes_per_pixel); 2073} 2074 2075static unsigned int ilk_display_fifo_size(const struct drm_device *dev) 2076{ 2077 if (INTEL_INFO(dev)->gen >= 8) 2078 return 3072; 2079 else if (INTEL_INFO(dev)->gen >= 7) 2080 return 768; 2081 else 2082 return 512; 2083} 2084 2085static unsigned int ilk_plane_wm_reg_max(const struct drm_device *dev, 2086 int level, bool is_sprite) 2087{ 2088 if (INTEL_INFO(dev)->gen >= 8) 2089 /* BDW primary/sprite plane watermarks */ 2090 return level == 0 ? 255 : 2047; 2091 else if (INTEL_INFO(dev)->gen >= 7) 2092 /* IVB/HSW primary/sprite plane watermarks */ 2093 return level == 0 ? 127 : 1023; 2094 else if (!is_sprite) 2095 /* ILK/SNB primary plane watermarks */ 2096 return level == 0 ? 127 : 511; 2097 else 2098 /* ILK/SNB sprite plane watermarks */ 2099 return level == 0 ? 63 : 255; 2100} 2101 2102static unsigned int ilk_cursor_wm_reg_max(const struct drm_device *dev, 2103 int level) 2104{ 2105 if (INTEL_INFO(dev)->gen >= 7) 2106 return level == 0 ? 63 : 255; 2107 else 2108 return level == 0 ? 31 : 63; 2109} 2110 2111static unsigned int ilk_fbc_wm_reg_max(const struct drm_device *dev) 2112{ 2113 if (INTEL_INFO(dev)->gen >= 8) 2114 return 31; 2115 else 2116 return 15; 2117} 2118 2119/* Calculate the maximum primary/sprite plane watermark */ 2120static unsigned int ilk_plane_wm_max(const struct drm_device *dev, 2121 int level, 2122 const struct intel_wm_config *config, 2123 enum intel_ddb_partitioning ddb_partitioning, 2124 bool is_sprite) 2125{ 2126 unsigned int fifo_size = ilk_display_fifo_size(dev); 2127 2128 /* if sprites aren't enabled, sprites get nothing */ 2129 if (is_sprite && !config->sprites_enabled) 2130 return 0; 2131 2132 /* HSW allows LP1+ watermarks even with multiple pipes */ 2133 if (level == 0 || config->num_pipes_active > 1) { 2134 fifo_size /= INTEL_INFO(dev)->num_pipes; 2135 2136 /* 2137 * For some reason the non self refresh 2138 * FIFO size is only half of the self 2139 * refresh FIFO size on ILK/SNB. 
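 *
 * (Editor's note, a worked example that is not in the original source:
 * on ILK, gen 5, ilk_display_fifo_size() returns 512 and num_pipes is 2.
 * At level 0 with sprites enabled, the code below then computes
 * 512 / 2 (pipes) / 2 (non-SR halving) / 2 (1:1 plane/sprite split)
 * = 64, well under the 127 block LP0 register maximum it is clamped
 * against.)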
2140 */ 2141 if (INTEL_INFO(dev)->gen <= 6) 2142 fifo_size /= 2; 2143 } 2144 2145 if (config->sprites_enabled) { 2146 /* level 0 is always calculated with 1:1 split */ 2147 if (level > 0 && ddb_partitioning == INTEL_DDB_PART_5_6) { 2148 if (is_sprite) 2149 fifo_size *= 5; 2150 fifo_size /= 6; 2151 } else { 2152 fifo_size /= 2; 2153 } 2154 } 2155 2156 /* clamp to max that the registers can hold */ 2157 return min(fifo_size, ilk_plane_wm_reg_max(dev, level, is_sprite)); 2158} 2159 2160/* Calculate the maximum cursor plane watermark */ 2161static unsigned int ilk_cursor_wm_max(const struct drm_device *dev, 2162 int level, 2163 const struct intel_wm_config *config) 2164{ 2165 /* HSW LP1+ watermarks w/ multiple pipes */ 2166 if (level > 0 && config->num_pipes_active > 1) 2167 return 64; 2168 2169 /* otherwise just report max that registers can hold */ 2170 return ilk_cursor_wm_reg_max(dev, level); 2171} 2172 2173static void ilk_compute_wm_maximums(const struct drm_device *dev, 2174 int level, 2175 const struct intel_wm_config *config, 2176 enum intel_ddb_partitioning ddb_partitioning, 2177 struct ilk_wm_maximums *max) 2178{ 2179 max->pri = ilk_plane_wm_max(dev, level, config, ddb_partitioning, false); 2180 max->spr = ilk_plane_wm_max(dev, level, config, ddb_partitioning, true); 2181 max->cur = ilk_cursor_wm_max(dev, level, config); 2182 max->fbc = ilk_fbc_wm_reg_max(dev); 2183} 2184 2185static void ilk_compute_wm_reg_maximums(struct drm_device *dev, 2186 int level, 2187 struct ilk_wm_maximums *max) 2188{ 2189 max->pri = ilk_plane_wm_reg_max(dev, level, false); 2190 max->spr = ilk_plane_wm_reg_max(dev, level, true); 2191 max->cur = ilk_cursor_wm_reg_max(dev, level); 2192 max->fbc = ilk_fbc_wm_reg_max(dev); 2193} 2194 2195static bool ilk_validate_wm_level(int level, 2196 const struct ilk_wm_maximums *max, 2197 struct intel_wm_level *result) 2198{ 2199 bool ret; 2200 2201 /* already determined to be invalid? */ 2202 if (!result->enable) 2203 return false; 2204 2205 result->enable = result->pri_val <= max->pri && 2206 result->spr_val <= max->spr && 2207 result->cur_val <= max->cur; 2208 2209 ret = result->enable; 2210 2211 /* 2212 * HACK until we can pre-compute everything, 2213 * and thus fail gracefully if LP0 watermarks 2214 * are exceeded... 
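 *
 * (Editor's note, not in the original source: for instance, if an LP0
 * primary value computes to 140 while max->pri is 127, the block below
 * logs the excess, clamps pri_val to 127 and re-enables the level, so
 * LP0 stays usable while the function still returns false to the
 * caller.)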
2215	 */
2216	if (level == 0 && !result->enable) {
2217		if (result->pri_val > max->pri)
2218			DRM_DEBUG_KMS("Primary WM%d too large %u (max %u)\n",
2219				      level, result->pri_val, max->pri);
2220		if (result->spr_val > max->spr)
2221			DRM_DEBUG_KMS("Sprite WM%d too large %u (max %u)\n",
2222				      level, result->spr_val, max->spr);
2223		if (result->cur_val > max->cur)
2224			DRM_DEBUG_KMS("Cursor WM%d too large %u (max %u)\n",
2225				      level, result->cur_val, max->cur);
2226
2227		result->pri_val = min_t(uint32_t, result->pri_val, max->pri);
2228		result->spr_val = min_t(uint32_t, result->spr_val, max->spr);
2229		result->cur_val = min_t(uint32_t, result->cur_val, max->cur);
2230		result->enable = true;
2231	}
2232
2233	return ret;
2234}
2235
2236static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv,
2237				 int level,
2238				 const struct ilk_pipe_wm_parameters *p,
2239				 struct intel_wm_level *result)
2240{
2241	uint16_t pri_latency = dev_priv->wm.pri_latency[level];
2242	uint16_t spr_latency = dev_priv->wm.spr_latency[level];
2243	uint16_t cur_latency = dev_priv->wm.cur_latency[level];
2244
2245	/* WM1+ latency values stored in 0.5us units */
2246	if (level > 0) {
2247		pri_latency *= 5;
2248		spr_latency *= 5;
2249		cur_latency *= 5;
2250	}
2251
2252	result->pri_val = ilk_compute_pri_wm(p, pri_latency, level);
2253	result->spr_val = ilk_compute_spr_wm(p, spr_latency);
2254	result->cur_val = ilk_compute_cur_wm(p, cur_latency);
2255	result->fbc_val = ilk_compute_fbc_wm(p, result->pri_val);
2256	result->enable = true;
2257}
2258
2259static uint32_t
2260hsw_compute_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
2261{
2262	struct drm_i915_private *dev_priv = dev->dev_private;
2263	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
2264	struct drm_display_mode *mode = &intel_crtc->config.adjusted_mode;
2265	u32 linetime, ips_linetime;
2266
2267	if (!intel_crtc_active(crtc))
2268		return 0;
2269
2270	/* The watermarks are computed based on how long it takes to fill a
2271	 * single row at the given clock rate, multiplied by 8.
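	 *
	 * (Editor's note, a worked example that is not in the original
	 * source: with crtc_htotal = 2200 and crtc_clock = 148500, one
	 * line takes roughly 14.8 usec, and
	 *
	 *	linetime = DIV_ROUND_CLOSEST(2200 * 1000 * 8, 148500) = 119
	 *
	 * i.e. the register field holds the line time in 1/8 usec units.)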
2272 * */ 2273 linetime = DIV_ROUND_CLOSEST(mode->crtc_htotal * 1000 * 8, 2274 mode->crtc_clock); 2275 ips_linetime = DIV_ROUND_CLOSEST(mode->crtc_htotal * 1000 * 8, 2276 intel_ddi_get_cdclk_freq(dev_priv)); 2277 2278 return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) | 2279 PIPE_WM_LINETIME_TIME(linetime); 2280} 2281 2282static void intel_read_wm_latency(struct drm_device *dev, uint16_t wm[8]) 2283{ 2284 struct drm_i915_private *dev_priv = dev->dev_private; 2285 2286 if (IS_GEN9(dev)) { 2287 uint32_t val; 2288 int ret, i; 2289 int level, max_level = ilk_wm_max_level(dev); 2290 2291 /* read the first set of memory latencies[0:3] */ 2292 val = 0; /* data0 to be programmed to 0 for first set */ 2293 mutex_lock(&dev_priv->rps.hw_lock); 2294 ret = sandybridge_pcode_read(dev_priv, 2295 GEN9_PCODE_READ_MEM_LATENCY, 2296 &val); 2297 mutex_unlock(&dev_priv->rps.hw_lock); 2298 2299 if (ret) { 2300 DRM_ERROR("SKL Mailbox read error = %d\n", ret); 2301 return; 2302 } 2303 2304 wm[0] = val & GEN9_MEM_LATENCY_LEVEL_MASK; 2305 wm[1] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) & 2306 GEN9_MEM_LATENCY_LEVEL_MASK; 2307 wm[2] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) & 2308 GEN9_MEM_LATENCY_LEVEL_MASK; 2309 wm[3] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) & 2310 GEN9_MEM_LATENCY_LEVEL_MASK; 2311 2312 /* read the second set of memory latencies[4:7] */ 2313 val = 1; /* data0 to be programmed to 1 for second set */ 2314 mutex_lock(&dev_priv->rps.hw_lock); 2315 ret = sandybridge_pcode_read(dev_priv, 2316 GEN9_PCODE_READ_MEM_LATENCY, 2317 &val); 2318 mutex_unlock(&dev_priv->rps.hw_lock); 2319 if (ret) { 2320 DRM_ERROR("SKL Mailbox read error = %d\n", ret); 2321 return; 2322 } 2323 2324 wm[4] = val & GEN9_MEM_LATENCY_LEVEL_MASK; 2325 wm[5] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) & 2326 GEN9_MEM_LATENCY_LEVEL_MASK; 2327 wm[6] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) & 2328 GEN9_MEM_LATENCY_LEVEL_MASK; 2329 wm[7] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) & 2330 GEN9_MEM_LATENCY_LEVEL_MASK; 2331 2332 /* 2333 * punit doesn't take into account the read latency so we need 2334 * to add 2us to the various latency levels we retrieve from 2335 * the punit. 2336 * - W0 is a bit special in that it's the only level that 2337 * can't be disabled if we want to have display working, so 2338 * we always add 2us there. 2339 * - For levels >=1, punit returns 0us latency when they are 2340 * disabled, so we respect that and don't add 2us then 2341 * 2342 * Additionally, if a level n (n > 1) has a 0us latency, all 2343 * levels m (m >= n) need to be disabled. We make sure to 2344 * sanitize the values out of the punit to satisfy this 2345 * requirement. 
2346 */ 2347 wm[0] += 2; 2348 for (level = 1; level <= max_level; level++) 2349 if (wm[level] != 0) 2350 wm[level] += 2; 2351 else { 2352 for (i = level + 1; i <= max_level; i++) 2353 wm[i] = 0; 2354 2355 break; 2356 } 2357 } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { 2358 uint64_t sskpd = I915_READ64(MCH_SSKPD); 2359 2360 wm[0] = (sskpd >> 56) & 0xFF; 2361 if (wm[0] == 0) 2362 wm[0] = sskpd & 0xF; 2363 wm[1] = (sskpd >> 4) & 0xFF; 2364 wm[2] = (sskpd >> 12) & 0xFF; 2365 wm[3] = (sskpd >> 20) & 0x1FF; 2366 wm[4] = (sskpd >> 32) & 0x1FF; 2367 } else if (INTEL_INFO(dev)->gen >= 6) { 2368 uint32_t sskpd = I915_READ(MCH_SSKPD); 2369 2370 wm[0] = (sskpd >> SSKPD_WM0_SHIFT) & SSKPD_WM_MASK; 2371 wm[1] = (sskpd >> SSKPD_WM1_SHIFT) & SSKPD_WM_MASK; 2372 wm[2] = (sskpd >> SSKPD_WM2_SHIFT) & SSKPD_WM_MASK; 2373 wm[3] = (sskpd >> SSKPD_WM3_SHIFT) & SSKPD_WM_MASK; 2374 } else if (INTEL_INFO(dev)->gen >= 5) { 2375 uint32_t mltr = I915_READ(MLTR_ILK); 2376 2377 /* ILK primary LP0 latency is 700 ns */ 2378 wm[0] = 7; 2379 wm[1] = (mltr >> MLTR_WM1_SHIFT) & ILK_SRLT_MASK; 2380 wm[2] = (mltr >> MLTR_WM2_SHIFT) & ILK_SRLT_MASK; 2381 } 2382} 2383 2384static void intel_fixup_spr_wm_latency(struct drm_device *dev, uint16_t wm[5]) 2385{ 2386 /* ILK sprite LP0 latency is 1300 ns */ 2387 if (INTEL_INFO(dev)->gen == 5) 2388 wm[0] = 13; 2389} 2390 2391static void intel_fixup_cur_wm_latency(struct drm_device *dev, uint16_t wm[5]) 2392{ 2393 /* ILK cursor LP0 latency is 1300 ns */ 2394 if (INTEL_INFO(dev)->gen == 5) 2395 wm[0] = 13; 2396 2397 /* WaDoubleCursorLP3Latency:ivb */ 2398 if (IS_IVYBRIDGE(dev)) 2399 wm[3] *= 2; 2400} 2401 2402int ilk_wm_max_level(const struct drm_device *dev) 2403{ 2404 /* how many WM levels are we expecting */ 2405 if (IS_GEN9(dev)) 2406 return 7; 2407 else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) 2408 return 4; 2409 else if (INTEL_INFO(dev)->gen >= 6) 2410 return 3; 2411 else 2412 return 2; 2413} 2414 2415static void intel_print_wm_latency(struct drm_device *dev, 2416 const char *name, 2417 const uint16_t wm[8]) 2418{ 2419 int level, max_level = ilk_wm_max_level(dev); 2420 2421 for (level = 0; level <= max_level; level++) { 2422 unsigned int latency = wm[level]; 2423 2424 if (latency == 0) { 2425 DRM_ERROR("%s WM%d latency not provided\n", 2426 name, level); 2427 continue; 2428 } 2429 2430 /* 2431 * - latencies are in us on gen9. 2432 * - before then, WM1+ latency values are in 0.5us units 2433 */ 2434 if (IS_GEN9(dev)) 2435 latency *= 10; 2436 else if (level > 0) 2437 latency *= 5; 2438 2439 DRM_DEBUG_KMS("%s WM%d latency %u (%u.%u usec)\n", 2440 name, level, wm[level], 2441 latency / 10, latency % 10); 2442 } 2443} 2444 2445static bool ilk_increase_wm_latency(struct drm_i915_private *dev_priv, 2446 uint16_t wm[5], uint16_t min) 2447{ 2448 int level, max_level = ilk_wm_max_level(dev_priv->dev); 2449 2450 if (wm[0] >= min) 2451 return false; 2452 2453 wm[0] = max(wm[0], min); 2454 for (level = 1; level <= max_level; level++) 2455 wm[level] = max_t(uint16_t, wm[level], DIV_ROUND_UP(min, 5)); 2456 2457 return true; 2458} 2459 2460static void snb_wm_latency_quirk(struct drm_device *dev) 2461{ 2462 struct drm_i915_private *dev_priv = dev->dev_private; 2463 bool changed; 2464 2465 /* 2466 * The BIOS provided WM memory latency values are often 2467 * inadequate for high resolution displays. Adjust them. 
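	 *
	 * (Editor's note, a worked example that is not in the original
	 * source: ilk_increase_wm_latency(..., 12) below raises WM0 to at
	 * least 12, i.e. 1.2 usec in 0.1 usec units, and WM1+ to at least
	 * DIV_ROUND_UP(12, 5) = 3 in their 0.5 usec units, so BIOS-provided
	 * latencies of {5, 2, 4, 7} become {12, 3, 4, 7}.)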
2468 */ 2469 changed = ilk_increase_wm_latency(dev_priv, dev_priv->wm.pri_latency, 12) | 2470 ilk_increase_wm_latency(dev_priv, dev_priv->wm.spr_latency, 12) | 2471 ilk_increase_wm_latency(dev_priv, dev_priv->wm.cur_latency, 12); 2472 2473 if (!changed) 2474 return; 2475 2476 DRM_DEBUG_KMS("WM latency values increased to avoid potential underruns\n"); 2477 intel_print_wm_latency(dev, "Primary", dev_priv->wm.pri_latency); 2478 intel_print_wm_latency(dev, "Sprite", dev_priv->wm.spr_latency); 2479 intel_print_wm_latency(dev, "Cursor", dev_priv->wm.cur_latency); 2480} 2481 2482static void ilk_setup_wm_latency(struct drm_device *dev) 2483{ 2484 struct drm_i915_private *dev_priv = dev->dev_private; 2485 2486 intel_read_wm_latency(dev, dev_priv->wm.pri_latency); 2487 2488 memcpy(dev_priv->wm.spr_latency, dev_priv->wm.pri_latency, 2489 sizeof(dev_priv->wm.pri_latency)); 2490 memcpy(dev_priv->wm.cur_latency, dev_priv->wm.pri_latency, 2491 sizeof(dev_priv->wm.pri_latency)); 2492 2493 intel_fixup_spr_wm_latency(dev, dev_priv->wm.spr_latency); 2494 intel_fixup_cur_wm_latency(dev, dev_priv->wm.cur_latency); 2495 2496 intel_print_wm_latency(dev, "Primary", dev_priv->wm.pri_latency); 2497 intel_print_wm_latency(dev, "Sprite", dev_priv->wm.spr_latency); 2498 intel_print_wm_latency(dev, "Cursor", dev_priv->wm.cur_latency); 2499 2500 if (IS_GEN6(dev)) 2501 snb_wm_latency_quirk(dev); 2502} 2503 2504static void skl_setup_wm_latency(struct drm_device *dev) 2505{ 2506 struct drm_i915_private *dev_priv = dev->dev_private; 2507 2508 intel_read_wm_latency(dev, dev_priv->wm.skl_latency); 2509 intel_print_wm_latency(dev, "Gen9 Plane", dev_priv->wm.skl_latency); 2510} 2511 2512static void ilk_compute_wm_parameters(struct drm_crtc *crtc, 2513 struct ilk_pipe_wm_parameters *p) 2514{ 2515 struct drm_device *dev = crtc->dev; 2516 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 2517 enum pipe pipe = intel_crtc->pipe; 2518 struct drm_plane *plane; 2519 2520 if (!intel_crtc_active(crtc)) 2521 return; 2522 2523 p->active = true; 2524 p->pipe_htotal = intel_crtc->config.adjusted_mode.crtc_htotal; 2525 p->pixel_rate = ilk_pipe_pixel_rate(dev, crtc); 2526 p->pri.bytes_per_pixel = crtc->primary->fb->bits_per_pixel / 8; 2527 p->cur.bytes_per_pixel = 4; 2528 p->pri.horiz_pixels = intel_crtc->config.pipe_src_w; 2529 p->cur.horiz_pixels = intel_crtc->cursor_width; 2530 /* TODO: for now, assume primary and cursor planes are always enabled. 
*/ 2531 p->pri.enabled = true; 2532 p->cur.enabled = true; 2533 2534 drm_for_each_legacy_plane(plane, &dev->mode_config.plane_list) { 2535 struct intel_plane *intel_plane = to_intel_plane(plane); 2536 2537 if (intel_plane->pipe == pipe) { 2538 p->spr = intel_plane->wm; 2539 break; 2540 } 2541 } 2542} 2543 2544static void ilk_compute_wm_config(struct drm_device *dev, 2545 struct intel_wm_config *config) 2546{ 2547 struct intel_crtc *intel_crtc; 2548 2549 /* Compute the currently _active_ config */ 2550 for_each_intel_crtc(dev, intel_crtc) { 2551 const struct intel_pipe_wm *wm = &intel_crtc->wm.active; 2552 2553 if (!wm->pipe_enabled) 2554 continue; 2555 2556 config->sprites_enabled |= wm->sprites_enabled; 2557 config->sprites_scaled |= wm->sprites_scaled; 2558 config->num_pipes_active++; 2559 } 2560} 2561 2562/* Compute new watermarks for the pipe */ 2563static bool intel_compute_pipe_wm(struct drm_crtc *crtc, 2564 const struct ilk_pipe_wm_parameters *params, 2565 struct intel_pipe_wm *pipe_wm) 2566{ 2567 struct drm_device *dev = crtc->dev; 2568 const struct drm_i915_private *dev_priv = dev->dev_private; 2569 int level, max_level = ilk_wm_max_level(dev); 2570 /* LP0 watermark maximums depend on this pipe alone */ 2571 struct intel_wm_config config = { 2572 .num_pipes_active = 1, 2573 .sprites_enabled = params->spr.enabled, 2574 .sprites_scaled = params->spr.scaled, 2575 }; 2576 struct ilk_wm_maximums max; 2577 2578 pipe_wm->pipe_enabled = params->active; 2579 pipe_wm->sprites_enabled = params->spr.enabled; 2580 pipe_wm->sprites_scaled = params->spr.scaled; 2581 2582 /* ILK/SNB: LP2+ watermarks only w/o sprites */ 2583 if (INTEL_INFO(dev)->gen <= 6 && params->spr.enabled) 2584 max_level = 1; 2585 2586 /* ILK/SNB/IVB: LP1+ watermarks only w/o scaling */ 2587 if (params->spr.scaled) 2588 max_level = 0; 2589 2590 ilk_compute_wm_level(dev_priv, 0, params, &pipe_wm->wm[0]); 2591 2592 if (IS_HASWELL(dev) || IS_BROADWELL(dev)) 2593 pipe_wm->linetime = hsw_compute_linetime_wm(dev, crtc); 2594 2595 /* LP0 watermarks always use 1/2 DDB partitioning */ 2596 ilk_compute_wm_maximums(dev, 0, &config, INTEL_DDB_PART_1_2, &max); 2597 2598 /* At least LP0 must be valid */ 2599 if (!ilk_validate_wm_level(0, &max, &pipe_wm->wm[0])) 2600 return false; 2601 2602 ilk_compute_wm_reg_maximums(dev, 1, &max); 2603 2604 for (level = 1; level <= max_level; level++) { 2605 struct intel_wm_level wm = {}; 2606 2607 ilk_compute_wm_level(dev_priv, level, params, &wm); 2608 2609 /* 2610 * Disable any watermark level that exceeds the 2611 * register maximums since such watermarks are 2612 * always invalid. 2613 */ 2614 if (!ilk_validate_wm_level(level, &max, &wm)) 2615 break; 2616 2617 pipe_wm->wm[level] = wm; 2618 } 2619 2620 return true; 2621} 2622 2623/* 2624 * Merge the watermarks from all active pipes for a specific level. 2625 */ 2626static void ilk_merge_wm_level(struct drm_device *dev, 2627 int level, 2628 struct intel_wm_level *ret_wm) 2629{ 2630 const struct intel_crtc *intel_crtc; 2631 2632 ret_wm->enable = true; 2633 2634 for_each_intel_crtc(dev, intel_crtc) { 2635 const struct intel_pipe_wm *active = &intel_crtc->wm.active; 2636 const struct intel_wm_level *wm = &active->wm[level]; 2637 2638 if (!active->pipe_enabled) 2639 continue; 2640 2641 /* 2642 * The watermark values may have been used in the past, 2643 * so we must maintain them in the registers for some 2644 * time even if the level is now disabled. 
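		 *
		 * (Editor's note, an example that is not in the original
		 * source: if for some level pipe A reports {pri 10, spr 4}
		 * enabled and pipe B reports {pri 14, spr 0} disabled, the
		 * merge below keeps the per-field maxima {pri 14, spr 4}
		 * but marks the merged level disabled, since a level is
		 * only usable when every active pipe can support it.)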
2645 */ 2646 if (!wm->enable) 2647 ret_wm->enable = false; 2648 2649 ret_wm->pri_val = max(ret_wm->pri_val, wm->pri_val); 2650 ret_wm->spr_val = max(ret_wm->spr_val, wm->spr_val); 2651 ret_wm->cur_val = max(ret_wm->cur_val, wm->cur_val); 2652 ret_wm->fbc_val = max(ret_wm->fbc_val, wm->fbc_val); 2653 } 2654} 2655 2656/* 2657 * Merge all low power watermarks for all active pipes. 2658 */ 2659static void ilk_wm_merge(struct drm_device *dev, 2660 const struct intel_wm_config *config, 2661 const struct ilk_wm_maximums *max, 2662 struct intel_pipe_wm *merged) 2663{ 2664 int level, max_level = ilk_wm_max_level(dev); 2665 int last_enabled_level = max_level; 2666 2667 /* ILK/SNB/IVB: LP1+ watermarks only w/ single pipe */ 2668 if ((INTEL_INFO(dev)->gen <= 6 || IS_IVYBRIDGE(dev)) && 2669 config->num_pipes_active > 1) 2670 return; 2671 2672 /* ILK: FBC WM must be disabled always */ 2673 merged->fbc_wm_enabled = INTEL_INFO(dev)->gen >= 6; 2674 2675 /* merge each WM1+ level */ 2676 for (level = 1; level <= max_level; level++) { 2677 struct intel_wm_level *wm = &merged->wm[level]; 2678 2679 ilk_merge_wm_level(dev, level, wm); 2680 2681 if (level > last_enabled_level) 2682 wm->enable = false; 2683 else if (!ilk_validate_wm_level(level, max, wm)) 2684 /* make sure all following levels get disabled */ 2685 last_enabled_level = level - 1; 2686 2687 /* 2688 * The spec says it is preferred to disable 2689 * FBC WMs instead of disabling a WM level. 2690 */ 2691 if (wm->fbc_val > max->fbc) { 2692 if (wm->enable) 2693 merged->fbc_wm_enabled = false; 2694 wm->fbc_val = 0; 2695 } 2696 } 2697 2698 /* ILK: LP2+ must be disabled when FBC WM is disabled but FBC enabled */ 2699 /* 2700 * FIXME this is racy. FBC might get enabled later. 2701 * What we should check here is whether FBC can be 2702 * enabled sometime later. 2703 */ 2704 if (IS_GEN5(dev) && !merged->fbc_wm_enabled && intel_fbc_enabled(dev)) { 2705 for (level = 2; level <= max_level; level++) { 2706 struct intel_wm_level *wm = &merged->wm[level]; 2707 2708 wm->enable = false; 2709 } 2710 } 2711} 2712 2713static int ilk_wm_lp_to_level(int wm_lp, const struct intel_pipe_wm *pipe_wm) 2714{ 2715 /* LP1,LP2,LP3 levels are either 1,2,3 or 1,3,4 */ 2716 return wm_lp + (wm_lp >= 2 && pipe_wm->wm[4].enable); 2717} 2718 2719/* The value we need to program into the WM_LPx latency field */ 2720static unsigned int ilk_wm_lp_latency(struct drm_device *dev, int level) 2721{ 2722 struct drm_i915_private *dev_priv = dev->dev_private; 2723 2724 if (IS_HASWELL(dev) || IS_BROADWELL(dev)) 2725 return 2 * level; 2726 else 2727 return dev_priv->wm.pri_latency[level]; 2728} 2729 2730static void ilk_compute_wm_results(struct drm_device *dev, 2731 const struct intel_pipe_wm *merged, 2732 enum intel_ddb_partitioning partitioning, 2733 struct ilk_wm_values *results) 2734{ 2735 struct intel_crtc *intel_crtc; 2736 int level, wm_lp; 2737 2738 results->enable_fbc_wm = merged->fbc_wm_enabled; 2739 results->partitioning = partitioning; 2740 2741 /* LP1+ register values */ 2742 for (wm_lp = 1; wm_lp <= 3; wm_lp++) { 2743 const struct intel_wm_level *r; 2744 2745 level = ilk_wm_lp_to_level(wm_lp, merged); 2746 2747 r = &merged->wm[level]; 2748 2749 /* 2750 * Maintain the watermark values even if the level is 2751 * disabled. Doing otherwise could cause underruns. 
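		 *
		 * (Editor's note, not in the original source:
		 * ilk_wm_lp_to_level() above encodes the register-to-level
		 * mapping as wm_lp + (wm_lp >= 2 && pipe_wm->wm[4].enable).
		 * On HSW/BDW with five merged levels, 0..4, the LP1/LP2/LP3
		 * registers therefore read levels 1/3/4 when wm[4] is
		 * enabled and 1/2/3 otherwise.)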
2752 */ 2753 results->wm_lp[wm_lp - 1] = 2754 (ilk_wm_lp_latency(dev, level) << WM1_LP_LATENCY_SHIFT) | 2755 (r->pri_val << WM1_LP_SR_SHIFT) | 2756 r->cur_val; 2757 2758 if (r->enable) 2759 results->wm_lp[wm_lp - 1] |= WM1_LP_SR_EN; 2760 2761 if (INTEL_INFO(dev)->gen >= 8) 2762 results->wm_lp[wm_lp - 1] |= 2763 r->fbc_val << WM1_LP_FBC_SHIFT_BDW; 2764 else 2765 results->wm_lp[wm_lp - 1] |= 2766 r->fbc_val << WM1_LP_FBC_SHIFT; 2767 2768 /* 2769 * Always set WM1S_LP_EN when spr_val != 0, even if the 2770 * level is disabled. Doing otherwise could cause underruns. 2771 */ 2772 if (INTEL_INFO(dev)->gen <= 6 && r->spr_val) { 2773 WARN_ON(wm_lp != 1); 2774 results->wm_lp_spr[wm_lp - 1] = WM1S_LP_EN | r->spr_val; 2775 } else 2776 results->wm_lp_spr[wm_lp - 1] = r->spr_val; 2777 } 2778 2779 /* LP0 register values */ 2780 for_each_intel_crtc(dev, intel_crtc) { 2781 enum pipe pipe = intel_crtc->pipe; 2782 const struct intel_wm_level *r = 2783 &intel_crtc->wm.active.wm[0]; 2784 2785 if (WARN_ON(!r->enable)) 2786 continue; 2787 2788 results->wm_linetime[pipe] = intel_crtc->wm.active.linetime; 2789 2790 results->wm_pipe[pipe] = 2791 (r->pri_val << WM0_PIPE_PLANE_SHIFT) | 2792 (r->spr_val << WM0_PIPE_SPRITE_SHIFT) | 2793 r->cur_val; 2794 } 2795} 2796 2797/* Find the result with the highest level enabled. Check for enable_fbc_wm in 2798 * case both are at the same level. Prefer r1 in case they're the same. */ 2799static struct intel_pipe_wm *ilk_find_best_result(struct drm_device *dev, 2800 struct intel_pipe_wm *r1, 2801 struct intel_pipe_wm *r2) 2802{ 2803 int level, max_level = ilk_wm_max_level(dev); 2804 int level1 = 0, level2 = 0; 2805 2806 for (level = 1; level <= max_level; level++) { 2807 if (r1->wm[level].enable) 2808 level1 = level; 2809 if (r2->wm[level].enable) 2810 level2 = level; 2811 } 2812 2813 if (level1 == level2) { 2814 if (r2->fbc_wm_enabled && !r1->fbc_wm_enabled) 2815 return r2; 2816 else 2817 return r1; 2818 } else if (level1 > level2) { 2819 return r1; 2820 } else { 2821 return r2; 2822 } 2823} 2824 2825/* dirty bits used to track which watermarks need changes */ 2826#define WM_DIRTY_PIPE(pipe) (1 << (pipe)) 2827#define WM_DIRTY_LINETIME(pipe) (1 << (8 + (pipe))) 2828#define WM_DIRTY_LP(wm_lp) (1 << (15 + (wm_lp))) 2829#define WM_DIRTY_LP_ALL (WM_DIRTY_LP(1) | WM_DIRTY_LP(2) | WM_DIRTY_LP(3)) 2830#define WM_DIRTY_FBC (1 << 24) 2831#define WM_DIRTY_DDB (1 << 25) 2832 2833static unsigned int ilk_compute_wm_dirty(struct drm_i915_private *dev_priv, 2834 const struct ilk_wm_values *old, 2835 const struct ilk_wm_values *new) 2836{ 2837 unsigned int dirty = 0; 2838 enum pipe pipe; 2839 int wm_lp; 2840 2841 for_each_pipe(dev_priv, pipe) { 2842 if (old->wm_linetime[pipe] != new->wm_linetime[pipe]) { 2843 dirty |= WM_DIRTY_LINETIME(pipe); 2844 /* Must disable LP1+ watermarks too */ 2845 dirty |= WM_DIRTY_LP_ALL; 2846 } 2847 2848 if (old->wm_pipe[pipe] != new->wm_pipe[pipe]) { 2849 dirty |= WM_DIRTY_PIPE(pipe); 2850 /* Must disable LP1+ watermarks too */ 2851 dirty |= WM_DIRTY_LP_ALL; 2852 } 2853 } 2854 2855 if (old->enable_fbc_wm != new->enable_fbc_wm) { 2856 dirty |= WM_DIRTY_FBC; 2857 /* Must disable LP1+ watermarks too */ 2858 dirty |= WM_DIRTY_LP_ALL; 2859 } 2860 2861 if (old->partitioning != new->partitioning) { 2862 dirty |= WM_DIRTY_DDB; 2863 /* Must disable LP1+ watermarks too */ 2864 dirty |= WM_DIRTY_LP_ALL; 2865 } 2866 2867 /* LP1+ watermarks already deemed dirty, no need to continue */ 2868 if (dirty & WM_DIRTY_LP_ALL) 2869 return dirty; 2870 2871 /* Find the lowest numbered LP1+ 
watermark in need of an update... */
2872	for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
2873		if (old->wm_lp[wm_lp - 1] != new->wm_lp[wm_lp - 1] ||
2874		    old->wm_lp_spr[wm_lp - 1] != new->wm_lp_spr[wm_lp - 1])
2875			break;
2876	}
2877
2878	/* ...and mark it and all higher numbered LP1+ watermarks as dirty */
2879	for (; wm_lp <= 3; wm_lp++)
2880		dirty |= WM_DIRTY_LP(wm_lp);
2881
2882	return dirty;
2883}
2884
2885static bool _ilk_disable_lp_wm(struct drm_i915_private *dev_priv,
2886			       unsigned int dirty)
2887{
2888	struct ilk_wm_values *previous = &dev_priv->wm.hw;
2889	bool changed = false;
2890
2891	if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] & WM1_LP_SR_EN) {
2892		previous->wm_lp[2] &= ~WM1_LP_SR_EN;
2893		I915_WRITE(WM3_LP_ILK, previous->wm_lp[2]);
2894		changed = true;
2895	}
2896	if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] & WM1_LP_SR_EN) {
2897		previous->wm_lp[1] &= ~WM1_LP_SR_EN;
2898		I915_WRITE(WM2_LP_ILK, previous->wm_lp[1]);
2899		changed = true;
2900	}
2901	if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] & WM1_LP_SR_EN) {
2902		previous->wm_lp[0] &= ~WM1_LP_SR_EN;
2903		I915_WRITE(WM1_LP_ILK, previous->wm_lp[0]);
2904		changed = true;
2905	}
2906
2907	/*
2908	 * Don't touch WM1S_LP_EN here.
2909	 * Doing so could cause underruns.
2910	 */
2911
2912	return changed;
2913}
2914
2915/*
2916 * The spec says we shouldn't write when we don't need to, because every
2917 * write causes WMs to be re-evaluated, expending some power.
2918 */
2919static void ilk_write_wm_values(struct drm_i915_private *dev_priv,
2920				struct ilk_wm_values *results)
2921{
2922	struct drm_device *dev = dev_priv->dev;
2923	struct ilk_wm_values *previous = &dev_priv->wm.hw;
2924	unsigned int dirty;
2925	uint32_t val;
2926
2927	dirty = ilk_compute_wm_dirty(dev_priv, previous, results);
2928	if (!dirty)
2929		return;
2930
2931	_ilk_disable_lp_wm(dev_priv, dirty);
2932
2933	if (dirty & WM_DIRTY_PIPE(PIPE_A))
2934		I915_WRITE(WM0_PIPEA_ILK, results->wm_pipe[0]);
2935	if (dirty & WM_DIRTY_PIPE(PIPE_B))
2936		I915_WRITE(WM0_PIPEB_ILK, results->wm_pipe[1]);
2937	if (dirty & WM_DIRTY_PIPE(PIPE_C))
2938		I915_WRITE(WM0_PIPEC_IVB, results->wm_pipe[2]);
2939
2940	if (dirty & WM_DIRTY_LINETIME(PIPE_A))
2941		I915_WRITE(PIPE_WM_LINETIME(PIPE_A), results->wm_linetime[0]);
2942	if (dirty & WM_DIRTY_LINETIME(PIPE_B))
2943		I915_WRITE(PIPE_WM_LINETIME(PIPE_B), results->wm_linetime[1]);
2944	if (dirty & WM_DIRTY_LINETIME(PIPE_C))
2945		I915_WRITE(PIPE_WM_LINETIME(PIPE_C), results->wm_linetime[2]);
2946
2947	if (dirty & WM_DIRTY_DDB) {
2948		if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
2949			val = I915_READ(WM_MISC);
2950			if (results->partitioning == INTEL_DDB_PART_1_2)
2951				val &= ~WM_MISC_DATA_PARTITION_5_6;
2952			else
2953				val |= WM_MISC_DATA_PARTITION_5_6;
2954			I915_WRITE(WM_MISC, val);
2955		} else {
2956			val = I915_READ(DISP_ARB_CTL2);
2957			if (results->partitioning == INTEL_DDB_PART_1_2)
2958				val &= ~DISP_DATA_PARTITION_5_6;
2959			else
2960				val |= DISP_DATA_PARTITION_5_6;
2961			I915_WRITE(DISP_ARB_CTL2, val);
2962		}
2963	}
2964
2965	if (dirty & WM_DIRTY_FBC) {
2966		val = I915_READ(DISP_ARB_CTL);
2967		if (results->enable_fbc_wm)
2968			val &= ~DISP_FBC_WM_DIS;
2969		else
2970			val |= DISP_FBC_WM_DIS;
2971		I915_WRITE(DISP_ARB_CTL, val);
2972	}
2973
2974	if (dirty & WM_DIRTY_LP(1) &&
2975	    previous->wm_lp_spr[0] != results->wm_lp_spr[0])
2976		I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]);
2977
2978	if (INTEL_INFO(dev)->gen >= 7) {
2979		if (dirty & WM_DIRTY_LP(2) && previous->wm_lp_spr[1] != results->wm_lp_spr[1])
2980			I915_WRITE(WM2S_LP_IVB,
results->wm_lp_spr[1]); 2981 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp_spr[2] != results->wm_lp_spr[2]) 2982 I915_WRITE(WM3S_LP_IVB, results->wm_lp_spr[2]); 2983 } 2984 2985 if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] != results->wm_lp[0]) 2986 I915_WRITE(WM1_LP_ILK, results->wm_lp[0]); 2987 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] != results->wm_lp[1]) 2988 I915_WRITE(WM2_LP_ILK, results->wm_lp[1]); 2989 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] != results->wm_lp[2]) 2990 I915_WRITE(WM3_LP_ILK, results->wm_lp[2]); 2991 2992 dev_priv->wm.hw = *results; 2993} 2994 2995static bool ilk_disable_lp_wm(struct drm_device *dev) 2996{ 2997 struct drm_i915_private *dev_priv = dev->dev_private; 2998 2999 return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL); 3000} 3001 3002/* 3003 * On gen9, we need to allocate Display Data Buffer (DDB) portions to the 3004 * different active planes. 3005 */ 3006 3007#define SKL_DDB_SIZE 896 /* in blocks */ 3008 3009static void 3010skl_ddb_get_pipe_allocation_limits(struct drm_device *dev, 3011 struct drm_crtc *for_crtc, 3012 const struct intel_wm_config *config, 3013 const struct skl_pipe_wm_parameters *params, 3014 struct skl_ddb_entry *alloc /* out */) 3015{ 3016 struct drm_crtc *crtc; 3017 unsigned int pipe_size, ddb_size; 3018 int nth_active_pipe; 3019 3020 if (!params->active) { 3021 alloc->start = 0; 3022 alloc->end = 0; 3023 return; 3024 } 3025 3026 ddb_size = SKL_DDB_SIZE; 3027 3028 ddb_size -= 4; /* 4 blocks for bypass path allocation */ 3029 3030 nth_active_pipe = 0; 3031 for_each_crtc(dev, crtc) { 3032 if (!intel_crtc_active(crtc)) 3033 continue; 3034 3035 if (crtc == for_crtc) 3036 break; 3037 3038 nth_active_pipe++; 3039 } 3040 3041 pipe_size = ddb_size / config->num_pipes_active; 3042 alloc->start = nth_active_pipe * ddb_size / config->num_pipes_active; 3043 alloc->end = alloc->start + pipe_size; 3044} 3045 3046static unsigned int skl_cursor_allocation(const struct intel_wm_config *config) 3047{ 3048 if (config->num_pipes_active == 1) 3049 return 32; 3050 3051 return 8; 3052} 3053 3054static void skl_ddb_entry_init_from_hw(struct skl_ddb_entry *entry, u32 reg) 3055{ 3056 entry->start = reg & 0x3ff; 3057 entry->end = (reg >> 16) & 0x3ff; 3058 if (entry->end) 3059 entry->end += 1; 3060} 3061 3062void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv, 3063 struct skl_ddb_allocation *ddb /* out */) 3064{ 3065 struct drm_device *dev = dev_priv->dev; 3066 enum pipe pipe; 3067 int plane; 3068 u32 val; 3069 3070 for_each_pipe(dev_priv, pipe) { 3071 for_each_plane(pipe, plane) { 3072 val = I915_READ(PLANE_BUF_CFG(pipe, plane)); 3073 skl_ddb_entry_init_from_hw(&ddb->plane[pipe][plane], 3074 val); 3075 } 3076 3077 val = I915_READ(CUR_BUF_CFG(pipe)); 3078 skl_ddb_entry_init_from_hw(&ddb->cursor[pipe], val); 3079 } 3080} 3081 3082static unsigned int 3083skl_plane_relative_data_rate(const struct intel_plane_wm_parameters *p) 3084{ 3085 return p->horiz_pixels * p->vert_pixels * p->bytes_per_pixel; 3086} 3087 3088/* 3089 * We don't overflow 32 bits. 
Worst case is 3 planes enabled, each fetching
3090 * an 8192x4096@32bpp framebuffer:
3091 *   3 * 4096 * 8192 * 4 < 2^32
3092 */
3093static unsigned int
3094skl_get_total_relative_data_rate(struct intel_crtc *intel_crtc,
3095				 const struct skl_pipe_wm_parameters *params)
3096{
3097	unsigned int total_data_rate = 0;
3098	int plane;
3099
3100	for (plane = 0; plane < intel_num_planes(intel_crtc); plane++) {
3101		const struct intel_plane_wm_parameters *p;
3102
3103		p = &params->plane[plane];
3104		if (!p->enabled)
3105			continue;
3106
3107		total_data_rate += skl_plane_relative_data_rate(p);
3108	}
3109
3110	return total_data_rate;
3111}
3112
3113static void
3114skl_allocate_pipe_ddb(struct drm_crtc *crtc,
3115		      const struct intel_wm_config *config,
3116		      const struct skl_pipe_wm_parameters *params,
3117		      struct skl_ddb_allocation *ddb /* out */)
3118{
3119	struct drm_device *dev = crtc->dev;
3120	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3121	enum pipe pipe = intel_crtc->pipe;
3122	struct skl_ddb_entry *alloc = &ddb->pipe[pipe];
3123	uint16_t alloc_size, start, cursor_blocks;
3124	unsigned int total_data_rate;
3125	int plane;
3126
3127	skl_ddb_get_pipe_allocation_limits(dev, crtc, config, params, alloc);
3128	alloc_size = skl_ddb_entry_size(alloc);
3129	if (alloc_size == 0) {
3130		memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe]));
3131		memset(&ddb->cursor[pipe], 0, sizeof(ddb->cursor[pipe]));
3132		return;
3133	}
3134
3135	cursor_blocks = skl_cursor_allocation(config);
3136	ddb->cursor[pipe].start = alloc->end - cursor_blocks;
3137	ddb->cursor[pipe].end = alloc->end;
3138
3139	alloc_size -= cursor_blocks;
3140	alloc->end -= cursor_blocks;
3141
3142	/*
3143	 * Each active plane gets a portion of the remaining space, in
3144	 * proportion to the amount of data it needs to fetch from memory.
3145	 *
3146	 * FIXME: we may not allocate every single block here.
3147	 */
3148	total_data_rate = skl_get_total_relative_data_rate(intel_crtc, params);
3149
3150	start = alloc->start;
3151	for (plane = 0; plane < intel_num_planes(intel_crtc); plane++) {
3152		const struct intel_plane_wm_parameters *p;
3153		unsigned int data_rate;
3154		uint16_t plane_blocks;
3155
3156		p = &params->plane[plane];
3157		if (!p->enabled)
3158			continue;
3159
3160		data_rate = skl_plane_relative_data_rate(p);
3161
3162		/*
3163		 * Promote the expression to 64 bits to avoid overflowing; the
3164		 * result is < alloc_size since data_rate / total_data_rate < 1.
3165		 */
3166		plane_blocks = div_u64((uint64_t)alloc_size * data_rate,
3167				       total_data_rate);
3168
3169		ddb->plane[pipe][plane].start = start;
3170		ddb->plane[pipe][plane].end = start + plane_blocks;
3171
3172		start += plane_blocks;
3173	}
3174
3175}
3176
3177static uint32_t skl_pipe_pixel_rate(const struct intel_crtc_config *config)
3178{
3179	/* TODO: Take into account the scalers once we support them */
3180	return config->adjusted_mode.crtc_clock;
3181}
3182
3183/*
3184 * The max latency should be 257 (the maximum the punit can encode is
3185 * 255 and we add 2us for the read latency) and bytes_per_pixel should
3186 * always be <= 8, so that should allow pixel_rate up to ~2 GHz, which
3187 * seems sufficient since max 2xcdclk is 1350 MHz and the pixel rate should never exceed that.
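 *
 * (Editor's arithmetic, not in the original source: the intermediate
 * product is bounded by 257 * pixel_rate * 8, and 2^32 / (257 * 8) is
 * roughly 2089000 kHz, i.e. ~2.09 GHz, which is where the headroom
 * claimed above comes from.)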
3188*/ 3189static uint32_t skl_wm_method1(uint32_t pixel_rate, uint8_t bytes_per_pixel, 3190 uint32_t latency) 3191{ 3192 uint32_t wm_intermediate_val, ret; 3193 3194 if (latency == 0) 3195 return UINT_MAX; 3196 3197 wm_intermediate_val = latency * pixel_rate * bytes_per_pixel; 3198 ret = DIV_ROUND_UP(wm_intermediate_val, 1000); 3199 3200 return ret; 3201} 3202 3203static uint32_t skl_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal, 3204 uint32_t horiz_pixels, uint8_t bytes_per_pixel, 3205 uint32_t latency) 3206{ 3207 uint32_t ret, plane_bytes_per_line, wm_intermediate_val; 3208 3209 if (latency == 0) 3210 return UINT_MAX; 3211 3212 plane_bytes_per_line = horiz_pixels * bytes_per_pixel; 3213 wm_intermediate_val = latency * pixel_rate; 3214 ret = DIV_ROUND_UP(wm_intermediate_val, pipe_htotal * 1000) * 3215 plane_bytes_per_line; 3216 3217 return ret; 3218} 3219 3220static bool skl_ddb_allocation_changed(const struct skl_ddb_allocation *new_ddb, 3221 const struct intel_crtc *intel_crtc) 3222{ 3223 struct drm_device *dev = intel_crtc->base.dev; 3224 struct drm_i915_private *dev_priv = dev->dev_private; 3225 const struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb; 3226 enum pipe pipe = intel_crtc->pipe; 3227 3228 if (memcmp(new_ddb->plane[pipe], cur_ddb->plane[pipe], 3229 sizeof(new_ddb->plane[pipe]))) 3230 return true; 3231 3232 if (memcmp(&new_ddb->cursor[pipe], &cur_ddb->cursor[pipe], 3233 sizeof(new_ddb->cursor[pipe]))) 3234 return true; 3235 3236 return false; 3237} 3238 3239static void skl_compute_wm_global_parameters(struct drm_device *dev, 3240 struct intel_wm_config *config) 3241{ 3242 struct drm_crtc *crtc; 3243 struct drm_plane *plane; 3244 3245 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) 3246 config->num_pipes_active += intel_crtc_active(crtc); 3247 3248 /* FIXME: I don't think we need those two global parameters on SKL */ 3249 list_for_each_entry(plane, &dev->mode_config.plane_list, head) { 3250 struct intel_plane *intel_plane = to_intel_plane(plane); 3251 3252 config->sprites_enabled |= intel_plane->wm.enabled; 3253 config->sprites_scaled |= intel_plane->wm.scaled; 3254 } 3255} 3256 3257static void skl_compute_wm_pipe_parameters(struct drm_crtc *crtc, 3258 struct skl_pipe_wm_parameters *p) 3259{ 3260 struct drm_device *dev = crtc->dev; 3261 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 3262 enum pipe pipe = intel_crtc->pipe; 3263 struct drm_plane *plane; 3264 int i = 1; /* Index for sprite planes start */ 3265 3266 p->active = intel_crtc_active(crtc); 3267 if (p->active) { 3268 p->pipe_htotal = intel_crtc->config.adjusted_mode.crtc_htotal; 3269 p->pixel_rate = skl_pipe_pixel_rate(&intel_crtc->config); 3270 3271 /* 3272 * For now, assume primary and cursor planes are always enabled. 3273 */ 3274 p->plane[0].enabled = true; 3275 p->plane[0].bytes_per_pixel = 3276 crtc->primary->fb->bits_per_pixel / 8; 3277 p->plane[0].horiz_pixels = intel_crtc->config.pipe_src_w; 3278 p->plane[0].vert_pixels = intel_crtc->config.pipe_src_h; 3279 3280 p->cursor.enabled = true; 3281 p->cursor.bytes_per_pixel = 4; 3282 p->cursor.horiz_pixels = intel_crtc->cursor_width ? 
3283 intel_crtc->cursor_width : 64; 3284 } 3285 3286 list_for_each_entry(plane, &dev->mode_config.plane_list, head) { 3287 struct intel_plane *intel_plane = to_intel_plane(plane); 3288 3289 if (intel_plane->pipe == pipe) 3290 p->plane[i++] = intel_plane->wm; 3291 } 3292} 3293 3294static bool skl_compute_plane_wm(struct skl_pipe_wm_parameters *p, 3295 struct intel_plane_wm_parameters *p_params, 3296 uint16_t ddb_allocation, 3297 uint32_t mem_value, 3298 uint16_t *out_blocks, /* out */ 3299 uint8_t *out_lines /* out */) 3300{ 3301 uint32_t method1, method2, plane_bytes_per_line, res_blocks, res_lines; 3302 uint32_t result_bytes; 3303 3304 if (mem_value == 0 || !p->active || !p_params->enabled) 3305 return false; 3306 3307 method1 = skl_wm_method1(p->pixel_rate, 3308 p_params->bytes_per_pixel, 3309 mem_value); 3310 method2 = skl_wm_method2(p->pixel_rate, 3311 p->pipe_htotal, 3312 p_params->horiz_pixels, 3313 p_params->bytes_per_pixel, 3314 mem_value); 3315 3316 plane_bytes_per_line = p_params->horiz_pixels * 3317 p_params->bytes_per_pixel; 3318 3319 /* For now xtile and linear */ 3320 if (((ddb_allocation * 512) / plane_bytes_per_line) >= 1) 3321 result_bytes = min(method1, method2); 3322 else 3323 result_bytes = method1; 3324 3325 res_blocks = DIV_ROUND_UP(result_bytes, 512) + 1; 3326 res_lines = DIV_ROUND_UP(result_bytes, plane_bytes_per_line); 3327 3328 if (res_blocks > ddb_allocation || res_lines > 31) 3329 return false; 3330 3331 *out_blocks = res_blocks; 3332 *out_lines = res_lines; 3333 3334 return true; 3335} 3336 3337static void skl_compute_wm_level(const struct drm_i915_private *dev_priv, 3338 struct skl_ddb_allocation *ddb, 3339 struct skl_pipe_wm_parameters *p, 3340 enum pipe pipe, 3341 int level, 3342 int num_planes, 3343 struct skl_wm_level *result) 3344{ 3345 uint16_t latency = dev_priv->wm.skl_latency[level]; 3346 uint16_t ddb_blocks; 3347 int i; 3348 3349 for (i = 0; i < num_planes; i++) { 3350 ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][i]); 3351 3352 result->plane_en[i] = skl_compute_plane_wm(p, &p->plane[i], 3353 ddb_blocks, 3354 latency, 3355 &result->plane_res_b[i], 3356 &result->plane_res_l[i]); 3357 } 3358 3359 ddb_blocks = skl_ddb_entry_size(&ddb->cursor[pipe]); 3360 result->cursor_en = skl_compute_plane_wm(p, &p->cursor, ddb_blocks, 3361 latency, &result->cursor_res_b, 3362 &result->cursor_res_l); 3363} 3364 3365static uint32_t 3366skl_compute_linetime_wm(struct drm_crtc *crtc, struct skl_pipe_wm_parameters *p) 3367{ 3368 if (!intel_crtc_active(crtc)) 3369 return 0; 3370 3371 return DIV_ROUND_UP(8 * p->pipe_htotal * 1000, p->pixel_rate); 3372 3373} 3374 3375static void skl_compute_transition_wm(struct drm_crtc *crtc, 3376 struct skl_pipe_wm_parameters *params, 3377 struct skl_wm_level *trans_wm /* out */) 3378{ 3379 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 3380 int i; 3381 3382 if (!params->active) 3383 return; 3384 3385 /* Until we know more, just disable transition WMs */ 3386 for (i = 0; i < intel_num_planes(intel_crtc); i++) 3387 trans_wm->plane_en[i] = false; 3388 trans_wm->cursor_en = false; 3389} 3390 3391static void skl_compute_pipe_wm(struct drm_crtc *crtc, 3392 struct skl_ddb_allocation *ddb, 3393 struct skl_pipe_wm_parameters *params, 3394 struct skl_pipe_wm *pipe_wm) 3395{ 3396 struct drm_device *dev = crtc->dev; 3397 const struct drm_i915_private *dev_priv = dev->dev_private; 3398 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 3399 int level, max_level = ilk_wm_max_level(dev); 3400 3401 for (level = 0; level <= max_level; level++) 
{ 3402 skl_compute_wm_level(dev_priv, ddb, params, intel_crtc->pipe, 3403 level, intel_num_planes(intel_crtc), 3404 &pipe_wm->wm[level]); 3405 } 3406 pipe_wm->linetime = skl_compute_linetime_wm(crtc, params); 3407 3408 skl_compute_transition_wm(crtc, params, &pipe_wm->trans_wm); 3409} 3410 3411static void skl_compute_wm_results(struct drm_device *dev, 3412 struct skl_pipe_wm_parameters *p, 3413 struct skl_pipe_wm *p_wm, 3414 struct skl_wm_values *r, 3415 struct intel_crtc *intel_crtc) 3416{ 3417 int level, max_level = ilk_wm_max_level(dev); 3418 enum pipe pipe = intel_crtc->pipe; 3419 uint32_t temp; 3420 int i; 3421 3422 for (level = 0; level <= max_level; level++) { 3423 for (i = 0; i < intel_num_planes(intel_crtc); i++) { 3424 temp = 0; 3425 3426 temp |= p_wm->wm[level].plane_res_l[i] << 3427 PLANE_WM_LINES_SHIFT; 3428 temp |= p_wm->wm[level].plane_res_b[i]; 3429 if (p_wm->wm[level].plane_en[i]) 3430 temp |= PLANE_WM_EN; 3431 3432 r->plane[pipe][i][level] = temp; 3433 } 3434 3435 temp = 0; 3436 3437 temp |= p_wm->wm[level].cursor_res_l << PLANE_WM_LINES_SHIFT; 3438 temp |= p_wm->wm[level].cursor_res_b; 3439 3440 if (p_wm->wm[level].cursor_en) 3441 temp |= PLANE_WM_EN; 3442 3443 r->cursor[pipe][level] = temp; 3444 3445 } 3446 3447 /* transition WMs */ 3448 for (i = 0; i < intel_num_planes(intel_crtc); i++) { 3449 temp = 0; 3450 temp |= p_wm->trans_wm.plane_res_l[i] << PLANE_WM_LINES_SHIFT; 3451 temp |= p_wm->trans_wm.plane_res_b[i]; 3452 if (p_wm->trans_wm.plane_en[i]) 3453 temp |= PLANE_WM_EN; 3454 3455 r->plane_trans[pipe][i] = temp; 3456 } 3457 3458 temp = 0; 3459 temp |= p_wm->trans_wm.cursor_res_l << PLANE_WM_LINES_SHIFT; 3460 temp |= p_wm->trans_wm.cursor_res_b; 3461 if (p_wm->trans_wm.cursor_en) 3462 temp |= PLANE_WM_EN; 3463 3464 r->cursor_trans[pipe] = temp; 3465 3466 r->wm_linetime[pipe] = p_wm->linetime; 3467} 3468 3469static void skl_ddb_entry_write(struct drm_i915_private *dev_priv, uint32_t reg, 3470 const struct skl_ddb_entry *entry) 3471{ 3472 if (entry->end) 3473 I915_WRITE(reg, (entry->end - 1) << 16 | entry->start); 3474 else 3475 I915_WRITE(reg, 0); 3476} 3477 3478static void skl_write_wm_values(struct drm_i915_private *dev_priv, 3479 const struct skl_wm_values *new) 3480{ 3481 struct drm_device *dev = dev_priv->dev; 3482 struct intel_crtc *crtc; 3483 3484 list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) { 3485 int i, level, max_level = ilk_wm_max_level(dev); 3486 enum pipe pipe = crtc->pipe; 3487 3488 if (!new->dirty[pipe]) 3489 continue; 3490 3491 I915_WRITE(PIPE_WM_LINETIME(pipe), new->wm_linetime[pipe]); 3492 3493 for (level = 0; level <= max_level; level++) { 3494 for (i = 0; i < intel_num_planes(crtc); i++) 3495 I915_WRITE(PLANE_WM(pipe, i, level), 3496 new->plane[pipe][i][level]); 3497 I915_WRITE(CUR_WM(pipe, level), 3498 new->cursor[pipe][level]); 3499 } 3500 for (i = 0; i < intel_num_planes(crtc); i++) 3501 I915_WRITE(PLANE_WM_TRANS(pipe, i), 3502 new->plane_trans[pipe][i]); 3503 I915_WRITE(CUR_WM_TRANS(pipe), new->cursor_trans[pipe]); 3504 3505 for (i = 0; i < intel_num_planes(crtc); i++) 3506 skl_ddb_entry_write(dev_priv, 3507 PLANE_BUF_CFG(pipe, i), 3508 &new->ddb.plane[pipe][i]); 3509 3510 skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe), 3511 &new->ddb.cursor[pipe]); 3512 } 3513} 3514 3515/* 3516 * When setting up a new DDB allocation arrangement, we need to correctly 3517 * sequence the times at which the new allocations for the pipes are taken into 3518 * account or we'll have pipes fetching from space previously allocated to 3519 * 
another pipe.
3520 *
3521 * Roughly the sequence looks like:
3522 *  1. re-allocate the pipe(s) with the allocation being reduced and not
3523 *     overlapping with a previously lit-up pipe (another way to put it is:
3524 *     pipes with their new allocation strictly included in their old ones).
3525 *  2. re-allocate the other pipes that get their allocation reduced
3526 *  3. allocate the pipes having their allocation increased
3527 *
3528 * Steps 1. and 2. are here to take care of the following case:
3529 * - Initially DDB looks like this:
3530 *     | B | C |
3531 * - enable pipe A.
3532 * - pipe B has a reduced DDB allocation that overlaps with the old pipe C
3533 *   allocation
3534 *     | A | B | C |
3535 *
3536 * We need to sequence the re-allocation: C, B, A (and not B, C, A).
3537 */
3538
3539static void
3540skl_wm_flush_pipe(struct drm_i915_private *dev_priv, enum pipe pipe, int pass)
3541{
3542	struct drm_device *dev = dev_priv->dev;
3543	int plane;
3544
3545	DRM_DEBUG_KMS("flush pipe %c (pass %d)\n", pipe_name(pipe), pass);
3546
3547	for_each_plane(pipe, plane) {
3548		I915_WRITE(PLANE_SURF(pipe, plane),
3549			   I915_READ(PLANE_SURF(pipe, plane)));
3550	}
3551	I915_WRITE(CURBASE(pipe), I915_READ(CURBASE(pipe)));
3552}
3553
3554static bool
3555skl_ddb_allocation_included(const struct skl_ddb_allocation *old,
3556			    const struct skl_ddb_allocation *new,
3557			    enum pipe pipe)
3558{
3559	uint16_t old_size, new_size;
3560
3561	old_size = skl_ddb_entry_size(&old->pipe[pipe]);
3562	new_size = skl_ddb_entry_size(&new->pipe[pipe]);
3563
3564	return old_size != new_size &&
3565	       new->pipe[pipe].start >= old->pipe[pipe].start &&
3566	       new->pipe[pipe].end <= old->pipe[pipe].end;
3567}
3568
3569static void skl_flush_wm_values(struct drm_i915_private *dev_priv,
3570				struct skl_wm_values *new_values)
3571{
3572	struct drm_device *dev = dev_priv->dev;
3573	struct skl_ddb_allocation *cur_ddb, *new_ddb;
3574	bool reallocated[I915_MAX_PIPES] = {false, false, false};
3575	struct intel_crtc *crtc;
3576	enum pipe pipe;
3577
3578	new_ddb = &new_values->ddb;
3579	cur_ddb = &dev_priv->wm.skl_hw.ddb;
3580
3581	/*
3582	 * First pass: flush the pipes with the new allocation contained
3583	 * within the old space.
3584	 *
3585	 * We'll wait for the vblank on those pipes to ensure we can safely
3586	 * re-allocate the freed space without this pipe fetching from it.
3587	 */
3588	for_each_intel_crtc(dev, crtc) {
3589		if (!crtc->active)
3590			continue;
3591
3592		pipe = crtc->pipe;
3593
3594		if (!skl_ddb_allocation_included(cur_ddb, new_ddb, pipe))
3595			continue;
3596
3597		skl_wm_flush_pipe(dev_priv, pipe, 1);
3598		intel_wait_for_vblank(dev, pipe);
3599
3600		reallocated[pipe] = true;
3601	}
3602
3603
3604	/*
3605	 * Second pass: flush the pipes that are having their allocation
3606	 * reduced, but overlapping with a previous allocation.
3607	 *
3608	 * Here as well we need to wait for the vblank to make sure the freed
3609	 * space is not used anymore.
3610	 */
3611	for_each_intel_crtc(dev, crtc) {
3612		if (!crtc->active)
3613			continue;
3614
3615		pipe = crtc->pipe;
3616
3617		if (reallocated[pipe])
3618			continue;
3619
3620		if (skl_ddb_entry_size(&new_ddb->pipe[pipe]) <
3621		    skl_ddb_entry_size(&cur_ddb->pipe[pipe])) {
3622			skl_wm_flush_pipe(dev_priv, pipe, 2);
3623			intel_wait_for_vblank(dev, pipe);
3624		}
3625
3626		reallocated[pipe] = true;
3627	}
3628
3629	/*
3630	 * Third pass: flush the pipes that got more space allocated.
3631	 *
3632	 * We don't need to actively wait for the update here; the next
3633	 * vblank will just get more DDB space with the correct WM values.
3634	 */
3635	for_each_intel_crtc(dev, crtc) {
3636		if (!crtc->active)
3637			continue;
3638
3639		pipe = crtc->pipe;
3640
3641		/*
3642		 * At this point, only the pipes given more space than before
3643		 * are left to re-allocate.
3644		 */
3645		if (reallocated[pipe])
3646			continue;
3647
3648		skl_wm_flush_pipe(dev_priv, pipe, 3);
3649	}
3650}
3651
3652static bool skl_update_pipe_wm(struct drm_crtc *crtc,
3653			       struct skl_pipe_wm_parameters *params,
3654			       struct intel_wm_config *config,
3655			       struct skl_ddb_allocation *ddb, /* out */
3656			       struct skl_pipe_wm *pipe_wm /* out */)
3657{
3658	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3659
3660	skl_compute_wm_pipe_parameters(crtc, params);
3661	skl_allocate_pipe_ddb(crtc, config, params, ddb);
3662	skl_compute_pipe_wm(crtc, ddb, params, pipe_wm);
3663
3664	if (!memcmp(&intel_crtc->wm.skl_active, pipe_wm, sizeof(*pipe_wm)))
3665		return false;
3666
3667	intel_crtc->wm.skl_active = *pipe_wm;
3668	return true;
3669}
3670
3671static void skl_update_other_pipe_wm(struct drm_device *dev,
3672				     struct drm_crtc *crtc,
3673				     struct intel_wm_config *config,
3674				     struct skl_wm_values *r)
3675{
3676	struct intel_crtc *intel_crtc;
3677	struct intel_crtc *this_crtc = to_intel_crtc(crtc);
3678
3679	/*
3680	 * If the WM update hasn't changed the allocation for this_crtc (the
3681	 * crtc we are currently computing the new WM values for), other
3682	 * enabled crtcs will keep the same allocation and we don't need to
3683	 * recompute anything for them.
3684	 */
3685	if (!skl_ddb_allocation_changed(&r->ddb, this_crtc))
3686		return;
3687
3688	/*
3689	 * Otherwise, because of this_crtc being freshly enabled/disabled, the
3690	 * other active pipes need new DDB allocation and WM values.
3691	 */
3692	list_for_each_entry(intel_crtc, &dev->mode_config.crtc_list,
3693				base.head) {
3694		struct skl_pipe_wm_parameters params = {};
3695		struct skl_pipe_wm pipe_wm = {};
3696		bool wm_changed;
3697
3698		if (this_crtc->pipe == intel_crtc->pipe)
3699			continue;
3700
3701		if (!intel_crtc->active)
3702			continue;
3703
3704		wm_changed = skl_update_pipe_wm(&intel_crtc->base,
3705						&params, config,
3706						&r->ddb, &pipe_wm);
3707
3708		/*
3709		 * If we end up re-computing the other pipe WM values, it's
3710		 * because it was really needed, so we expect the WM values to
3711		 * be different.
3712 */ 3713 WARN_ON(!wm_changed); 3714 3715 skl_compute_wm_results(dev, &params, &pipe_wm, r, intel_crtc); 3716 r->dirty[intel_crtc->pipe] = true; 3717 } 3718} 3719 3720static void skl_update_wm(struct drm_crtc *crtc) 3721{ 3722 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 3723 struct drm_device *dev = crtc->dev; 3724 struct drm_i915_private *dev_priv = dev->dev_private; 3725 struct skl_pipe_wm_parameters params = {}; 3726 struct skl_wm_values *results = &dev_priv->wm.skl_results; 3727 struct skl_pipe_wm pipe_wm = {}; 3728 struct intel_wm_config config = {}; 3729 3730 memset(results, 0, sizeof(*results)); 3731 3732 skl_compute_wm_global_parameters(dev, &config); 3733 3734 if (!skl_update_pipe_wm(crtc, &params, &config, 3735 &results->ddb, &pipe_wm)) 3736 return; 3737 3738 skl_compute_wm_results(dev, &params, &pipe_wm, results, intel_crtc); 3739 results->dirty[intel_crtc->pipe] = true; 3740 3741 skl_update_other_pipe_wm(dev, crtc, &config, results); 3742 skl_write_wm_values(dev_priv, results); 3743 skl_flush_wm_values(dev_priv, results); 3744 3745 /* store the new configuration */ 3746 dev_priv->wm.skl_hw = *results; 3747} 3748 3749static void 3750skl_update_sprite_wm(struct drm_plane *plane, struct drm_crtc *crtc, 3751 uint32_t sprite_width, uint32_t sprite_height, 3752 int pixel_size, bool enabled, bool scaled) 3753{ 3754 struct intel_plane *intel_plane = to_intel_plane(plane); 3755 3756 intel_plane->wm.enabled = enabled; 3757 intel_plane->wm.scaled = scaled; 3758 intel_plane->wm.horiz_pixels = sprite_width; 3759 intel_plane->wm.vert_pixels = sprite_height; 3760 intel_plane->wm.bytes_per_pixel = pixel_size; 3761 3762 skl_update_wm(crtc); 3763} 3764 3765static void ilk_update_wm(struct drm_crtc *crtc) 3766{ 3767 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 3768 struct drm_device *dev = crtc->dev; 3769 struct drm_i915_private *dev_priv = dev->dev_private; 3770 struct ilk_wm_maximums max; 3771 struct ilk_pipe_wm_parameters params = {}; 3772 struct ilk_wm_values results = {}; 3773 enum intel_ddb_partitioning partitioning; 3774 struct intel_pipe_wm pipe_wm = {}; 3775 struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm; 3776 struct intel_wm_config config = {}; 3777 3778 ilk_compute_wm_parameters(crtc, &params); 3779 3780 intel_compute_pipe_wm(crtc, &params, &pipe_wm); 3781 3782 if (!memcmp(&intel_crtc->wm.active, &pipe_wm, sizeof(pipe_wm))) 3783 return; 3784 3785 intel_crtc->wm.active = pipe_wm; 3786 3787 ilk_compute_wm_config(dev, &config); 3788 3789 ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_1_2, &max); 3790 ilk_wm_merge(dev, &config, &max, &lp_wm_1_2); 3791 3792 /* 5/6 split only in single pipe config on IVB+ */ 3793 if (INTEL_INFO(dev)->gen >= 7 && 3794 config.num_pipes_active == 1 && config.sprites_enabled) { 3795 ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_5_6, &max); 3796 ilk_wm_merge(dev, &config, &max, &lp_wm_5_6); 3797 3798 best_lp_wm = ilk_find_best_result(dev, &lp_wm_1_2, &lp_wm_5_6); 3799 } else { 3800 best_lp_wm = &lp_wm_1_2; 3801 } 3802 3803 partitioning = (best_lp_wm == &lp_wm_1_2) ? 
INTEL_DDB_PART_1_2 : INTEL_DDB_PART_5_6; 3805 3806 ilk_compute_wm_results(dev, best_lp_wm, partitioning, &results); 3807 3808 ilk_write_wm_values(dev_priv, &results); 3809} 3810 3811static void 3812ilk_update_sprite_wm(struct drm_plane *plane, 3813 struct drm_crtc *crtc, 3814 uint32_t sprite_width, uint32_t sprite_height, 3815 int pixel_size, bool enabled, bool scaled) 3816{ 3817 struct drm_device *dev = plane->dev; 3818 struct intel_plane *intel_plane = to_intel_plane(plane); 3819 3820 intel_plane->wm.enabled = enabled; 3821 intel_plane->wm.scaled = scaled; 3822 intel_plane->wm.horiz_pixels = sprite_width; 3823 intel_plane->wm.vert_pixels = sprite_height; 3824 intel_plane->wm.bytes_per_pixel = pixel_size; 3825 3826 /* 3827 * IVB workaround: must disable low power watermarks for at least 3828 * one frame before enabling scaling. LP watermarks can be re-enabled 3829 * when scaling is disabled. 3830 * 3831 * WaCxSRDisabledForSpriteScaling:ivb 3832 */ 3833 if (IS_IVYBRIDGE(dev) && scaled && ilk_disable_lp_wm(dev)) 3834 intel_wait_for_vblank(dev, intel_plane->pipe); 3835 3836 ilk_update_wm(crtc); 3837} 3838 3839static void skl_pipe_wm_active_state(uint32_t val, 3840 struct skl_pipe_wm *active, 3841 bool is_transwm, 3842 bool is_cursor, 3843 int i, 3844 int level) 3845{ 3846 bool is_enabled = (val & PLANE_WM_EN) != 0; 3847 3848 if (!is_transwm) { 3849 if (!is_cursor) { 3850 active->wm[level].plane_en[i] = is_enabled; 3851 active->wm[level].plane_res_b[i] = 3852 val & PLANE_WM_BLOCKS_MASK; 3853 active->wm[level].plane_res_l[i] = 3854 (val >> PLANE_WM_LINES_SHIFT) & 3855 PLANE_WM_LINES_MASK; 3856 } else { 3857 active->wm[level].cursor_en = is_enabled; 3858 active->wm[level].cursor_res_b = 3859 val & PLANE_WM_BLOCKS_MASK; 3860 active->wm[level].cursor_res_l = 3861 (val >> PLANE_WM_LINES_SHIFT) & 3862 PLANE_WM_LINES_MASK; 3863 } 3864 } else { 3865 if (!is_cursor) { 3866 active->trans_wm.plane_en[i] = is_enabled; 3867 active->trans_wm.plane_res_b[i] = 3868 val & PLANE_WM_BLOCKS_MASK; 3869 active->trans_wm.plane_res_l[i] = 3870 (val >> PLANE_WM_LINES_SHIFT) & 3871 PLANE_WM_LINES_MASK; 3872 } else { 3873 active->trans_wm.cursor_en = is_enabled; 3874 active->trans_wm.cursor_res_b = 3875 val & PLANE_WM_BLOCKS_MASK; 3876 active->trans_wm.cursor_res_l = 3877 (val >> PLANE_WM_LINES_SHIFT) & 3878 PLANE_WM_LINES_MASK; 3879 } 3880 } 3881} 3882 3883static void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc) 3884{ 3885 struct drm_device *dev = crtc->dev; 3886 struct drm_i915_private *dev_priv = dev->dev_private; 3887 struct skl_wm_values *hw = &dev_priv->wm.skl_hw; 3888 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 3889 struct skl_pipe_wm *active = &intel_crtc->wm.skl_active; 3890 enum pipe pipe = intel_crtc->pipe; 3891 int level, i, max_level; 3892 uint32_t temp; 3893 3894 max_level = ilk_wm_max_level(dev); 3895 3896 hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe)); 3897 3898 for (level = 0; level <= max_level; level++) { 3899 for (i = 0; i < intel_num_planes(intel_crtc); i++) 3900 hw->plane[pipe][i][level] = 3901 I915_READ(PLANE_WM(pipe, i, level)); 3902 hw->cursor[pipe][level] = I915_READ(CUR_WM(pipe, level)); 3903 } 3904 3905 for (i = 0; i < intel_num_planes(intel_crtc); i++) 3906 hw->plane_trans[pipe][i] = I915_READ(PLANE_WM_TRANS(pipe, i)); 3907 hw->cursor_trans[pipe] = I915_READ(CUR_WM_TRANS(pipe)); 3908 3909 if (!intel_crtc_active(crtc)) 3910 return; 3911 3912 hw->dirty[pipe] = true; 3913 3914 active->linetime = hw->wm_linetime[pipe]; 3915 3916 for (level = 0; level <= max_level;
level++) { 3917 for (i = 0; i < intel_num_planes(intel_crtc); i++) { 3918 temp = hw->plane[pipe][i][level]; 3919 skl_pipe_wm_active_state(temp, active, false, 3920 false, i, level); 3921 } 3922 temp = hw->cursor[pipe][level]; 3923 skl_pipe_wm_active_state(temp, active, false, true, i, level); 3924 } 3925 3926 for (i = 0; i < intel_num_planes(intel_crtc); i++) { 3927 temp = hw->plane_trans[pipe][i]; 3928 skl_pipe_wm_active_state(temp, active, true, false, i, 0); 3929 } 3930 3931 temp = hw->cursor_trans[pipe]; 3932 skl_pipe_wm_active_state(temp, active, true, true, i, 0); 3933} 3934 3935void skl_wm_get_hw_state(struct drm_device *dev) 3936{ 3937 struct drm_i915_private *dev_priv = dev->dev_private; 3938 struct skl_ddb_allocation *ddb = &dev_priv->wm.skl_hw.ddb; 3939 struct drm_crtc *crtc; 3940 3941 skl_ddb_get_hw_state(dev_priv, ddb); 3942 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) 3943 skl_pipe_wm_get_hw_state(crtc); 3944} 3945 3946static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc) 3947{ 3948 struct drm_device *dev = crtc->dev; 3949 struct drm_i915_private *dev_priv = dev->dev_private; 3950 struct ilk_wm_values *hw = &dev_priv->wm.hw; 3951 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 3952 struct intel_pipe_wm *active = &intel_crtc->wm.active; 3953 enum pipe pipe = intel_crtc->pipe; 3954 static const unsigned int wm0_pipe_reg[] = { 3955 [PIPE_A] = WM0_PIPEA_ILK, 3956 [PIPE_B] = WM0_PIPEB_ILK, 3957 [PIPE_C] = WM0_PIPEC_IVB, 3958 }; 3959 3960 hw->wm_pipe[pipe] = I915_READ(wm0_pipe_reg[pipe]); 3961 if (IS_HASWELL(dev) || IS_BROADWELL(dev)) 3962 hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe)); 3963 3964 active->pipe_enabled = intel_crtc_active(crtc); 3965 3966 if (active->pipe_enabled) { 3967 u32 tmp = hw->wm_pipe[pipe]; 3968 3969 /* 3970 * For active pipes LP0 watermark is marked as 3971 * enabled, and LP1+ watermarks as disabled since 3972 * we can't really reverse compute them in case 3973 * multiple pipes are active. 3974 */ 3975 active->wm[0].enable = true; 3976 active->wm[0].pri_val = (tmp & WM0_PIPE_PLANE_MASK) >> WM0_PIPE_PLANE_SHIFT; 3977 active->wm[0].spr_val = (tmp & WM0_PIPE_SPRITE_MASK) >> WM0_PIPE_SPRITE_SHIFT; 3978 active->wm[0].cur_val = tmp & WM0_PIPE_CURSOR_MASK; 3979 active->linetime = hw->wm_linetime[pipe]; 3980 } else { 3981 int level, max_level = ilk_wm_max_level(dev); 3982 3983 /* 3984 * For inactive pipes, all watermark levels 3985 * should be marked as enabled but zeroed, 3986 * which is what we'd compute them to be. 3987 */ 3988 for (level = 0; level <= max_level; level++) 3989 active->wm[level].enable = true; 3990 } 3991} 3992 3993void ilk_wm_get_hw_state(struct drm_device *dev) 3994{ 3995 struct drm_i915_private *dev_priv = dev->dev_private; 3996 struct ilk_wm_values *hw = &dev_priv->wm.hw; 3997 struct drm_crtc *crtc; 3998 3999 for_each_crtc(dev, crtc) 4000 ilk_pipe_wm_get_hw_state(crtc); 4001 4002 hw->wm_lp[0] = I915_READ(WM1_LP_ILK); 4003 hw->wm_lp[1] = I915_READ(WM2_LP_ILK); 4004 hw->wm_lp[2] = I915_READ(WM3_LP_ILK); 4005 4006 hw->wm_lp_spr[0] = I915_READ(WM1S_LP_ILK); 4007 if (INTEL_INFO(dev)->gen >= 7) { 4008 hw->wm_lp_spr[1] = I915_READ(WM2S_LP_IVB); 4009 hw->wm_lp_spr[2] = I915_READ(WM3S_LP_IVB); 4010 } 4011 4012 if (IS_HASWELL(dev) || IS_BROADWELL(dev)) 4013 hw->partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ? 4014 INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2; 4015 else if (IS_IVYBRIDGE(dev)) 4016 hw->partitioning = (I915_READ(DISP_ARB_CTL2) & DISP_DATA_PARTITION_5_6) ?
4017 INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2; 4018 4019 hw->enable_fbc_wm = 4020 !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS); 4021} 4022 4023/** 4024 * intel_update_watermarks - update FIFO watermark values based on current modes 4025 * 4026 * Calculate watermark values for the various WM regs based on current mode 4027 * and plane configuration. 4028 * 4029 * There are several cases to deal with here: 4030 * - normal (i.e. non-self-refresh) 4031 * - self-refresh (SR) mode 4032 * - lines are large relative to FIFO size (buffer can hold up to 2) 4033 * - lines are small relative to FIFO size (buffer can hold more than 2 4034 * lines), so need to account for TLB latency 4035 * 4036 * The normal calculation is: 4037 * watermark = dotclock * bytes per pixel * latency 4038 * where latency is platform & configuration dependent (we assume pessimal 4039 * values here). 4040 * 4041 * The SR calculation is: 4042 * watermark = (trunc(latency/line time)+1) * surface width * 4043 * bytes per pixel 4044 * where 4045 * line time = htotal / dotclock 4046 * surface width = hdisplay for normal plane and 64 for cursor 4047 * and latency is assumed to be high, as above. 4048 * 4049 * The final value programmed to the register should always be rounded up, 4050 * and include an extra 2 entries to account for clock crossings. 4051 * 4052 * We don't use the sprite, so we can ignore that. And on Crestline we have 4053 * to set the non-SR watermarks to 8. 4054 */ 4055void intel_update_watermarks(struct drm_crtc *crtc) 4056{ 4057 struct drm_i915_private *dev_priv = crtc->dev->dev_private; 4058 4059 if (dev_priv->display.update_wm) 4060 dev_priv->display.update_wm(crtc); 4061} 4062 4063void intel_update_sprite_watermarks(struct drm_plane *plane, 4064 struct drm_crtc *crtc, 4065 uint32_t sprite_width, 4066 uint32_t sprite_height, 4067 int pixel_size, 4068 bool enabled, bool scaled) 4069{ 4070 struct drm_i915_private *dev_priv = plane->dev->dev_private; 4071 4072 if (dev_priv->display.update_sprite_wm) 4073 dev_priv->display.update_sprite_wm(plane, crtc, 4074 sprite_width, sprite_height, 4075 pixel_size, enabled, scaled); 4076} 4077 4078static struct drm_i915_gem_object * 4079intel_alloc_context_page(struct drm_device *dev) 4080{ 4081 struct drm_i915_gem_object *ctx; 4082 int ret; 4083 4084 WARN_ON(!mutex_is_locked(&dev->struct_mutex)); 4085 4086 ctx = i915_gem_alloc_object(dev, 4096); 4087 if (!ctx) { 4088 DRM_DEBUG("failed to alloc power context, RC6 disabled\n"); 4089 return NULL; 4090 } 4091 4092 ret = i915_gem_obj_ggtt_pin(ctx, 4096, 0); 4093 if (ret) { 4094 DRM_ERROR("failed to pin power context: %d\n", ret); 4095 goto err_unref; 4096 } 4097 4098 ret = i915_gem_object_set_to_gtt_domain(ctx, 1); 4099 if (ret) { 4100 DRM_ERROR("failed to set-domain on power context: %d\n", ret); 4101 goto err_unpin; 4102 } 4103 4104 return ctx; 4105 4106err_unpin: 4107 i915_gem_object_ggtt_unpin(ctx); 4108err_unref: 4109 drm_gem_object_unreference(&ctx->base); 4110 return NULL; 4111} 4112 4113/** 4114 * Lock protecting IPS related data structures 4115 */ 4116DEFINE_SPINLOCK(mchdev_lock); 4117 4118/* Global for IPS driver to get at the current i915 device. Protected by 4119 * mchdev_lock. 
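 * (IPS is a separate platform driver, so it cannot hold a normal reference to the i915 device; it reaches it through this global, with mchdev_lock keeping setup/teardown and the IPS callbacks from racing)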
*/ 4120static struct drm_i915_private *i915_mch_dev; 4121 4122bool ironlake_set_drps(struct drm_device *dev, u8 val) 4123{ 4124 struct drm_i915_private *dev_priv = dev->dev_private; 4125 u16 rgvswctl; 4126 4127 assert_spin_locked(&mchdev_lock); 4128 4129 rgvswctl = I915_READ16(MEMSWCTL); 4130 if (rgvswctl & MEMCTL_CMD_STS) { 4131 DRM_DEBUG("gpu busy, RCS change rejected\n"); 4132 return false; /* still busy with another command */ 4133 } 4134 4135 rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) | 4136 (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM; 4137 I915_WRITE16(MEMSWCTL, rgvswctl); 4138 POSTING_READ16(MEMSWCTL); 4139 4140 rgvswctl |= MEMCTL_CMD_STS; 4141 I915_WRITE16(MEMSWCTL, rgvswctl); 4142 4143 return true; 4144} 4145 4146static void ironlake_enable_drps(struct drm_device *dev) 4147{ 4148 struct drm_i915_private *dev_priv = dev->dev_private; 4149 u32 rgvmodectl = I915_READ(MEMMODECTL); 4150 u8 fmax, fmin, fstart, vstart; 4151 4152 spin_lock_irq(&mchdev_lock); 4153 4154 /* Enable temp reporting */ 4155 I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN); 4156 I915_WRITE16(TSC1, I915_READ(TSC1) | TSE); 4157 4158 /* 100ms RC evaluation intervals */ 4159 I915_WRITE(RCUPEI, 100000); 4160 I915_WRITE(RCDNEI, 100000); 4161 4162 /* Set max/min thresholds to 90ms and 80ms respectively */ 4163 I915_WRITE(RCBMAXAVG, 90000); 4164 I915_WRITE(RCBMINAVG, 80000); 4165 4166 I915_WRITE(MEMIHYST, 1); 4167 4168 /* Set up min, max, and cur for interrupt handling */ 4169 fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT; 4170 fmin = (rgvmodectl & MEMMODE_FMIN_MASK); 4171 fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> 4172 MEMMODE_FSTART_SHIFT; 4173 4174 vstart = (I915_READ(PXVFREQ_BASE + (fstart * 4)) & PXVFREQ_PX_MASK) >> 4175 PXVFREQ_PX_SHIFT; 4176 4177 dev_priv->ips.fmax = fmax; /* IPS callback will increase this */ 4178 dev_priv->ips.fstart = fstart; 4179 4180 dev_priv->ips.max_delay = fstart; 4181 dev_priv->ips.min_delay = fmin; 4182 dev_priv->ips.cur_delay = fstart; 4183 4184 DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n", 4185 fmax, fmin, fstart); 4186 4187 I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN); 4188 4189 /* 4190 * Interrupts will be enabled in ironlake_irq_postinstall 4191 */ 4192 4193 I915_WRITE(VIDSTART, vstart); 4194 POSTING_READ(VIDSTART); 4195 4196 rgvmodectl |= MEMMODE_SWMODE_EN; 4197 I915_WRITE(MEMMODECTL, rgvmodectl); 4198 4199 if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10)) 4200 DRM_ERROR("stuck trying to change perf mode\n"); 4201 mdelay(1); 4202 4203 ironlake_set_drps(dev, fstart); 4204 4205 dev_priv->ips.last_count1 = I915_READ(0x112e4) + I915_READ(0x112e8) + 4206 I915_READ(0x112e0); 4207 dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies); 4208 dev_priv->ips.last_count2 = I915_READ(0x112f4); 4209 dev_priv->ips.last_time2 = ktime_get_raw_ns(); 4210 4211 spin_unlock_irq(&mchdev_lock); 4212} 4213 4214static void ironlake_disable_drps(struct drm_device *dev) 4215{ 4216 struct drm_i915_private *dev_priv = dev->dev_private; 4217 u16 rgvswctl; 4218 4219 spin_lock_irq(&mchdev_lock); 4220 4221 rgvswctl = I915_READ16(MEMSWCTL); 4222 4223 /* Ack interrupts, disable EFC interrupt */ 4224 I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN); 4225 I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG); 4226 I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT); 4227 I915_WRITE(DEIIR, DE_PCU_EVENT); 4228 I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT); 4229 4230 /* Go back to the starting frequency */ 4231 ironlake_set_drps(dev, 
dev_priv->ips.fstart); 4232 mdelay(1); 4233 rgvswctl |= MEMCTL_CMD_STS; 4234 I915_WRITE(MEMSWCTL, rgvswctl); 4235 mdelay(1); 4236 4237 spin_unlock_irq(&mchdev_lock); 4238} 4239 4240/* There's a funny hw issue where the hw returns all 0 when reading from 4241 * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value 4242 * ourselves, instead of doing a rmw cycle (which might result in us clearing 4243 * all limits and the gpu getting stuck at whatever frequency it is currently at). 4244 */ 4245static u32 gen6_rps_limits(struct drm_i915_private *dev_priv, u8 val) 4246{ 4247 u32 limits; 4248 4249 /* Only set the down limit when we've reached the lowest level to avoid 4250 * getting more interrupts, otherwise leave this clear. This prevents a 4251 * race in the hw when coming out of rc6: There's a tiny window where 4252 * the hw runs at the minimal clock before selecting the desired 4253 * frequency; if the down threshold expires in that window we will not 4254 * receive a down interrupt. */ 4255 limits = dev_priv->rps.max_freq_softlimit << 24; 4256 if (val <= dev_priv->rps.min_freq_softlimit) 4257 limits |= dev_priv->rps.min_freq_softlimit << 16; 4258 4259 return limits; 4260} 4261 4262static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) 4263{ 4264 int new_power; 4265 4266 new_power = dev_priv->rps.power; 4267 switch (dev_priv->rps.power) { 4268 case LOW_POWER: 4269 if (val > dev_priv->rps.efficient_freq + 1 && val > dev_priv->rps.cur_freq) 4270 new_power = BETWEEN; 4271 break; 4272 4273 case BETWEEN: 4274 if (val <= dev_priv->rps.efficient_freq && val < dev_priv->rps.cur_freq) 4275 new_power = LOW_POWER; 4276 else if (val >= dev_priv->rps.rp0_freq && val > dev_priv->rps.cur_freq) 4277 new_power = HIGH_POWER; 4278 break; 4279 4280 case HIGH_POWER: 4281 if (val < (dev_priv->rps.rp1_freq + dev_priv->rps.rp0_freq) >> 1 && val < dev_priv->rps.cur_freq) 4282 new_power = BETWEEN; 4283 break; 4284 } 4285 /* Max/min bins are special */ 4286 if (val == dev_priv->rps.min_freq_softlimit) 4287 new_power = LOW_POWER; 4288 if (val == dev_priv->rps.max_freq_softlimit) 4289 new_power = HIGH_POWER; 4290 if (new_power == dev_priv->rps.power) 4291 return; 4292 4293 /* Note the units here are not exactly 1us, but 1280ns.
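 * e.g. in the LOW_POWER case below, GEN6_RP_UP_EI = 12500 -> 12500 * 1280ns = 16ms evaluation interval, and GEN6_RP_UP_THRESHOLD = 11800 -> 11800/12500 = ~95% busy, matching the per-case comments.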
*/ 4294 switch (new_power) { 4295 case LOW_POWER: 4296 /* Upclock if more than 95% busy over 16ms */ 4297 I915_WRITE(GEN6_RP_UP_EI, 12500); 4298 I915_WRITE(GEN6_RP_UP_THRESHOLD, 11800); 4299 4300 /* Downclock if less than 85% busy over 32ms */ 4301 I915_WRITE(GEN6_RP_DOWN_EI, 25000); 4302 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 21250); 4303 4304 I915_WRITE(GEN6_RP_CONTROL, 4305 GEN6_RP_MEDIA_TURBO | 4306 GEN6_RP_MEDIA_HW_NORMAL_MODE | 4307 GEN6_RP_MEDIA_IS_GFX | 4308 GEN6_RP_ENABLE | 4309 GEN6_RP_UP_BUSY_AVG | 4310 GEN6_RP_DOWN_IDLE_AVG); 4311 break; 4312 4313 case BETWEEN: 4314 /* Upclock if more than 90% busy over 13ms */ 4315 I915_WRITE(GEN6_RP_UP_EI, 10250); 4316 I915_WRITE(GEN6_RP_UP_THRESHOLD, 9225); 4317 4318 /* Downclock if less than 75% busy over 32ms */ 4319 I915_WRITE(GEN6_RP_DOWN_EI, 25000); 4320 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 18750); 4321 4322 I915_WRITE(GEN6_RP_CONTROL, 4323 GEN6_RP_MEDIA_TURBO | 4324 GEN6_RP_MEDIA_HW_NORMAL_MODE | 4325 GEN6_RP_MEDIA_IS_GFX | 4326 GEN6_RP_ENABLE | 4327 GEN6_RP_UP_BUSY_AVG | 4328 GEN6_RP_DOWN_IDLE_AVG); 4329 break; 4330 4331 case HIGH_POWER: 4332 /* Upclock if more than 85% busy over 10ms */ 4333 I915_WRITE(GEN6_RP_UP_EI, 8000); 4334 I915_WRITE(GEN6_RP_UP_THRESHOLD, 6800); 4335 4336 /* Downclock if less than 60% busy over 32ms */ 4337 I915_WRITE(GEN6_RP_DOWN_EI, 25000); 4338 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 15000); 4339 4340 I915_WRITE(GEN6_RP_CONTROL, 4341 GEN6_RP_MEDIA_TURBO | 4342 GEN6_RP_MEDIA_HW_NORMAL_MODE | 4343 GEN6_RP_MEDIA_IS_GFX | 4344 GEN6_RP_ENABLE | 4345 GEN6_RP_UP_BUSY_AVG | 4346 GEN6_RP_DOWN_IDLE_AVG); 4347 break; 4348 } 4349 4350 dev_priv->rps.power = new_power; 4351 dev_priv->rps.last_adj = 0; 4352} 4353 4354static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) 4355{ 4356 u32 mask = 0; 4357 4358 if (val > dev_priv->rps.min_freq_softlimit) 4359 mask |= GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT; 4360 if (val < dev_priv->rps.max_freq_softlimit) 4361 mask |= GEN6_PM_RP_UP_THRESHOLD; 4362 4363 mask |= dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED); 4364 mask &= dev_priv->pm_rps_events; 4365 4366 return gen6_sanitize_rps_pm_mask(dev_priv, ~mask); 4367} 4368 4369/* gen6_set_rps is called to update the frequency request, but should also be 4370 * called when the range (min_delay and max_delay) is modified so that we can 4371 * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */ 4372void gen6_set_rps(struct drm_device *dev, u8 val) 4373{ 4374 struct drm_i915_private *dev_priv = dev->dev_private; 4375 4376 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 4377 WARN_ON(val > dev_priv->rps.max_freq_softlimit); 4378 WARN_ON(val < dev_priv->rps.min_freq_softlimit); 4379 4380 /* min/max delay may still have been modified so be sure to 4381 * write the limits value. 4382 */ 4383 if (val != dev_priv->rps.cur_freq) { 4384 gen6_set_rps_thresholds(dev_priv, val); 4385 4386 if (IS_HASWELL(dev) || IS_BROADWELL(dev)) 4387 I915_WRITE(GEN6_RPNSWREQ, 4388 HSW_FREQUENCY(val)); 4389 else 4390 I915_WRITE(GEN6_RPNSWREQ, 4391 GEN6_FREQUENCY(val) | 4392 GEN6_OFFSET(0) | 4393 GEN6_AGGRESSIVE_TURBO); 4394 } 4395 4396 /* Make sure we continue to get interrupts 4397 * until we hit the minimum or maximum frequencies. 
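 * (gen6_rps_pm_mask() above keeps the down events only while we are above the soft minimum and the up events only while we are below the soft maximum, so the interrupts stop exactly when no further frequency change is possible)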
4398 */ 4399 I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, gen6_rps_limits(dev_priv, val)); 4400 I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); 4401 4402 POSTING_READ(GEN6_RPNSWREQ); 4403 4404 dev_priv->rps.cur_freq = val; 4405 trace_intel_gpu_freq_change(val * 50); 4406} 4407 4408/* vlv_set_rps_idle: Set the frequency to Rpn if Gfx clocks are down 4409 * 4410 * If Gfx is idle, then 4411 * 1. Mask Turbo interrupts 4412 * 2. Bring up Gfx clock 4413 * 3. Change the freq to Rpn and wait till P-Unit updates freq 4414 * 4. Clear the Force GFX CLK ON bit so that Gfx can go down 4415 * 5. Unmask Turbo interrupts 4416*/ 4417static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) 4418{ 4419 struct drm_device *dev = dev_priv->dev; 4420 4421 /* Latest VLV doesn't need to force the gfx clock */ 4422 if (dev->pdev->revision >= 0xd) { 4423 valleyview_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit); 4424 return; 4425 } 4426 4427 /* 4428 * When we are idle, drop to the min voltage state. 4429 */ 4430 4431 if (dev_priv->rps.cur_freq <= dev_priv->rps.min_freq_softlimit) 4432 return; 4433 4434 /* Mask turbo interrupts so that they will not come in between */ 4435 I915_WRITE(GEN6_PMINTRMSK, 4436 gen6_sanitize_rps_pm_mask(dev_priv, ~0)); 4437 4438 vlv_force_gfx_clock(dev_priv, true); 4439 4440 dev_priv->rps.cur_freq = dev_priv->rps.min_freq_softlimit; 4441 4442 vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, 4443 dev_priv->rps.min_freq_softlimit); 4444 4445 if (wait_for(((vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS)) 4446 & GENFREQSTATUS) == 0, 100)) 4447 DRM_ERROR("timed out waiting for Punit\n"); 4448 4449 vlv_force_gfx_clock(dev_priv, false); 4450 4451 I915_WRITE(GEN6_PMINTRMSK, 4452 gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq)); 4453} 4454 4455void gen6_rps_idle(struct drm_i915_private *dev_priv) 4456{ 4457 struct drm_device *dev = dev_priv->dev; 4458 4459 mutex_lock(&dev_priv->rps.hw_lock); 4460 if (dev_priv->rps.enabled) { 4461 if (IS_CHERRYVIEW(dev)) 4462 valleyview_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit); 4463 else if (IS_VALLEYVIEW(dev)) 4464 vlv_set_rps_idle(dev_priv); 4465 else 4466 gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit); 4467 dev_priv->rps.last_adj = 0; 4468 } 4469 mutex_unlock(&dev_priv->rps.hw_lock); 4470} 4471 4472void gen6_rps_boost(struct drm_i915_private *dev_priv) 4473{ 4474 struct drm_device *dev = dev_priv->dev; 4475 4476 mutex_lock(&dev_priv->rps.hw_lock); 4477 if (dev_priv->rps.enabled) { 4478 if (IS_VALLEYVIEW(dev)) 4479 valleyview_set_rps(dev_priv->dev, dev_priv->rps.max_freq_softlimit); 4480 else 4481 gen6_set_rps(dev_priv->dev, dev_priv->rps.max_freq_softlimit); 4482 dev_priv->rps.last_adj = 0; 4483 } 4484 mutex_unlock(&dev_priv->rps.hw_lock); 4485} 4486 4487void valleyview_set_rps(struct drm_device *dev, u8 val) 4488{ 4489 struct drm_i915_private *dev_priv = dev->dev_private; 4490 4491 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 4492 WARN_ON(val > dev_priv->rps.max_freq_softlimit); 4493 WARN_ON(val < dev_priv->rps.min_freq_softlimit); 4494 4495 if (WARN_ONCE(IS_CHERRYVIEW(dev) && (val & 1), 4496 "Odd GPU freq value\n")) 4497 val &= ~1; 4498 4499 if (val != dev_priv->rps.cur_freq) 4500 vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); 4501 4502 I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); 4503 4504 dev_priv->rps.cur_freq = val; 4505 trace_intel_gpu_freq_change(vlv_gpu_freq(dev_priv, val)); 4506} 4507 4508static void gen9_disable_rps(struct drm_device *dev) 4509{ 4510 struct
drm_i915_private *dev_priv = dev->dev_private; 4511 4512 I915_WRITE(GEN6_RC_CONTROL, 0); 4513} 4514 4515static void gen6_disable_rps(struct drm_device *dev) 4516{ 4517 struct drm_i915_private *dev_priv = dev->dev_private; 4518 4519 I915_WRITE(GEN6_RC_CONTROL, 0); 4520 I915_WRITE(GEN6_RPNSWREQ, 1 << 31); 4521} 4522 4523static void cherryview_disable_rps(struct drm_device *dev) 4524{ 4525 struct drm_i915_private *dev_priv = dev->dev_private; 4526 4527 I915_WRITE(GEN6_RC_CONTROL, 0); 4528} 4529 4530static void valleyview_disable_rps(struct drm_device *dev) 4531{ 4532 struct drm_i915_private *dev_priv = dev->dev_private; 4533 4534 /* We're doing forcewake before disabling RC6; 4535 * this is what the BIOS expects when going into suspend */ 4536 gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); 4537 4538 I915_WRITE(GEN6_RC_CONTROL, 0); 4539 4540 gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); 4541} 4542 4543static void intel_print_rc6_info(struct drm_device *dev, u32 mode) 4544{ 4545 if (IS_VALLEYVIEW(dev)) { 4546 if (mode & (GEN7_RC_CTL_TO_MODE | GEN6_RC_CTL_EI_MODE(1))) 4547 mode = GEN6_RC_CTL_RC6_ENABLE; 4548 else 4549 mode = 0; 4550 } 4551 if (HAS_RC6p(dev)) 4552 DRM_DEBUG_KMS("Enabling RC6 states: RC6 %s RC6p %s RC6pp %s\n", 4553 (mode & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off", 4554 (mode & GEN6_RC_CTL_RC6p_ENABLE) ? "on" : "off", 4555 (mode & GEN6_RC_CTL_RC6pp_ENABLE) ? "on" : "off"); 4556 4557 else 4558 DRM_DEBUG_KMS("Enabling RC6 states: RC6 %s\n", 4559 (mode & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off"); 4560} 4561 4562static int sanitize_rc6_option(const struct drm_device *dev, int enable_rc6) 4563{ 4564 /* No RC6 before Ironlake */ 4565 if (INTEL_INFO(dev)->gen < 5) 4566 return 0; 4567 4568 /* RC6 is only on Ironlake mobile, not on desktop */ 4569 if (INTEL_INFO(dev)->gen == 5 && !IS_IRONLAKE_M(dev)) 4570 return 0; 4571 4572 /* Respect the kernel parameter if it is set */ 4573 if (enable_rc6 >= 0) { 4574 int mask; 4575 4576 if (HAS_RC6p(dev)) 4577 mask = INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE | 4578 INTEL_RC6pp_ENABLE; 4579 else 4580 mask = INTEL_RC6_ENABLE; 4581 4582 if ((enable_rc6 & mask) != enable_rc6) 4583 DRM_DEBUG_KMS("Adjusting RC6 mask to %d (requested %d, valid %d)\n", 4584 enable_rc6 & mask, enable_rc6, mask); 4585 4586 return enable_rc6 & mask; 4587 } 4588 4589 /* Disable RC6 on Ironlake */ 4590 if (INTEL_INFO(dev)->gen == 5) 4591 return 0; 4592 4593 if (IS_IVYBRIDGE(dev)) 4594 return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE); 4595 4596 return INTEL_RC6_ENABLE; 4597} 4598 4599int intel_enable_rc6(const struct drm_device *dev) 4600{ 4601 return i915.enable_rc6; 4602} 4603 4604static void gen6_init_rps_frequencies(struct drm_device *dev) 4605{ 4606 struct drm_i915_private *dev_priv = dev->dev_private; 4607 uint32_t rp_state_cap; 4608 u32 ddcc_status = 0; 4609 int ret; 4610 4611 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); 4612 /* All of these values are in units of 50MHz */ 4613 dev_priv->rps.cur_freq = 0; 4614 /* static values from HW: RP0 > RP1 > RPn (min_freq) */ 4615 dev_priv->rps.rp0_freq = (rp_state_cap >> 0) & 0xff; 4616 dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff; 4617 dev_priv->rps.min_freq = (rp_state_cap >> 16) & 0xff; 4618 /* hw_max = RP0 until we check for overclocking */ 4619 dev_priv->rps.max_freq = dev_priv->rps.rp0_freq; 4620 4621 dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq; 4622 if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { 4623 ret = sandybridge_pcode_read(dev_priv, 4624 HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL, 4625 &ddcc_status); 4626 if (0 == ret) 4627
dev_priv->rps.efficient_freq = 4628 (ddcc_status >> 8) & 0xff; 4629 } 4630 4631 /* Preserve min/max settings in case of re-init */ 4632 if (dev_priv->rps.max_freq_softlimit == 0) 4633 dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; 4634 4635 if (dev_priv->rps.min_freq_softlimit == 0) { 4636 if (IS_HASWELL(dev) || IS_BROADWELL(dev)) 4637 dev_priv->rps.min_freq_softlimit = 4638 /* max(RPe, 450 MHz) */ 4639 max(dev_priv->rps.efficient_freq, (u8) 9); 4640 else 4641 dev_priv->rps.min_freq_softlimit = 4642 dev_priv->rps.min_freq; 4643 } 4644} 4645 4646static void gen9_enable_rps(struct drm_device *dev) 4647{ 4648 struct drm_i915_private *dev_priv = dev->dev_private; 4649 struct intel_engine_cs *ring; 4650 uint32_t rc6_mask = 0; 4651 int unused; 4652 4653 /* 1a: Software RC state - RC0 */ 4654 I915_WRITE(GEN6_RC_STATE, 0); 4655 4656 /* 1b: Get forcewake during program sequence. Although the driver 4657 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ 4658 gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); 4659 4660 /* 2a: Disable RC states. */ 4661 I915_WRITE(GEN6_RC_CONTROL, 0); 4662 4663 /* 2b: Program RC6 thresholds.*/ 4664 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16); 4665 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ 4666 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ 4667 for_each_ring(ring, dev_priv, unused) 4668 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10); 4669 I915_WRITE(GEN6_RC_SLEEP, 0); 4670 I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ 4671 4672 /* 3a: Enable RC6 */ 4673 if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE) 4674 rc6_mask = GEN6_RC_CTL_RC6_ENABLE; 4675 DRM_INFO("RC6 %s\n", (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ? 4676 "on" : "off"); 4677 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | 4678 GEN6_RC_CTL_EI_MODE(1) | 4679 rc6_mask); 4680 4681 gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); 4682 4683} 4684 4685static void gen8_enable_rps(struct drm_device *dev) 4686{ 4687 struct drm_i915_private *dev_priv = dev->dev_private; 4688 struct intel_engine_cs *ring; 4689 uint32_t rc6_mask = 0; 4690 int unused; 4691 4692 /* 1a: Software RC state - RC0 */ 4693 I915_WRITE(GEN6_RC_STATE, 0); 4694 4695 /* 1c & 1d: Get forcewake during program sequence. Although the driver 4696 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ 4697 gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); 4698 4699 /* 2a: Disable RC states. 
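 * (same pattern as gen9_enable_rps() above: clear RC control, reprogram the thresholds, then write the final mode bits in step 3)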
*/ 4700 I915_WRITE(GEN6_RC_CONTROL, 0); 4701 4702 /* Initialize rps frequencies */ 4703 gen6_init_rps_frequencies(dev); 4704 4705 /* 2b: Program RC6 thresholds.*/ 4706 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); 4707 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ 4708 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ 4709 for_each_ring(ring, dev_priv, unused) 4710 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10); 4711 I915_WRITE(GEN6_RC_SLEEP, 0); 4712 if (IS_BROADWELL(dev)) 4713 I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */ 4714 else 4715 I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ 4716 4717 /* 3: Enable RC6 */ 4718 if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE) 4719 rc6_mask = GEN6_RC_CTL_RC6_ENABLE; 4720 intel_print_rc6_info(dev, rc6_mask); 4721 if (IS_BROADWELL(dev)) 4722 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | 4723 GEN7_RC_CTL_TO_MODE | 4724 rc6_mask); 4725 else 4726 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | 4727 GEN6_RC_CTL_EI_MODE(1) | 4728 rc6_mask); 4729 4730 /* 4: Program defaults and thresholds for RPS */ 4731 I915_WRITE(GEN6_RPNSWREQ, 4732 HSW_FREQUENCY(dev_priv->rps.rp1_freq)); 4733 I915_WRITE(GEN6_RC_VIDEO_FREQ, 4734 HSW_FREQUENCY(dev_priv->rps.rp1_freq)); 4735 /* NB: Docs say 1s, and 1000000 - which aren't equivalent */ 4736 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */ 4737 4738 /* Docs recommend 900MHz, and 300 MHz respectively */ 4739 I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, 4740 dev_priv->rps.max_freq_softlimit << 24 | 4741 dev_priv->rps.min_freq_softlimit << 16); 4742 4743 I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */ 4744 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70% */ 4745 I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */ 4746 I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */ 4747 4748 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); 4749 4750 /* 5: Enable RPS */ 4751 I915_WRITE(GEN6_RP_CONTROL, 4752 GEN6_RP_MEDIA_TURBO | 4753 GEN6_RP_MEDIA_HW_NORMAL_MODE | 4754 GEN6_RP_MEDIA_IS_GFX | 4755 GEN6_RP_ENABLE | 4756 GEN6_RP_UP_BUSY_AVG | 4757 GEN6_RP_DOWN_IDLE_AVG); 4758 4759 /* 6: Ring frequency + overclocking (our driver does this later) */ 4760 4761 dev_priv->rps.power = HIGH_POWER; /* force a reset */ 4762 gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit); 4763 4764 gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); 4765} 4766 4767static void gen6_enable_rps(struct drm_device *dev) 4768{ 4769 struct drm_i915_private *dev_priv = dev->dev_private; 4770 struct intel_engine_cs *ring; 4771 u32 rc6vids, pcu_mbox = 0, rc6_mask = 0; 4772 u32 gtfifodbg; 4773 int rc6_mode; 4774 int i, ret; 4775 4776 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 4777 4778 /* Here begins a magic sequence of register writes to enable 4779 * auto-downclocking. 4780 * 4781 * Perhaps there might be some value in exposing these to 4782 * userspace...
4783 */ 4784 I915_WRITE(GEN6_RC_STATE, 0); 4785 4786 /* Clear the DBG now so we don't confuse earlier errors */ 4787 if ((gtfifodbg = I915_READ(GTFIFODBG))) { 4788 DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg); 4789 I915_WRITE(GTFIFODBG, gtfifodbg); 4790 } 4791 4792 gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); 4793 4794 /* Initialize rps frequencies */ 4795 gen6_init_rps_frequencies(dev); 4796 4797 /* disable the counters and set deterministic thresholds */ 4798 I915_WRITE(GEN6_RC_CONTROL, 0); 4799 4800 I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16); 4801 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30); 4802 I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30); 4803 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); 4804 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); 4805 4806 for_each_ring(ring, dev_priv, i) 4807 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10); 4808 4809 I915_WRITE(GEN6_RC_SLEEP, 0); 4810 I915_WRITE(GEN6_RC1e_THRESHOLD, 1000); 4811 if (IS_IVYBRIDGE(dev)) 4812 I915_WRITE(GEN6_RC6_THRESHOLD, 125000); 4813 else 4814 I915_WRITE(GEN6_RC6_THRESHOLD, 50000); 4815 I915_WRITE(GEN6_RC6p_THRESHOLD, 150000); 4816 I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */ 4817 4818 /* Check if we are enabling RC6 */ 4819 rc6_mode = intel_enable_rc6(dev_priv->dev); 4820 if (rc6_mode & INTEL_RC6_ENABLE) 4821 rc6_mask |= GEN6_RC_CTL_RC6_ENABLE; 4822 4823 /* We don't use those on Haswell */ 4824 if (!IS_HASWELL(dev)) { 4825 if (rc6_mode & INTEL_RC6p_ENABLE) 4826 rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE; 4827 4828 if (rc6_mode & INTEL_RC6pp_ENABLE) 4829 rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE; 4830 } 4831 4832 intel_print_rc6_info(dev, rc6_mask); 4833 4834 I915_WRITE(GEN6_RC_CONTROL, 4835 rc6_mask | 4836 GEN6_RC_CTL_EI_MODE(1) | 4837 GEN6_RC_CTL_HW_ENABLE); 4838 4839 /* Power down if completely idle for over 50ms */ 4840 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000); 4841 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); 4842 4843 ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_MIN_FREQ_TABLE, 0); 4844 if (ret) 4845 DRM_DEBUG_DRIVER("Failed to set the min frequency\n"); 4846 4847 ret = sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &pcu_mbox); 4848 if (!ret && (pcu_mbox & (1<<31))) { /* OC supported */ 4849 DRM_DEBUG_DRIVER("Overclocking supported. Max: %dMHz, Overclock max: %dMHz\n", 4850 (dev_priv->rps.max_freq_softlimit & 0xff) * 50, 4851 (pcu_mbox & 0xff) * 50); 4852 dev_priv->rps.max_freq = pcu_mbox & 0xff; 4853 } 4854 4855 dev_priv->rps.power = HIGH_POWER; /* force a reset */ 4856 gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit); 4857 4858 rc6vids = 0; 4859 ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); 4860 if (IS_GEN6(dev) && ret) { 4861 DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n"); 4862 } else if (IS_GEN6(dev) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) { 4863 DRM_DEBUG_DRIVER("You should update your BIOS. 
Correcting minimum rc6 voltage (%dmV->%dmV)\n", 4864 GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450); 4865 rc6vids &= 0xffff00; 4866 rc6vids |= GEN6_ENCODE_RC6_VID(450); 4867 ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids); 4868 if (ret) 4869 DRM_ERROR("Couldn't fix incorrect rc6 voltage\n"); 4870 } 4871 4872 gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); 4873} 4874 4875static void __gen6_update_ring_freq(struct drm_device *dev) 4876{ 4877 struct drm_i915_private *dev_priv = dev->dev_private; 4878 int min_freq = 15; 4879 unsigned int gpu_freq; 4880 unsigned int max_ia_freq, min_ring_freq; 4881 int scaling_factor = 180; 4882 struct cpufreq_policy *policy; 4883 4884 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 4885 4886 policy = cpufreq_cpu_get(0); 4887 if (policy) { 4888 max_ia_freq = policy->cpuinfo.max_freq; 4889 cpufreq_cpu_put(policy); 4890 } else { 4891 /* 4892 * Default to measured freq if none found, PCU will ensure we 4893 * don't go over 4894 */ 4895 max_ia_freq = tsc_khz; 4896 } 4897 4898 /* Convert from kHz to MHz */ 4899 max_ia_freq /= 1000; 4900 4901 min_ring_freq = I915_READ(DCLK) & 0xf; 4902 /* convert DDR frequency from units of 266.6MHz to bandwidth */ 4903 min_ring_freq = mult_frac(min_ring_freq, 8, 3); 4904 4905 /* 4906 * For each potential GPU frequency, load a ring frequency we'd like 4907 * to use for memory access. We do this by specifying the IA frequency 4908 * the PCU should use as a reference to determine the ring frequency. 4909 */ 4910 for (gpu_freq = dev_priv->rps.max_freq; gpu_freq >= dev_priv->rps.min_freq; 4911 gpu_freq--) { 4912 int diff = dev_priv->rps.max_freq - gpu_freq; 4913 unsigned int ia_freq = 0, ring_freq = 0; 4914 4915 if (INTEL_INFO(dev)->gen >= 8) { 4916 /* max(2 * GT, DDR). NB: GT is 50MHz units */ 4917 ring_freq = max(min_ring_freq, gpu_freq); 4918 } else if (IS_HASWELL(dev)) { 4919 ring_freq = mult_frac(gpu_freq, 5, 4); 4920 ring_freq = max(min_ring_freq, ring_freq); 4921 /* leave ia_freq as the default, chosen by cpufreq */ 4922 } else { 4923 /* On older processors, there is no separate ring 4924 * clock domain, so in order to boost the bandwidth 4925 * of the ring, we need to upclock the CPU (ia_freq). 4926 * 4927 * For GPU frequencies less than 750MHz, 4928 * just use the lowest ring freq. 
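 * (with scaling_factor = 180, each 50MHz GPU step below max shaves diff * 180 / 2 = 90MHz off the requested IA frequency; the DIV_ROUND_CLOSEST(, 100) below then converts to the 100MHz units the pcode mailbox appears to expect)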
4929 */ 4930 if (gpu_freq < min_freq) 4931 ia_freq = 800; 4932 else 4933 ia_freq = max_ia_freq - ((diff * scaling_factor) / 2); 4934 ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100); 4935 } 4936 4937 sandybridge_pcode_write(dev_priv, 4938 GEN6_PCODE_WRITE_MIN_FREQ_TABLE, 4939 ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT | 4940 ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT | 4941 gpu_freq); 4942 } 4943} 4944 4945void gen6_update_ring_freq(struct drm_device *dev) 4946{ 4947 struct drm_i915_private *dev_priv = dev->dev_private; 4948 4949 if (INTEL_INFO(dev)->gen < 6 || IS_VALLEYVIEW(dev)) 4950 return; 4951 4952 mutex_lock(&dev_priv->rps.hw_lock); 4953 __gen6_update_ring_freq(dev); 4954 mutex_unlock(&dev_priv->rps.hw_lock); 4955} 4956 4957static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv) 4958{ 4959 u32 val, rp0; 4960 4961 val = vlv_punit_read(dev_priv, PUNIT_GPU_STATUS_REG); 4962 rp0 = (val >> PUNIT_GPU_STATUS_MAX_FREQ_SHIFT) & PUNIT_GPU_STATUS_MAX_FREQ_MASK; 4963 4964 return rp0; 4965} 4966 4967static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv) 4968{ 4969 u32 val, rpe; 4970 4971 val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG); 4972 rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK; 4973 4974 return rpe; 4975} 4976 4977static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv) 4978{ 4979 u32 val, rp1; 4980 4981 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); 4982 rp1 = (val >> PUNIT_GPU_STATUS_MAX_FREQ_SHIFT) & PUNIT_GPU_STATUS_MAX_FREQ_MASK; 4983 4984 return rp1; 4985} 4986 4987static int cherryview_rps_min_freq(struct drm_i915_private *dev_priv) 4988{ 4989 u32 val, rpn; 4990 4991 val = vlv_punit_read(dev_priv, PUNIT_GPU_STATUS_REG); 4992 rpn = (val >> PUNIT_GPU_STATIS_GFX_MIN_FREQ_SHIFT) & PUNIT_GPU_STATUS_GFX_MIN_FREQ_MASK; 4993 return rpn; 4994} 4995 4996static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv) 4997{ 4998 u32 val, rp1; 4999 5000 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE); 5001 5002 rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT; 5003 5004 return rp1; 5005} 5006 5007static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv) 5008{ 5009 u32 val, rp0; 5010 5011 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE); 5012 5013 rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT; 5014 /* Clamp to max */ 5015 rp0 = min_t(u32, rp0, 0xea); 5016 5017 return rp0; 5018} 5019 5020static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv) 5021{ 5022 u32 val, rpe; 5023 5024 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO); 5025 rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT; 5026 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI); 5027 rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5; 5028 5029 return rpe; 5030} 5031 5032static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv) 5033{ 5034 return vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff; 5035} 5036 5037/* Check that the pctx buffer wasn't moved under us. */ 5038static void valleyview_check_pctx(struct drm_i915_private *dev_priv) 5039{ 5040 unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095; 5041 5042 WARN_ON(pctx_addr != dev_priv->mm.stolen_base + 5043 dev_priv->vlv_pctx->stolen->start); 5044} 5045 5046 5047/* Check that the pcbr address is not empty.
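 * (a zero address means neither the BIOS nor cherryview_setup_pctx() below managed to program VLV_PCBR)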
*/ 5048static void cherryview_check_pctx(struct drm_i915_private *dev_priv) 5049{ 5050 unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095; 5051 5052 WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0); 5053} 5054 5055static void cherryview_setup_pctx(struct drm_device *dev) 5056{ 5057 struct drm_i915_private *dev_priv = dev->dev_private; 5058 unsigned long pctx_paddr, paddr; 5059 struct i915_gtt *gtt = &dev_priv->gtt; 5060 u32 pcbr; 5061 int pctx_size = 32*1024; 5062 5063 WARN_ON(!mutex_is_locked(&dev->struct_mutex)); 5064 5065 pcbr = I915_READ(VLV_PCBR); 5066 if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) { 5067 DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n"); 5068 paddr = (dev_priv->mm.stolen_base + 5069 (gtt->stolen_size - pctx_size)); 5070 5071 pctx_paddr = (paddr & (~4095)); 5072 I915_WRITE(VLV_PCBR, pctx_paddr); 5073 } 5074 5075 DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR)); 5076} 5077 5078static void valleyview_setup_pctx(struct drm_device *dev) 5079{ 5080 struct drm_i915_private *dev_priv = dev->dev_private; 5081 struct drm_i915_gem_object *pctx; 5082 unsigned long pctx_paddr; 5083 u32 pcbr; 5084 int pctx_size = 24*1024; 5085 5086 WARN_ON(!mutex_is_locked(&dev->struct_mutex)); 5087 5088 pcbr = I915_READ(VLV_PCBR); 5089 if (pcbr) { 5090 /* BIOS set it up already, grab the pre-alloc'd space */ 5091 int pcbr_offset; 5092 5093 pcbr_offset = (pcbr & (~4095)) - dev_priv->mm.stolen_base; 5094 pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv->dev, 5095 pcbr_offset, 5096 I915_GTT_OFFSET_NONE, 5097 pctx_size); 5098 goto out; 5099 } 5100 5101 DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n"); 5102 5103 /* 5104 * From the Gunit register HAS: 5105 * The Gfx driver is expected to program this register and ensure 5106 * proper allocation within Gfx stolen memory. For example, this 5107 * register should be programmed such that the PCBR range does not 5108 * overlap with other ranges, such as the frame buffer, protected 5109 * memory, or any other relevant ranges.
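 * (the fallback below does exactly that: carve pctx_size bytes out of stolen memory and hand the page-aligned physical address to VLV_PCBR, mirroring cherryview_setup_pctx() above)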
5110 */ 5111 pctx = i915_gem_object_create_stolen(dev, pctx_size); 5112 if (!pctx) { 5113 DRM_DEBUG("not enough stolen space for PCTX, disabling\n"); 5114 return; 5115 } 5116 5117 pctx_paddr = dev_priv->mm.stolen_base + pctx->stolen->start; 5118 I915_WRITE(VLV_PCBR, pctx_paddr); 5119 5120out: 5121 DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR)); 5122 dev_priv->vlv_pctx = pctx; 5123} 5124 5125static void valleyview_cleanup_pctx(struct drm_device *dev) 5126{ 5127 struct drm_i915_private *dev_priv = dev->dev_private; 5128 5129 if (WARN_ON(!dev_priv->vlv_pctx)) 5130 return; 5131 5132 drm_gem_object_unreference(&dev_priv->vlv_pctx->base); 5133 dev_priv->vlv_pctx = NULL; 5134} 5135 5136static void valleyview_init_gt_powersave(struct drm_device *dev) 5137{ 5138 struct drm_i915_private *dev_priv = dev->dev_private; 5139 u32 val; 5140 5141 valleyview_setup_pctx(dev); 5142 5143 mutex_lock(&dev_priv->rps.hw_lock); 5144 5145 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); 5146 switch ((val >> 6) & 3) { 5147 case 0: 5148 case 1: 5149 dev_priv->mem_freq = 800; 5150 break; 5151 case 2: 5152 dev_priv->mem_freq = 1066; 5153 break; 5154 case 3: 5155 dev_priv->mem_freq = 1333; 5156 break; 5157 } 5158 DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq); 5159 5160 dev_priv->rps.max_freq = valleyview_rps_max_freq(dev_priv); 5161 dev_priv->rps.rp0_freq = dev_priv->rps.max_freq; 5162 DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", 5163 vlv_gpu_freq(dev_priv, dev_priv->rps.max_freq), 5164 dev_priv->rps.max_freq); 5165 5166 dev_priv->rps.efficient_freq = valleyview_rps_rpe_freq(dev_priv); 5167 DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", 5168 vlv_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), 5169 dev_priv->rps.efficient_freq); 5170 5171 dev_priv->rps.rp1_freq = valleyview_rps_guar_freq(dev_priv); 5172 DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n", 5173 vlv_gpu_freq(dev_priv, dev_priv->rps.rp1_freq), 5174 dev_priv->rps.rp1_freq); 5175 5176 dev_priv->rps.min_freq = valleyview_rps_min_freq(dev_priv); 5177 DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", 5178 vlv_gpu_freq(dev_priv, dev_priv->rps.min_freq), 5179 dev_priv->rps.min_freq); 5180 5181 /* Preserve min/max settings in case of re-init */ 5182 if (dev_priv->rps.max_freq_softlimit == 0) 5183 dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; 5184 5185 if (dev_priv->rps.min_freq_softlimit == 0) 5186 dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq; 5187 5188 mutex_unlock(&dev_priv->rps.hw_lock); 5189} 5190 5191static void cherryview_init_gt_powersave(struct drm_device *dev) 5192{ 5193 struct drm_i915_private *dev_priv = dev->dev_private; 5194 u32 val; 5195 5196 cherryview_setup_pctx(dev); 5197 5198 mutex_lock(&dev_priv->rps.hw_lock); 5199 5200 mutex_lock(&dev_priv->dpio_lock); 5201 val = vlv_cck_read(dev_priv, CCK_FUSE_REG); 5202 mutex_unlock(&dev_priv->dpio_lock); 5203 5204 switch ((val >> 2) & 0x7) { 5205 case 0: 5206 case 1: 5207 dev_priv->rps.cz_freq = 200; 5208 dev_priv->mem_freq = 1600; 5209 break; 5210 case 2: 5211 dev_priv->rps.cz_freq = 267; 5212 dev_priv->mem_freq = 1600; 5213 break; 5214 case 3: 5215 dev_priv->rps.cz_freq = 333; 5216 dev_priv->mem_freq = 2000; 5217 break; 5218 case 4: 5219 dev_priv->rps.cz_freq = 320; 5220 dev_priv->mem_freq = 1600; 5221 break; 5222 case 5: 5223 dev_priv->rps.cz_freq = 400; 5224 dev_priv->mem_freq = 1600; 5225 break; 5226 } 5227 DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq); 5228 5229 dev_priv->rps.max_freq = cherryview_rps_max_freq(dev_priv); 5230 
dev_priv->rps.rp0_freq = dev_priv->rps.max_freq; 5231 DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", 5232 vlv_gpu_freq(dev_priv, dev_priv->rps.max_freq), 5233 dev_priv->rps.max_freq); 5234 5235 dev_priv->rps.efficient_freq = cherryview_rps_rpe_freq(dev_priv); 5236 DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", 5237 vlv_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), 5238 dev_priv->rps.efficient_freq); 5239 5240 dev_priv->rps.rp1_freq = cherryview_rps_guar_freq(dev_priv); 5241 DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n", 5242 vlv_gpu_freq(dev_priv, dev_priv->rps.rp1_freq), 5243 dev_priv->rps.rp1_freq); 5244 5245 dev_priv->rps.min_freq = cherryview_rps_min_freq(dev_priv); 5246 DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", 5247 vlv_gpu_freq(dev_priv, dev_priv->rps.min_freq), 5248 dev_priv->rps.min_freq); 5249 5250 WARN_ONCE((dev_priv->rps.max_freq | 5251 dev_priv->rps.efficient_freq | 5252 dev_priv->rps.rp1_freq | 5253 dev_priv->rps.min_freq) & 1, 5254 "Odd GPU freq values\n"); 5255 5256 /* Preserve min/max settings in case of re-init */ 5257 if (dev_priv->rps.max_freq_softlimit == 0) 5258 dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; 5259 5260 if (dev_priv->rps.min_freq_softlimit == 0) 5261 dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq; 5262 5263 mutex_unlock(&dev_priv->rps.hw_lock); 5264} 5265 5266static void valleyview_cleanup_gt_powersave(struct drm_device *dev) 5267{ 5268 valleyview_cleanup_pctx(dev); 5269} 5270 5271static void cherryview_enable_rps(struct drm_device *dev) 5272{ 5273 struct drm_i915_private *dev_priv = dev->dev_private; 5274 struct intel_engine_cs *ring; 5275 u32 gtfifodbg, val, rc6_mode = 0, pcbr; 5276 int i; 5277 5278 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 5279 5280 gtfifodbg = I915_READ(GTFIFODBG); 5281 if (gtfifodbg) { 5282 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n", 5283 gtfifodbg); 5284 I915_WRITE(GTFIFODBG, gtfifodbg); 5285 } 5286 5287 cherryview_check_pctx(dev_priv); 5288 5289 /* 1a & 1b: Get forcewake during program sequence. 
Although the driver 5290 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ 5291 gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); 5292 5293 /* 2a: Program RC6 thresholds.*/ 5294 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); 5295 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ 5296 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ 5297 5298 for_each_ring(ring, dev_priv, i) 5299 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10); 5300 I915_WRITE(GEN6_RC_SLEEP, 0); 5301 5302 I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ 5303 5304 /* allows RC6 residency counter to work */ 5305 I915_WRITE(VLV_COUNTER_CONTROL, 5306 _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | 5307 VLV_MEDIA_RC6_COUNT_EN | 5308 VLV_RENDER_RC6_COUNT_EN)); 5309 5310 /* For now we assume BIOS is allocating and populating the PCBR */ 5311 pcbr = I915_READ(VLV_PCBR); 5312 5313 /* 3: Enable RC6 */ 5314 if ((intel_enable_rc6(dev) & INTEL_RC6_ENABLE) && 5315 (pcbr >> VLV_PCBR_ADDR_SHIFT)) 5316 rc6_mode = GEN6_RC_CTL_EI_MODE(1); 5317 5318 I915_WRITE(GEN6_RC_CONTROL, rc6_mode); 5319 5320 /* 4 Program defaults and thresholds for RPS*/ 5321 I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); 5322 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); 5323 I915_WRITE(GEN6_RP_UP_EI, 66000); 5324 I915_WRITE(GEN6_RP_DOWN_EI, 350000); 5325 5326 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); 5327 5328 /* WaDisablePwrmtrEvent:chv (pre-production hw) */ 5329 I915_WRITE(0xA80C, I915_READ(0xA80C) & 0x00ffffff); 5330 I915_WRITE(0xA810, I915_READ(0xA810) & 0xffffff00); 5331 5332 /* 5: Enable RPS */ 5333 I915_WRITE(GEN6_RP_CONTROL, 5334 GEN6_RP_MEDIA_HW_NORMAL_MODE | 5335 GEN6_RP_MEDIA_IS_GFX | /* WaSetMaskForGfxBusyness:chv (pre-production hw ?) */ 5336 GEN6_RP_ENABLE | 5337 GEN6_RP_UP_BUSY_AVG | 5338 GEN6_RP_DOWN_IDLE_AVG); 5339 5340 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); 5341 5342 /* RPS code assumes GPLL is used */ 5343 WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); 5344 5345 DRM_DEBUG_DRIVER("GPLL enabled? %s\n", val & GPLLENABLE ? 
"yes" : "no"); 5346 DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); 5347 5348 dev_priv->rps.cur_freq = (val >> 8) & 0xff; 5349 DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n", 5350 vlv_gpu_freq(dev_priv, dev_priv->rps.cur_freq), 5351 dev_priv->rps.cur_freq); 5352 5353 DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n", 5354 vlv_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), 5355 dev_priv->rps.efficient_freq); 5356 5357 valleyview_set_rps(dev_priv->dev, dev_priv->rps.efficient_freq); 5358 5359 gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); 5360} 5361 5362static void valleyview_enable_rps(struct drm_device *dev) 5363{ 5364 struct drm_i915_private *dev_priv = dev->dev_private; 5365 struct intel_engine_cs *ring; 5366 u32 gtfifodbg, val, rc6_mode = 0; 5367 int i; 5368 5369 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 5370 5371 valleyview_check_pctx(dev_priv); 5372 5373 if ((gtfifodbg = I915_READ(GTFIFODBG))) { 5374 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n", 5375 gtfifodbg); 5376 I915_WRITE(GTFIFODBG, gtfifodbg); 5377 } 5378 5379 /* If VLV, Forcewake all wells, else re-direct to regular path */ 5380 gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); 5381 5382 I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); 5383 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); 5384 I915_WRITE(GEN6_RP_UP_EI, 66000); 5385 I915_WRITE(GEN6_RP_DOWN_EI, 350000); 5386 5387 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); 5388 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 0xf4240); 5389 5390 I915_WRITE(GEN6_RP_CONTROL, 5391 GEN6_RP_MEDIA_TURBO | 5392 GEN6_RP_MEDIA_HW_NORMAL_MODE | 5393 GEN6_RP_MEDIA_IS_GFX | 5394 GEN6_RP_ENABLE | 5395 GEN6_RP_UP_BUSY_AVG | 5396 GEN6_RP_DOWN_IDLE_CONT); 5397 5398 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000); 5399 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); 5400 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); 5401 5402 for_each_ring(ring, dev_priv, i) 5403 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10); 5404 5405 I915_WRITE(GEN6_RC6_THRESHOLD, 0x557); 5406 5407 /* allows RC6 residency counter to work */ 5408 I915_WRITE(VLV_COUNTER_CONTROL, 5409 _MASKED_BIT_ENABLE(VLV_MEDIA_RC0_COUNT_EN | 5410 VLV_RENDER_RC0_COUNT_EN | 5411 VLV_MEDIA_RC6_COUNT_EN | 5412 VLV_RENDER_RC6_COUNT_EN)); 5413 5414 if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE) 5415 rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL; 5416 5417 intel_print_rc6_info(dev, rc6_mode); 5418 5419 I915_WRITE(GEN6_RC_CONTROL, rc6_mode); 5420 5421 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); 5422 5423 /* RPS code assumes GPLL is used */ 5424 WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); 5425 5426 DRM_DEBUG_DRIVER("GPLL enabled? %s\n", val & GPLLENABLE ? 
"yes" : "no"); 5427 DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); 5428 5429 dev_priv->rps.cur_freq = (val >> 8) & 0xff; 5430 DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n", 5431 vlv_gpu_freq(dev_priv, dev_priv->rps.cur_freq), 5432 dev_priv->rps.cur_freq); 5433 5434 DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n", 5435 vlv_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), 5436 dev_priv->rps.efficient_freq); 5437 5438 valleyview_set_rps(dev_priv->dev, dev_priv->rps.efficient_freq); 5439 5440 gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); 5441} 5442 5443void ironlake_teardown_rc6(struct drm_device *dev) 5444{ 5445 struct drm_i915_private *dev_priv = dev->dev_private; 5446 5447 if (dev_priv->ips.renderctx) { 5448 i915_gem_object_ggtt_unpin(dev_priv->ips.renderctx); 5449 drm_gem_object_unreference(&dev_priv->ips.renderctx->base); 5450 dev_priv->ips.renderctx = NULL; 5451 } 5452 5453 if (dev_priv->ips.pwrctx) { 5454 i915_gem_object_ggtt_unpin(dev_priv->ips.pwrctx); 5455 drm_gem_object_unreference(&dev_priv->ips.pwrctx->base); 5456 dev_priv->ips.pwrctx = NULL; 5457 } 5458} 5459 5460static void ironlake_disable_rc6(struct drm_device *dev) 5461{ 5462 struct drm_i915_private *dev_priv = dev->dev_private; 5463 5464 if (I915_READ(PWRCTXA)) { 5465 /* Wake the GPU, prevent RC6, then restore RSTDBYCTL */ 5466 I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) | RCX_SW_EXIT); 5467 wait_for(((I915_READ(RSTDBYCTL) & RSX_STATUS_MASK) == RSX_STATUS_ON), 5468 50); 5469 5470 I915_WRITE(PWRCTXA, 0); 5471 POSTING_READ(PWRCTXA); 5472 5473 I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT); 5474 POSTING_READ(RSTDBYCTL); 5475 } 5476} 5477 5478static int ironlake_setup_rc6(struct drm_device *dev) 5479{ 5480 struct drm_i915_private *dev_priv = dev->dev_private; 5481 5482 if (dev_priv->ips.renderctx == NULL) 5483 dev_priv->ips.renderctx = intel_alloc_context_page(dev); 5484 if (!dev_priv->ips.renderctx) 5485 return -ENOMEM; 5486 5487 if (dev_priv->ips.pwrctx == NULL) 5488 dev_priv->ips.pwrctx = intel_alloc_context_page(dev); 5489 if (!dev_priv->ips.pwrctx) { 5490 ironlake_teardown_rc6(dev); 5491 return -ENOMEM; 5492 } 5493 5494 return 0; 5495} 5496 5497static void ironlake_enable_rc6(struct drm_device *dev) 5498{ 5499 struct drm_i915_private *dev_priv = dev->dev_private; 5500 struct intel_engine_cs *ring = &dev_priv->ring[RCS]; 5501 bool was_interruptible; 5502 int ret; 5503 5504 /* rc6 disabled by default due to repeated reports of hanging during 5505 * boot and resume. 5506 */ 5507 if (!intel_enable_rc6(dev)) 5508 return; 5509 5510 WARN_ON(!mutex_is_locked(&dev->struct_mutex)); 5511 5512 ret = ironlake_setup_rc6(dev); 5513 if (ret) 5514 return; 5515 5516 was_interruptible = dev_priv->mm.interruptible; 5517 dev_priv->mm.interruptible = false; 5518 5519 /* 5520 * GPU can automatically power down the render unit if given a page 5521 * to save state. 
5522 */ 5523	ret = intel_ring_begin(ring, 6); 5524	if (ret) { 5525		ironlake_teardown_rc6(dev); 5526		dev_priv->mm.interruptible = was_interruptible; 5527		return; 5528	} 5529 5530	intel_ring_emit(ring, MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN); 5531	intel_ring_emit(ring, MI_SET_CONTEXT); 5532	intel_ring_emit(ring, i915_gem_obj_ggtt_offset(dev_priv->ips.renderctx) | 5533			MI_MM_SPACE_GTT | 5534			MI_SAVE_EXT_STATE_EN | 5535			MI_RESTORE_EXT_STATE_EN | 5536			MI_RESTORE_INHIBIT); 5537	intel_ring_emit(ring, MI_SUSPEND_FLUSH); 5538	intel_ring_emit(ring, MI_NOOP); 5539	intel_ring_emit(ring, MI_FLUSH); 5540	intel_ring_advance(ring); 5541 5542	/* 5543	 * Wait for the command parser to advance past MI_SET_CONTEXT. The HW 5544	 * does an implicit flush; combined with the MI_FLUSH above, it should be 5545	 * safe to assume that renderctx is valid. 5546	 */ 5547	ret = intel_ring_idle(ring); 5548	dev_priv->mm.interruptible = was_interruptible; 5549	if (ret) { 5550		DRM_ERROR("failed to enable ironlake power savings\n"); 5551		ironlake_teardown_rc6(dev); 5552		return; 5553	} 5554 5555	I915_WRITE(PWRCTXA, i915_gem_obj_ggtt_offset(dev_priv->ips.pwrctx) | PWRCTX_EN); 5556	I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT); 5557 5558	intel_print_rc6_info(dev, GEN6_RC_CTL_RC6_ENABLE); 5559} 5560 5561static unsigned long intel_pxfreq(u32 vidfreq) 5562{ 5563	unsigned long freq; 5564	int div = (vidfreq & 0x3f0000) >> 16; 5565	int post = (vidfreq & 0x3000) >> 12; 5566	int pre = (vidfreq & 0x7); 5567 5568	if (!pre) 5569		return 0; 5570 5571	freq = ((div * 133333) / ((1<<post) * pre)); 5572 5573	return freq; 5574} 5575 5576static const struct cparams { 5577	u16 i; 5578	u16 t; 5579	u16 m; 5580	u16 c; 5581} cparams[] = { 5582	{ 1, 1333, 301, 28664 }, 5583	{ 1, 1066, 294, 24460 }, 5584	{ 1, 800, 294, 25192 }, 5585	{ 0, 1333, 276, 27605 }, 5586	{ 0, 1066, 276, 27605 }, 5587	{ 0, 800, 231, 23784 }, 5588}; 5589 5590static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv) 5591{ 5592	u64 total_count, diff, ret; 5593	u32 count1, count2, count3, m = 0, c = 0; 5594	unsigned long now = jiffies_to_msecs(jiffies), diff1; 5595	int i; 5596 5597	assert_spin_locked(&mchdev_lock); 5598 5599	diff1 = now - dev_priv->ips.last_time1; 5600 5601	/* Prevent division-by-zero if we are asking too fast. 5602	 * Also, we don't get interesting results if we are polling 5603	 * faster than once in 10ms, so just return the saved value 5604	 * in such cases.
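	 *
	 * As a rough sketch of the model evaluated below (an illustration,
	 * not spec): with a counter delta C accumulated over diff1 ms,
	 * chipset power ~= (m * (C / diff1) + c) / 10, where m and c come
	 * from the cparams[] row matching ips.c_m and ips.r_t.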
5605 */ 5606 if (diff1 <= 10) 5607 return dev_priv->ips.chipset_power; 5608 5609 count1 = I915_READ(DMIEC); 5610 count2 = I915_READ(DDREC); 5611 count3 = I915_READ(CSIEC); 5612 5613 total_count = count1 + count2 + count3; 5614 5615 /* FIXME: handle per-counter overflow */ 5616 if (total_count < dev_priv->ips.last_count1) { 5617 diff = ~0UL - dev_priv->ips.last_count1; 5618 diff += total_count; 5619 } else { 5620 diff = total_count - dev_priv->ips.last_count1; 5621 } 5622 5623 for (i = 0; i < ARRAY_SIZE(cparams); i++) { 5624 if (cparams[i].i == dev_priv->ips.c_m && 5625 cparams[i].t == dev_priv->ips.r_t) { 5626 m = cparams[i].m; 5627 c = cparams[i].c; 5628 break; 5629 } 5630 } 5631 5632 diff = div_u64(diff, diff1); 5633 ret = ((m * diff) + c); 5634 ret = div_u64(ret, 10); 5635 5636 dev_priv->ips.last_count1 = total_count; 5637 dev_priv->ips.last_time1 = now; 5638 5639 dev_priv->ips.chipset_power = ret; 5640 5641 return ret; 5642} 5643 5644unsigned long i915_chipset_val(struct drm_i915_private *dev_priv) 5645{ 5646 struct drm_device *dev = dev_priv->dev; 5647 unsigned long val; 5648 5649 if (INTEL_INFO(dev)->gen != 5) 5650 return 0; 5651 5652 spin_lock_irq(&mchdev_lock); 5653 5654 val = __i915_chipset_val(dev_priv); 5655 5656 spin_unlock_irq(&mchdev_lock); 5657 5658 return val; 5659} 5660 5661unsigned long i915_mch_val(struct drm_i915_private *dev_priv) 5662{ 5663 unsigned long m, x, b; 5664 u32 tsfs; 5665 5666 tsfs = I915_READ(TSFS); 5667 5668 m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT); 5669 x = I915_READ8(TR1); 5670 5671 b = tsfs & TSFS_INTR_MASK; 5672 5673 return ((m * x) / 127) - b; 5674} 5675 5676static u16 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid) 5677{ 5678 struct drm_device *dev = dev_priv->dev; 5679 static const struct v_table { 5680 u16 vd; /* in .1 mil */ 5681 u16 vm; /* in .1 mil */ 5682 } v_table[] = { 5683 { 0, 0, }, 5684 { 375, 0, }, 5685 { 500, 0, }, 5686 { 625, 0, }, 5687 { 750, 0, }, 5688 { 875, 0, }, 5689 { 1000, 0, }, 5690 { 1125, 0, }, 5691 { 4125, 3000, }, 5692 { 4125, 3000, }, 5693 { 4125, 3000, }, 5694 { 4125, 3000, }, 5695 { 4125, 3000, }, 5696 { 4125, 3000, }, 5697 { 4125, 3000, }, 5698 { 4125, 3000, }, 5699 { 4125, 3000, }, 5700 { 4125, 3000, }, 5701 { 4125, 3000, }, 5702 { 4125, 3000, }, 5703 { 4125, 3000, }, 5704 { 4125, 3000, }, 5705 { 4125, 3000, }, 5706 { 4125, 3000, }, 5707 { 4125, 3000, }, 5708 { 4125, 3000, }, 5709 { 4125, 3000, }, 5710 { 4125, 3000, }, 5711 { 4125, 3000, }, 5712 { 4125, 3000, }, 5713 { 4125, 3000, }, 5714 { 4125, 3000, }, 5715 { 4250, 3125, }, 5716 { 4375, 3250, }, 5717 { 4500, 3375, }, 5718 { 4625, 3500, }, 5719 { 4750, 3625, }, 5720 { 4875, 3750, }, 5721 { 5000, 3875, }, 5722 { 5125, 4000, }, 5723 { 5250, 4125, }, 5724 { 5375, 4250, }, 5725 { 5500, 4375, }, 5726 { 5625, 4500, }, 5727 { 5750, 4625, }, 5728 { 5875, 4750, }, 5729 { 6000, 4875, }, 5730 { 6125, 5000, }, 5731 { 6250, 5125, }, 5732 { 6375, 5250, }, 5733 { 6500, 5375, }, 5734 { 6625, 5500, }, 5735 { 6750, 5625, }, 5736 { 6875, 5750, }, 5737 { 7000, 5875, }, 5738 { 7125, 6000, }, 5739 { 7250, 6125, }, 5740 { 7375, 6250, }, 5741 { 7500, 6375, }, 5742 { 7625, 6500, }, 5743 { 7750, 6625, }, 5744 { 7875, 6750, }, 5745 { 8000, 6875, }, 5746 { 8125, 7000, }, 5747 { 8250, 7125, }, 5748 { 8375, 7250, }, 5749 { 8500, 7375, }, 5750 { 8625, 7500, }, 5751 { 8750, 7625, }, 5752 { 8875, 7750, }, 5753 { 9000, 7875, }, 5754 { 9125, 8000, }, 5755 { 9250, 8125, }, 5756 { 9375, 8250, }, 5757 { 9500, 8375, }, 5758 { 9625, 8500, }, 5759 { 9750, 8625, }, 5760 { 9875, 8750, }, 
5761 { 10000, 8875, }, 5762 { 10125, 9000, }, 5763 { 10250, 9125, }, 5764 { 10375, 9250, }, 5765 { 10500, 9375, }, 5766 { 10625, 9500, }, 5767 { 10750, 9625, }, 5768 { 10875, 9750, }, 5769 { 11000, 9875, }, 5770 { 11125, 10000, }, 5771 { 11250, 10125, }, 5772 { 11375, 10250, }, 5773 { 11500, 10375, }, 5774 { 11625, 10500, }, 5775 { 11750, 10625, }, 5776 { 11875, 10750, }, 5777 { 12000, 10875, }, 5778 { 12125, 11000, }, 5779 { 12250, 11125, }, 5780 { 12375, 11250, }, 5781 { 12500, 11375, }, 5782 { 12625, 11500, }, 5783 { 12750, 11625, }, 5784 { 12875, 11750, }, 5785 { 13000, 11875, }, 5786 { 13125, 12000, }, 5787 { 13250, 12125, }, 5788 { 13375, 12250, }, 5789 { 13500, 12375, }, 5790 { 13625, 12500, }, 5791 { 13750, 12625, }, 5792 { 13875, 12750, }, 5793 { 14000, 12875, }, 5794 { 14125, 13000, }, 5795 { 14250, 13125, }, 5796 { 14375, 13250, }, 5797 { 14500, 13375, }, 5798 { 14625, 13500, }, 5799 { 14750, 13625, }, 5800 { 14875, 13750, }, 5801 { 15000, 13875, }, 5802 { 15125, 14000, }, 5803 { 15250, 14125, }, 5804 { 15375, 14250, }, 5805 { 15500, 14375, }, 5806 { 15625, 14500, }, 5807 { 15750, 14625, }, 5808 { 15875, 14750, }, 5809 { 16000, 14875, }, 5810 { 16125, 15000, }, 5811 }; 5812 if (INTEL_INFO(dev)->is_mobile) 5813 return v_table[pxvid].vm; 5814 else 5815 return v_table[pxvid].vd; 5816} 5817 5818static void __i915_update_gfx_val(struct drm_i915_private *dev_priv) 5819{ 5820 u64 now, diff, diffms; 5821 u32 count; 5822 5823 assert_spin_locked(&mchdev_lock); 5824 5825 now = ktime_get_raw_ns(); 5826 diffms = now - dev_priv->ips.last_time2; 5827 do_div(diffms, NSEC_PER_MSEC); 5828 5829 /* Don't divide by 0 */ 5830 if (!diffms) 5831 return; 5832 5833 count = I915_READ(GFXEC); 5834 5835 if (count < dev_priv->ips.last_count2) { 5836 diff = ~0UL - dev_priv->ips.last_count2; 5837 diff += count; 5838 } else { 5839 diff = count - dev_priv->ips.last_count2; 5840 } 5841 5842 dev_priv->ips.last_count2 = count; 5843 dev_priv->ips.last_time2 = now; 5844 5845 /* More magic constants... 
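	 * The arithmetic below works out to
	 * gfx_power = (GFXEC delta / elapsed ms) * 1181 / 10, i.e. roughly
	 * 118 units per count per millisecond; 1181 is another empirically
	 * derived coefficient, like the cparams[] table above.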
*/ 5846 diff = diff * 1181; 5847 diff = div_u64(diff, diffms * 10); 5848 dev_priv->ips.gfx_power = diff; 5849} 5850 5851void i915_update_gfx_val(struct drm_i915_private *dev_priv) 5852{ 5853 struct drm_device *dev = dev_priv->dev; 5854 5855 if (INTEL_INFO(dev)->gen != 5) 5856 return; 5857 5858 spin_lock_irq(&mchdev_lock); 5859 5860 __i915_update_gfx_val(dev_priv); 5861 5862 spin_unlock_irq(&mchdev_lock); 5863} 5864 5865static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv) 5866{ 5867 unsigned long t, corr, state1, corr2, state2; 5868 u32 pxvid, ext_v; 5869 5870 assert_spin_locked(&mchdev_lock); 5871 5872 pxvid = I915_READ(PXVFREQ_BASE + (dev_priv->rps.cur_freq * 4)); 5873 pxvid = (pxvid >> 24) & 0x7f; 5874 ext_v = pvid_to_extvid(dev_priv, pxvid); 5875 5876 state1 = ext_v; 5877 5878 t = i915_mch_val(dev_priv); 5879 5880 /* Revel in the empirically derived constants */ 5881 5882 /* Correction factor in 1/100000 units */ 5883 if (t > 80) 5884 corr = ((t * 2349) + 135940); 5885 else if (t >= 50) 5886 corr = ((t * 964) + 29317); 5887 else /* < 50 */ 5888 corr = ((t * 301) + 1004); 5889 5890 corr = corr * ((150142 * state1) / 10000 - 78642); 5891 corr /= 100000; 5892 corr2 = (corr * dev_priv->ips.corr); 5893 5894 state2 = (corr2 * state1) / 10000; 5895 state2 /= 100; /* convert to mW */ 5896 5897 __i915_update_gfx_val(dev_priv); 5898 5899 return dev_priv->ips.gfx_power + state2; 5900} 5901 5902unsigned long i915_gfx_val(struct drm_i915_private *dev_priv) 5903{ 5904 struct drm_device *dev = dev_priv->dev; 5905 unsigned long val; 5906 5907 if (INTEL_INFO(dev)->gen != 5) 5908 return 0; 5909 5910 spin_lock_irq(&mchdev_lock); 5911 5912 val = __i915_gfx_val(dev_priv); 5913 5914 spin_unlock_irq(&mchdev_lock); 5915 5916 return val; 5917} 5918 5919/** 5920 * i915_read_mch_val - return value for IPS use 5921 * 5922 * Calculate and return a value for the IPS driver to use when deciding whether 5923 * we have thermal and power headroom to increase CPU or GPU power budget. 5924 */ 5925unsigned long i915_read_mch_val(void) 5926{ 5927 struct drm_i915_private *dev_priv; 5928 unsigned long chipset_val, graphics_val, ret = 0; 5929 5930 spin_lock_irq(&mchdev_lock); 5931 if (!i915_mch_dev) 5932 goto out_unlock; 5933 dev_priv = i915_mch_dev; 5934 5935 chipset_val = __i915_chipset_val(dev_priv); 5936 graphics_val = __i915_gfx_val(dev_priv); 5937 5938 ret = chipset_val + graphics_val; 5939 5940out_unlock: 5941 spin_unlock_irq(&mchdev_lock); 5942 5943 return ret; 5944} 5945EXPORT_SYMBOL_GPL(i915_read_mch_val); 5946 5947/** 5948 * i915_gpu_raise - raise GPU frequency limit 5949 * 5950 * Raise the limit; IPS indicates we have thermal headroom. 5951 */ 5952bool i915_gpu_raise(void) 5953{ 5954 struct drm_i915_private *dev_priv; 5955 bool ret = true; 5956 5957 spin_lock_irq(&mchdev_lock); 5958 if (!i915_mch_dev) { 5959 ret = false; 5960 goto out_unlock; 5961 } 5962 dev_priv = i915_mch_dev; 5963 5964 if (dev_priv->ips.max_delay > dev_priv->ips.fmax) 5965 dev_priv->ips.max_delay--; 5966 5967out_unlock: 5968 spin_unlock_irq(&mchdev_lock); 5969 5970 return ret; 5971} 5972EXPORT_SYMBOL_GPL(i915_gpu_raise); 5973 5974/** 5975 * i915_gpu_lower - lower GPU frequency limit 5976 * 5977 * IPS indicates we're close to a thermal limit, so throttle back the GPU 5978 * frequency maximum. 
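 *
 * Note the inverted encoding: a larger delay value means a lower
 * frequency, so stepping max_delay towards min_delay below lowers the
 * cap, just as i915_gpu_raise() steps it towards fmax to raise it.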
5979 */ 5980bool i915_gpu_lower(void) 5981{ 5982	struct drm_i915_private *dev_priv; 5983	bool ret = true; 5984 5985	spin_lock_irq(&mchdev_lock); 5986	if (!i915_mch_dev) { 5987		ret = false; 5988		goto out_unlock; 5989	} 5990	dev_priv = i915_mch_dev; 5991 5992	if (dev_priv->ips.max_delay < dev_priv->ips.min_delay) 5993		dev_priv->ips.max_delay++; 5994 5995out_unlock: 5996	spin_unlock_irq(&mchdev_lock); 5997 5998	return ret; 5999} 6000EXPORT_SYMBOL_GPL(i915_gpu_lower); 6001 6002/** 6003 * i915_gpu_busy - indicate GPU busyness to IPS 6004 * 6005 * Tell the IPS driver whether or not the GPU is busy. 6006 */ 6007bool i915_gpu_busy(void) 6008{ 6009	struct drm_i915_private *dev_priv; 6010	struct intel_engine_cs *ring; 6011	bool ret = false; 6012	int i; 6013 6014	spin_lock_irq(&mchdev_lock); 6015	if (!i915_mch_dev) 6016		goto out_unlock; 6017	dev_priv = i915_mch_dev; 6018 6019	for_each_ring(ring, dev_priv, i) 6020		ret |= !list_empty(&ring->request_list); 6021 6022out_unlock: 6023	spin_unlock_irq(&mchdev_lock); 6024 6025	return ret; 6026} 6027EXPORT_SYMBOL_GPL(i915_gpu_busy); 6028 6029/** 6030 * i915_gpu_turbo_disable - disable graphics turbo 6031 * 6032 * Disable graphics turbo by resetting the max frequency and setting the 6033 * current frequency to the default. 6034 */ 6035bool i915_gpu_turbo_disable(void) 6036{ 6037	struct drm_i915_private *dev_priv; 6038	bool ret = true; 6039 6040	spin_lock_irq(&mchdev_lock); 6041	if (!i915_mch_dev) { 6042		ret = false; 6043		goto out_unlock; 6044	} 6045	dev_priv = i915_mch_dev; 6046 6047	dev_priv->ips.max_delay = dev_priv->ips.fstart; 6048 6049	if (!ironlake_set_drps(dev_priv->dev, dev_priv->ips.fstart)) 6050		ret = false; 6051 6052out_unlock: 6053	spin_unlock_irq(&mchdev_lock); 6054 6055	return ret; 6056} 6057EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable); 6058 6059/** 6060 * Tells the intel_ips driver that the i915 driver is now loaded, if 6061 * IPS got loaded first. 6062 * 6063 * This awkward dance is so that neither module has to depend on the 6064 * other in order for IPS to do the appropriate communication of 6065 * GPU turbo limits to i915. 6066 */ 6067static void 6068ips_ping_for_i915_load(void) 6069{ 6070	void (*link)(void); 6071 6072	link = symbol_get(ips_link_to_i915_driver); 6073	if (link) { 6074		link(); 6075		symbol_put(ips_link_to_i915_driver); 6076	} 6077} 6078 6079void intel_gpu_ips_init(struct drm_i915_private *dev_priv) 6080{ 6081	/* We only register the i915 ips part with intel-ips once everything is 6082	 * set up, to avoid intel-ips sneaking in and reading bogus values.
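	 * Every IPS entry point exported above (i915_read_mch_val() and
	 * friends) takes mchdev_lock and bails out while i915_mch_dev is
	 * still NULL, so publishing the pointer under the same lock means
	 * intel-ips observes either no device or a fully initialized one.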
*/ 6083 spin_lock_irq(&mchdev_lock); 6084 i915_mch_dev = dev_priv; 6085 spin_unlock_irq(&mchdev_lock); 6086 6087 ips_ping_for_i915_load(); 6088} 6089 6090void intel_gpu_ips_teardown(void) 6091{ 6092 spin_lock_irq(&mchdev_lock); 6093 i915_mch_dev = NULL; 6094 spin_unlock_irq(&mchdev_lock); 6095} 6096 6097static void intel_init_emon(struct drm_device *dev) 6098{ 6099 struct drm_i915_private *dev_priv = dev->dev_private; 6100 u32 lcfuse; 6101 u8 pxw[16]; 6102 int i; 6103 6104 /* Disable to program */ 6105 I915_WRITE(ECR, 0); 6106 POSTING_READ(ECR); 6107 6108 /* Program energy weights for various events */ 6109 I915_WRITE(SDEW, 0x15040d00); 6110 I915_WRITE(CSIEW0, 0x007f0000); 6111 I915_WRITE(CSIEW1, 0x1e220004); 6112 I915_WRITE(CSIEW2, 0x04000004); 6113 6114 for (i = 0; i < 5; i++) 6115 I915_WRITE(PEW + (i * 4), 0); 6116 for (i = 0; i < 3; i++) 6117 I915_WRITE(DEW + (i * 4), 0); 6118 6119 /* Program P-state weights to account for frequency power adjustment */ 6120 for (i = 0; i < 16; i++) { 6121 u32 pxvidfreq = I915_READ(PXVFREQ_BASE + (i * 4)); 6122 unsigned long freq = intel_pxfreq(pxvidfreq); 6123 unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >> 6124 PXVFREQ_PX_SHIFT; 6125 unsigned long val; 6126 6127 val = vid * vid; 6128 val *= (freq / 1000); 6129 val *= 255; 6130 val /= (127*127*900); 6131 if (val > 0xff) 6132 DRM_ERROR("bad pxval: %ld\n", val); 6133 pxw[i] = val; 6134 } 6135 /* Render standby states get 0 weight */ 6136 pxw[14] = 0; 6137 pxw[15] = 0; 6138 6139 for (i = 0; i < 4; i++) { 6140 u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) | 6141 (pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]); 6142 I915_WRITE(PXW + (i * 4), val); 6143 } 6144 6145 /* Adjust magic regs to magic values (more experimental results) */ 6146 I915_WRITE(OGW0, 0); 6147 I915_WRITE(OGW1, 0); 6148 I915_WRITE(EG0, 0x00007f00); 6149 I915_WRITE(EG1, 0x0000000e); 6150 I915_WRITE(EG2, 0x000e0000); 6151 I915_WRITE(EG3, 0x68000300); 6152 I915_WRITE(EG4, 0x42000000); 6153 I915_WRITE(EG5, 0x00140031); 6154 I915_WRITE(EG6, 0); 6155 I915_WRITE(EG7, 0); 6156 6157 for (i = 0; i < 8; i++) 6158 I915_WRITE(PXWL + (i * 4), 0); 6159 6160 /* Enable PMON + select events */ 6161 I915_WRITE(ECR, 0x80000019); 6162 6163 lcfuse = I915_READ(LCFUSE02); 6164 6165 dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK); 6166} 6167 6168void intel_init_gt_powersave(struct drm_device *dev) 6169{ 6170 i915.enable_rc6 = sanitize_rc6_option(dev, i915.enable_rc6); 6171 6172 if (IS_CHERRYVIEW(dev)) 6173 cherryview_init_gt_powersave(dev); 6174 else if (IS_VALLEYVIEW(dev)) 6175 valleyview_init_gt_powersave(dev); 6176} 6177 6178void intel_cleanup_gt_powersave(struct drm_device *dev) 6179{ 6180 if (IS_CHERRYVIEW(dev)) 6181 return; 6182 else if (IS_VALLEYVIEW(dev)) 6183 valleyview_cleanup_gt_powersave(dev); 6184} 6185 6186static void gen6_suspend_rps(struct drm_device *dev) 6187{ 6188 struct drm_i915_private *dev_priv = dev->dev_private; 6189 6190 flush_delayed_work(&dev_priv->rps.delayed_resume_work); 6191 6192 /* 6193 * TODO: disable RPS interrupts on GEN9+ too once RPS support 6194 * is added for it. 6195 */ 6196 if (INTEL_INFO(dev)->gen < 9) 6197 gen6_disable_rps_interrupts(dev); 6198} 6199 6200/** 6201 * intel_suspend_gt_powersave - suspend PM work and helper threads 6202 * @dev: drm device 6203 * 6204 * We don't want to disable RC6 or other features here, we just want 6205 * to make sure any work we've queued has finished and won't bother 6206 * us while we're suspended. 
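 *
 * gen6_suspend_rps() below flushes rps.delayed_resume_work, so any
 * pending intel_gen6_powersave_work() has run to completion before we
 * ask the GPU to idle.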
6207 */ 6208void intel_suspend_gt_powersave(struct drm_device *dev) 6209{ 6210 struct drm_i915_private *dev_priv = dev->dev_private; 6211 6212 if (INTEL_INFO(dev)->gen < 6) 6213 return; 6214 6215 gen6_suspend_rps(dev); 6216 6217 /* Force GPU to min freq during suspend */ 6218 gen6_rps_idle(dev_priv); 6219} 6220 6221void intel_disable_gt_powersave(struct drm_device *dev) 6222{ 6223 struct drm_i915_private *dev_priv = dev->dev_private; 6224 6225 if (IS_IRONLAKE_M(dev)) { 6226 ironlake_disable_drps(dev); 6227 ironlake_disable_rc6(dev); 6228 } else if (INTEL_INFO(dev)->gen >= 6) { 6229 intel_suspend_gt_powersave(dev); 6230 6231 mutex_lock(&dev_priv->rps.hw_lock); 6232 if (INTEL_INFO(dev)->gen >= 9) 6233 gen9_disable_rps(dev); 6234 else if (IS_CHERRYVIEW(dev)) 6235 cherryview_disable_rps(dev); 6236 else if (IS_VALLEYVIEW(dev)) 6237 valleyview_disable_rps(dev); 6238 else 6239 gen6_disable_rps(dev); 6240 6241 dev_priv->rps.enabled = false; 6242 mutex_unlock(&dev_priv->rps.hw_lock); 6243 } 6244} 6245 6246static void intel_gen6_powersave_work(struct work_struct *work) 6247{ 6248 struct drm_i915_private *dev_priv = 6249 container_of(work, struct drm_i915_private, 6250 rps.delayed_resume_work.work); 6251 struct drm_device *dev = dev_priv->dev; 6252 6253 mutex_lock(&dev_priv->rps.hw_lock); 6254 6255 /* 6256 * TODO: reset/enable RPS interrupts on GEN9+ too, once RPS support is 6257 * added for it. 6258 */ 6259 if (INTEL_INFO(dev)->gen < 9) 6260 gen6_reset_rps_interrupts(dev); 6261 6262 if (IS_CHERRYVIEW(dev)) { 6263 cherryview_enable_rps(dev); 6264 } else if (IS_VALLEYVIEW(dev)) { 6265 valleyview_enable_rps(dev); 6266 } else if (INTEL_INFO(dev)->gen >= 9) { 6267 gen9_enable_rps(dev); 6268 } else if (IS_BROADWELL(dev)) { 6269 gen8_enable_rps(dev); 6270 __gen6_update_ring_freq(dev); 6271 } else { 6272 gen6_enable_rps(dev); 6273 __gen6_update_ring_freq(dev); 6274 } 6275 dev_priv->rps.enabled = true; 6276 6277 if (INTEL_INFO(dev)->gen < 9) 6278 gen6_enable_rps_interrupts(dev); 6279 6280 mutex_unlock(&dev_priv->rps.hw_lock); 6281 6282 intel_runtime_pm_put(dev_priv); 6283} 6284 6285void intel_enable_gt_powersave(struct drm_device *dev) 6286{ 6287 struct drm_i915_private *dev_priv = dev->dev_private; 6288 6289 if (IS_IRONLAKE_M(dev)) { 6290 mutex_lock(&dev->struct_mutex); 6291 ironlake_enable_drps(dev); 6292 ironlake_enable_rc6(dev); 6293 intel_init_emon(dev); 6294 mutex_unlock(&dev->struct_mutex); 6295 } else if (INTEL_INFO(dev)->gen >= 6) { 6296 /* 6297 * PCU communication is slow and this doesn't need to be 6298 * done at any specific time, so do this out of our fast path 6299 * to make resume and init faster. 6300 * 6301 * We depend on the HW RC6 power context save/restore 6302 * mechanism when entering D3 through runtime PM suspend. So 6303 * disable RPM until RPS/RC6 is properly setup. We can only 6304 * get here via the driver load/system resume/runtime resume 6305 * paths, so the _noresume version is enough (and in case of 6306 * runtime resume it's necessary). 
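		 *
		 * The reference taken via intel_runtime_pm_get_noresume()
		 * below is dropped again at the end of
		 * intel_gen6_powersave_work() by intel_runtime_pm_put(), so
		 * runtime PM stays blocked exactly until RPS/RC6 setup has
		 * finished.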
6307 */ 6308 if (schedule_delayed_work(&dev_priv->rps.delayed_resume_work, 6309 round_jiffies_up_relative(HZ))) 6310 intel_runtime_pm_get_noresume(dev_priv); 6311 } 6312} 6313 6314void intel_reset_gt_powersave(struct drm_device *dev) 6315{ 6316 struct drm_i915_private *dev_priv = dev->dev_private; 6317 6318 if (INTEL_INFO(dev)->gen < 6) 6319 return; 6320 6321 gen6_suspend_rps(dev); 6322 dev_priv->rps.enabled = false; 6323} 6324 6325static void ibx_init_clock_gating(struct drm_device *dev) 6326{ 6327 struct drm_i915_private *dev_priv = dev->dev_private; 6328 6329 /* 6330 * On Ibex Peak and Cougar Point, we need to disable clock 6331 * gating for the panel power sequencer or it will fail to 6332 * start up when no ports are active. 6333 */ 6334 I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE); 6335} 6336 6337static void g4x_disable_trickle_feed(struct drm_device *dev) 6338{ 6339 struct drm_i915_private *dev_priv = dev->dev_private; 6340 int pipe; 6341 6342 for_each_pipe(dev_priv, pipe) { 6343 I915_WRITE(DSPCNTR(pipe), 6344 I915_READ(DSPCNTR(pipe)) | 6345 DISPPLANE_TRICKLE_FEED_DISABLE); 6346 intel_flush_primary_plane(dev_priv, pipe); 6347 } 6348} 6349 6350static void ilk_init_lp_watermarks(struct drm_device *dev) 6351{ 6352 struct drm_i915_private *dev_priv = dev->dev_private; 6353 6354 I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN); 6355 I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN); 6356 I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN); 6357 6358 /* 6359 * Don't touch WM1S_LP_EN here. 6360 * Doing so could cause underruns. 6361 */ 6362} 6363 6364static void ironlake_init_clock_gating(struct drm_device *dev) 6365{ 6366 struct drm_i915_private *dev_priv = dev->dev_private; 6367 uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; 6368 6369 /* 6370 * Required for FBC 6371 * WaFbcDisableDpfcClockGating:ilk 6372 */ 6373 dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE | 6374 ILK_DPFCUNIT_CLOCK_GATE_DISABLE | 6375 ILK_DPFDUNIT_CLOCK_GATE_ENABLE; 6376 6377 I915_WRITE(PCH_3DCGDIS0, 6378 MARIUNIT_CLOCK_GATE_DISABLE | 6379 SVSMUNIT_CLOCK_GATE_DISABLE); 6380 I915_WRITE(PCH_3DCGDIS1, 6381 VFMUNIT_CLOCK_GATE_DISABLE); 6382 6383 /* 6384 * According to the spec the following bits should be set in 6385 * order to enable memory self-refresh 6386 * The bit 22/21 of 0x42004 6387 * The bit 5 of 0x42020 6388 * The bit 15 of 0x45000 6389 */ 6390 I915_WRITE(ILK_DISPLAY_CHICKEN2, 6391 (I915_READ(ILK_DISPLAY_CHICKEN2) | 6392 ILK_DPARB_GATE | ILK_VSDPFD_FULL)); 6393 dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE; 6394 I915_WRITE(DISP_ARB_CTL, 6395 (I915_READ(DISP_ARB_CTL) | 6396 DISP_FBC_WM_DIS)); 6397 6398 ilk_init_lp_watermarks(dev); 6399 6400 /* 6401 * Based on the document from hardware guys the following bits 6402 * should be set unconditionally in order to enable FBC. 6403 * The bit 22 of 0x42000 6404 * The bit 22 of 0x42004 6405 * The bit 7,8,9 of 0x42020. 
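	 *
	 * Mapping those offsets to the names used below (assuming the
	 * usual i915_reg.h layout): 0x42000 is ILK_DISPLAY_CHICKEN1
	 * (ILK_FBCQ_DIS is bit 22), 0x42004 is ILK_DISPLAY_CHICKEN2
	 * (ILK_DPARB_GATE is bit 22), and 0x42020 is ILK_DSPCLK_GATE_D,
	 * whose DPFC/DPFD gating bits were collected in dspclk_gate above.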
6406 */ 6407	if (IS_IRONLAKE_M(dev)) { 6408		/* WaFbcAsynchFlipDisableFbcQueue:ilk */ 6409		I915_WRITE(ILK_DISPLAY_CHICKEN1, 6410			   I915_READ(ILK_DISPLAY_CHICKEN1) | 6411			   ILK_FBCQ_DIS); 6412		I915_WRITE(ILK_DISPLAY_CHICKEN2, 6413			   I915_READ(ILK_DISPLAY_CHICKEN2) | 6414			   ILK_DPARB_GATE); 6415	} 6416 6417	I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate); 6418 6419	I915_WRITE(ILK_DISPLAY_CHICKEN2, 6420		   I915_READ(ILK_DISPLAY_CHICKEN2) | 6421		   ILK_ELPIN_409_SELECT); 6422	I915_WRITE(_3D_CHICKEN2, 6423		   _3D_CHICKEN2_WM_READ_PIPELINED << 16 | 6424		   _3D_CHICKEN2_WM_READ_PIPELINED); 6425 6426	/* WaDisableRenderCachePipelinedFlush:ilk */ 6427	I915_WRITE(CACHE_MODE_0, 6428		   _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE)); 6429 6430	/* WaDisable_RenderCache_OperationalFlush:ilk */ 6431	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 6432 6433	g4x_disable_trickle_feed(dev); 6434 6435	ibx_init_clock_gating(dev); 6436} 6437 6438static void cpt_init_clock_gating(struct drm_device *dev) 6439{ 6440	struct drm_i915_private *dev_priv = dev->dev_private; 6441	int pipe; 6442	uint32_t val; 6443 6444	/* 6445	 * On Ibex Peak and Cougar Point, we need to disable clock 6446	 * gating for the panel power sequencer or it will fail to 6447	 * start up when no ports are active. 6448	 */ 6449	I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE | 6450		   PCH_DPLUNIT_CLOCK_GATE_DISABLE | 6451		   PCH_CPUNIT_CLOCK_GATE_DISABLE); 6452	I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) | 6453		   DPLS_EDP_PPS_FIX_DIS); 6454	/* The below fixes the weird display corruption, a few pixels shifted 6455	 * downward, seen only on the LVDS of some HP laptops with IVY. 6456	 */ 6457	for_each_pipe(dev_priv, pipe) { 6458		val = I915_READ(TRANS_CHICKEN2(pipe)); 6459		val |= TRANS_CHICKEN2_TIMING_OVERRIDE; 6460		val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED; 6461		if (dev_priv->vbt.fdi_rx_polarity_inverted) 6462			val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED; 6463		val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK; 6464		val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER; 6465		val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH; 6466		I915_WRITE(TRANS_CHICKEN2(pipe), val); 6467	} 6468	/* WADP0ClockGatingDisable */ 6469	for_each_pipe(dev_priv, pipe) { 6470		I915_WRITE(TRANS_CHICKEN1(pipe), 6471			   TRANS_CHICKEN1_DP0UNIT_GC_DISABLE); 6472	} 6473} 6474 6475static void gen6_check_mch_setup(struct drm_device *dev) 6476{ 6477	struct drm_i915_private *dev_priv = dev->dev_private; 6478	uint32_t tmp; 6479 6480	tmp = I915_READ(MCH_SSKPD); 6481	if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL) 6482		DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x. This can cause underruns.\n", 6483			      tmp); 6484} 6485 6486static void gen6_init_clock_gating(struct drm_device *dev) 6487{ 6488	struct drm_i915_private *dev_priv = dev->dev_private; 6489	uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; 6490 6491	I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate); 6492 6493	I915_WRITE(ILK_DISPLAY_CHICKEN2, 6494		   I915_READ(ILK_DISPLAY_CHICKEN2) | 6495		   ILK_ELPIN_409_SELECT); 6496 6497	/* WaDisableHiZPlanesWhenMSAAEnabled:snb */ 6498	I915_WRITE(_3D_CHICKEN, 6499		   _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB)); 6500 6501	/* WaDisable_RenderCache_OperationalFlush:snb */ 6502	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 6503 6504	/* 6505	 * BSpec recommends 8x4 when MSAA is used, 6506	 * however in practice 16x4 seems fastest.
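	 *
	 * GT_MODE is a masked register: the high 16 bits select which of
	 * the low 16 bits the write actually touches, so the
	 * _MASKED_FIELD() below updates only the WIZ hashing field
	 * without a read-modify-write.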
6507 * 6508 * Note that PS/WM thread counts depend on the WIZ hashing 6509 * disable bit, which we don't touch here, but it's good 6510 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 6511 */ 6512 I915_WRITE(GEN6_GT_MODE, 6513 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); 6514 6515 ilk_init_lp_watermarks(dev); 6516 6517 I915_WRITE(CACHE_MODE_0, 6518 _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB)); 6519 6520 I915_WRITE(GEN6_UCGCTL1, 6521 I915_READ(GEN6_UCGCTL1) | 6522 GEN6_BLBUNIT_CLOCK_GATE_DISABLE | 6523 GEN6_CSUNIT_CLOCK_GATE_DISABLE); 6524 6525 /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock 6526 * gating disable must be set. Failure to set it results in 6527 * flickering pixels due to Z write ordering failures after 6528 * some amount of runtime in the Mesa "fire" demo, and Unigine 6529 * Sanctuary and Tropics, and apparently anything else with 6530 * alpha test or pixel discard. 6531 * 6532 * According to the spec, bit 11 (RCCUNIT) must also be set, 6533 * but we didn't debug actual testcases to find it out. 6534 * 6535 * WaDisableRCCUnitClockGating:snb 6536 * WaDisableRCPBUnitClockGating:snb 6537 */ 6538 I915_WRITE(GEN6_UCGCTL2, 6539 GEN6_RCPBUNIT_CLOCK_GATE_DISABLE | 6540 GEN6_RCCUNIT_CLOCK_GATE_DISABLE); 6541 6542 /* WaStripsFansDisableFastClipPerformanceFix:snb */ 6543 I915_WRITE(_3D_CHICKEN3, 6544 _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL)); 6545 6546 /* 6547 * Bspec says: 6548 * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and 6549 * 3DSTATE_SF number of SF output attributes is more than 16." 6550 */ 6551 I915_WRITE(_3D_CHICKEN3, 6552 _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH)); 6553 6554 /* 6555 * According to the spec the following bits should be 6556 * set in order to enable memory self-refresh and fbc: 6557 * The bit21 and bit22 of 0x42000 6558 * The bit21 and bit22 of 0x42004 6559 * The bit5 and bit7 of 0x42020 6560 * The bit14 of 0x70180 6561 * The bit14 of 0x71180 6562 * 6563 * WaFbcAsynchFlipDisableFbcQueue:snb 6564 */ 6565 I915_WRITE(ILK_DISPLAY_CHICKEN1, 6566 I915_READ(ILK_DISPLAY_CHICKEN1) | 6567 ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS); 6568 I915_WRITE(ILK_DISPLAY_CHICKEN2, 6569 I915_READ(ILK_DISPLAY_CHICKEN2) | 6570 ILK_DPARB_GATE | ILK_VSDPFD_FULL); 6571 I915_WRITE(ILK_DSPCLK_GATE_D, 6572 I915_READ(ILK_DSPCLK_GATE_D) | 6573 ILK_DPARBUNIT_CLOCK_GATE_ENABLE | 6574 ILK_DPFDUNIT_CLOCK_GATE_ENABLE); 6575 6576 g4x_disable_trickle_feed(dev); 6577 6578 cpt_init_clock_gating(dev); 6579 6580 gen6_check_mch_setup(dev); 6581} 6582 6583static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv) 6584{ 6585 uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE); 6586 6587 /* 6588 * WaVSThreadDispatchOverride:ivb,vlv 6589 * 6590 * This actually overrides the dispatch 6591 * mode for all thread types. 6592 */ 6593 reg &= ~GEN7_FF_SCHED_MASK; 6594 reg |= GEN7_FF_TS_SCHED_HW; 6595 reg |= GEN7_FF_VS_SCHED_HW; 6596 reg |= GEN7_FF_DS_SCHED_HW; 6597 6598 I915_WRITE(GEN7_FF_THREAD_MODE, reg); 6599} 6600 6601static void lpt_init_clock_gating(struct drm_device *dev) 6602{ 6603 struct drm_i915_private *dev_priv = dev->dev_private; 6604 6605 /* 6606 * TODO: this bit should only be enabled when really needed, then 6607 * disabled when not needed anymore in order to save power. 
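	 * For now the bit is simply cleared again on suspend; see
	 * lpt_suspend_hw() below.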
6608 */ 6609 if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) 6610 I915_WRITE(SOUTH_DSPCLK_GATE_D, 6611 I915_READ(SOUTH_DSPCLK_GATE_D) | 6612 PCH_LP_PARTITION_LEVEL_DISABLE); 6613 6614 /* WADPOClockGatingDisable:hsw */ 6615 I915_WRITE(_TRANSA_CHICKEN1, 6616 I915_READ(_TRANSA_CHICKEN1) | 6617 TRANS_CHICKEN1_DP0UNIT_GC_DISABLE); 6618} 6619 6620static void lpt_suspend_hw(struct drm_device *dev) 6621{ 6622 struct drm_i915_private *dev_priv = dev->dev_private; 6623 6624 if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) { 6625 uint32_t val = I915_READ(SOUTH_DSPCLK_GATE_D); 6626 6627 val &= ~PCH_LP_PARTITION_LEVEL_DISABLE; 6628 I915_WRITE(SOUTH_DSPCLK_GATE_D, val); 6629 } 6630} 6631 6632static void broadwell_init_clock_gating(struct drm_device *dev) 6633{ 6634 struct drm_i915_private *dev_priv = dev->dev_private; 6635 enum pipe pipe; 6636 6637 I915_WRITE(WM3_LP_ILK, 0); 6638 I915_WRITE(WM2_LP_ILK, 0); 6639 I915_WRITE(WM1_LP_ILK, 0); 6640 6641 /* WaSwitchSolVfFArbitrationPriority:bdw */ 6642 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); 6643 6644 /* WaPsrDPAMaskVBlankInSRD:bdw */ 6645 I915_WRITE(CHICKEN_PAR1_1, 6646 I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD); 6647 6648 /* WaPsrDPRSUnmaskVBlankInSRD:bdw */ 6649 for_each_pipe(dev_priv, pipe) { 6650 I915_WRITE(CHICKEN_PIPESL_1(pipe), 6651 I915_READ(CHICKEN_PIPESL_1(pipe)) | 6652 BDW_DPRS_MASK_VBLANK_SRD); 6653 } 6654 6655 /* WaVSRefCountFullforceMissDisable:bdw */ 6656 /* WaDSRefCountFullforceMissDisable:bdw */ 6657 I915_WRITE(GEN7_FF_THREAD_MODE, 6658 I915_READ(GEN7_FF_THREAD_MODE) & 6659 ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME)); 6660 6661 I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL, 6662 _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE)); 6663 6664 /* WaDisableSDEUnitClockGating:bdw */ 6665 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | 6666 GEN8_SDEUNIT_CLOCK_GATE_DISABLE); 6667 6668 lpt_init_clock_gating(dev); 6669} 6670 6671static void haswell_init_clock_gating(struct drm_device *dev) 6672{ 6673 struct drm_i915_private *dev_priv = dev->dev_private; 6674 6675 ilk_init_lp_watermarks(dev); 6676 6677 /* L3 caching of data atomics doesn't work -- disable it. */ 6678 I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE); 6679 I915_WRITE(HSW_ROW_CHICKEN3, 6680 _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE)); 6681 6682 /* This is required by WaCatErrorRejectionIssue:hsw */ 6683 I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, 6684 I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | 6685 GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); 6686 6687 /* WaVSRefCountFullforceMissDisable:hsw */ 6688 I915_WRITE(GEN7_FF_THREAD_MODE, 6689 I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME); 6690 6691 /* WaDisable_RenderCache_OperationalFlush:hsw */ 6692 I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 6693 6694 /* enable HiZ Raw Stall Optimization */ 6695 I915_WRITE(CACHE_MODE_0_GEN7, 6696 _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE)); 6697 6698 /* WaDisable4x2SubspanOptimization:hsw */ 6699 I915_WRITE(CACHE_MODE_1, 6700 _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); 6701 6702 /* 6703 * BSpec recommends 8x4 when MSAA is used, 6704 * however in practice 16x4 seems fastest. 6705 * 6706 * Note that PS/WM thread counts depend on the WIZ hashing 6707 * disable bit, which we don't touch here, but it's good 6708 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 
6709 */ 6710 I915_WRITE(GEN7_GT_MODE, 6711 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); 6712 6713 /* WaSwitchSolVfFArbitrationPriority:hsw */ 6714 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); 6715 6716 /* WaRsPkgCStateDisplayPMReq:hsw */ 6717 I915_WRITE(CHICKEN_PAR1_1, 6718 I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES); 6719 6720 lpt_init_clock_gating(dev); 6721} 6722 6723static void ivybridge_init_clock_gating(struct drm_device *dev) 6724{ 6725 struct drm_i915_private *dev_priv = dev->dev_private; 6726 uint32_t snpcr; 6727 6728 ilk_init_lp_watermarks(dev); 6729 6730 I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE); 6731 6732 /* WaDisableEarlyCull:ivb */ 6733 I915_WRITE(_3D_CHICKEN3, 6734 _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL)); 6735 6736 /* WaDisableBackToBackFlipFix:ivb */ 6737 I915_WRITE(IVB_CHICKEN3, 6738 CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE | 6739 CHICKEN3_DGMG_DONE_FIX_DISABLE); 6740 6741 /* WaDisablePSDDualDispatchEnable:ivb */ 6742 if (IS_IVB_GT1(dev)) 6743 I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, 6744 _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE)); 6745 6746 /* WaDisable_RenderCache_OperationalFlush:ivb */ 6747 I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 6748 6749 /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */ 6750 I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1, 6751 GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC); 6752 6753 /* WaApplyL3ControlAndL3ChickenMode:ivb */ 6754 I915_WRITE(GEN7_L3CNTLREG1, 6755 GEN7_WA_FOR_GEN7_L3_CONTROL); 6756 I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER, 6757 GEN7_WA_L3_CHICKEN_MODE); 6758 if (IS_IVB_GT1(dev)) 6759 I915_WRITE(GEN7_ROW_CHICKEN2, 6760 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); 6761 else { 6762 /* must write both registers */ 6763 I915_WRITE(GEN7_ROW_CHICKEN2, 6764 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); 6765 I915_WRITE(GEN7_ROW_CHICKEN2_GT2, 6766 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); 6767 } 6768 6769 /* WaForceL3Serialization:ivb */ 6770 I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) & 6771 ~L3SQ_URB_READ_CAM_MATCH_DISABLE); 6772 6773 /* 6774 * According to the spec, bit 13 (RCZUNIT) must be set on IVB. 6775 * This implements the WaDisableRCZUnitClockGating:ivb workaround. 6776 */ 6777 I915_WRITE(GEN6_UCGCTL2, 6778 GEN6_RCZUNIT_CLOCK_GATE_DISABLE); 6779 6780 /* This is required by WaCatErrorRejectionIssue:ivb */ 6781 I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, 6782 I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | 6783 GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); 6784 6785 g4x_disable_trickle_feed(dev); 6786 6787 gen7_setup_fixed_func_scheduler(dev_priv); 6788 6789 if (0) { /* causes HiZ corruption on ivb:gt1 */ 6790 /* enable HiZ Raw Stall Optimization */ 6791 I915_WRITE(CACHE_MODE_0_GEN7, 6792 _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE)); 6793 } 6794 6795 /* WaDisable4x2SubspanOptimization:ivb */ 6796 I915_WRITE(CACHE_MODE_1, 6797 _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); 6798 6799 /* 6800 * BSpec recommends 8x4 when MSAA is used, 6801 * however in practice 16x4 seems fastest. 6802 * 6803 * Note that PS/WM thread counts depend on the WIZ hashing 6804 * disable bit, which we don't touch here, but it's good 6805 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 
6806 */ 6807	I915_WRITE(GEN7_GT_MODE, 6808		   _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); 6809 6810	snpcr = I915_READ(GEN6_MBCUNIT_SNPCR); 6811	snpcr &= ~GEN6_MBC_SNPCR_MASK; 6812	snpcr |= GEN6_MBC_SNPCR_MED; 6813	I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr); 6814 6815	if (!HAS_PCH_NOP(dev)) 6816		cpt_init_clock_gating(dev); 6817 6818	gen6_check_mch_setup(dev); 6819} 6820 6821static void valleyview_init_clock_gating(struct drm_device *dev) 6822{ 6823	struct drm_i915_private *dev_priv = dev->dev_private; 6824 6825	I915_WRITE(DSPCLK_GATE_D, VRHUNIT_CLOCK_GATE_DISABLE); 6826 6827	/* WaDisableEarlyCull:vlv */ 6828	I915_WRITE(_3D_CHICKEN3, 6829		   _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL)); 6830 6831	/* WaDisableBackToBackFlipFix:vlv */ 6832	I915_WRITE(IVB_CHICKEN3, 6833		   CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE | 6834		   CHICKEN3_DGMG_DONE_FIX_DISABLE); 6835 6836	/* WaPsdDispatchEnable:vlv */ 6837	/* WaDisablePSDDualDispatchEnable:vlv */ 6838	I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, 6839		   _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP | 6840				      GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE)); 6841 6842	/* WaDisable_RenderCache_OperationalFlush:vlv */ 6843	I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 6844 6845	/* WaForceL3Serialization:vlv */ 6846	I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) & 6847		   ~L3SQ_URB_READ_CAM_MATCH_DISABLE); 6848 6849	/* WaDisableDopClockGating:vlv */ 6850	I915_WRITE(GEN7_ROW_CHICKEN2, 6851		   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); 6852 6853	/* This is required by WaCatErrorRejectionIssue:vlv */ 6854	I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, 6855		   I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | 6856		   GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); 6857 6858	gen7_setup_fixed_func_scheduler(dev_priv); 6859 6860	/* 6861	 * According to the spec, bit 13 (RCZUNIT) must be set on IVB. 6862	 * This implements the WaDisableRCZUnitClockGating:vlv workaround. 6863	 */ 6864	I915_WRITE(GEN6_UCGCTL2, 6865		   GEN6_RCZUNIT_CLOCK_GATE_DISABLE); 6866 6867	/* WaDisableL3Bank2xClockGate:vlv 6868	 * Disabling L3 clock gating - MMIO 940c[25] = 1 6869	 * Set bit 25 to disable L3_BANK_2x_CLK_GATING */ 6870	I915_WRITE(GEN7_UCGCTL4, 6871		   I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE); 6872 6873	I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE); 6874 6875	/* 6876	 * BSpec says this must be set, even though 6877	 * WaDisable4x2SubspanOptimization isn't listed for VLV. 6878	 */ 6879	I915_WRITE(CACHE_MODE_1, 6880		   _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); 6881 6882	/* 6883	 * WaIncreaseL3CreditsForVLVB0:vlv 6884	 * This is the hardware default actually. 6885	 */ 6886	I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE); 6887 6888	/* 6889	 * WaDisableVLVClockGating_VBIIssue:vlv 6890	 * Disable clock gating on the GCFG unit to prevent a delay 6891	 * in the reporting of vblank events.
6892 */ 6893 I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS); 6894} 6895 6896static void cherryview_init_clock_gating(struct drm_device *dev) 6897{ 6898 struct drm_i915_private *dev_priv = dev->dev_private; 6899 6900 I915_WRITE(DSPCLK_GATE_D, VRHUNIT_CLOCK_GATE_DISABLE); 6901 6902 I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE); 6903 6904 /* WaVSRefCountFullforceMissDisable:chv */ 6905 /* WaDSRefCountFullforceMissDisable:chv */ 6906 I915_WRITE(GEN7_FF_THREAD_MODE, 6907 I915_READ(GEN7_FF_THREAD_MODE) & 6908 ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME)); 6909 6910 /* WaDisableSemaphoreAndSyncFlipWait:chv */ 6911 I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL, 6912 _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE)); 6913 6914 /* WaDisableCSUnitClockGating:chv */ 6915 I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) | 6916 GEN6_CSUNIT_CLOCK_GATE_DISABLE); 6917 6918 /* WaDisableSDEUnitClockGating:chv */ 6919 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | 6920 GEN8_SDEUNIT_CLOCK_GATE_DISABLE); 6921} 6922 6923static void g4x_init_clock_gating(struct drm_device *dev) 6924{ 6925 struct drm_i915_private *dev_priv = dev->dev_private; 6926 uint32_t dspclk_gate; 6927 6928 I915_WRITE(RENCLK_GATE_D1, 0); 6929 I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE | 6930 GS_UNIT_CLOCK_GATE_DISABLE | 6931 CL_UNIT_CLOCK_GATE_DISABLE); 6932 I915_WRITE(RAMCLK_GATE_D, 0); 6933 dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE | 6934 OVRUNIT_CLOCK_GATE_DISABLE | 6935 OVCUNIT_CLOCK_GATE_DISABLE; 6936 if (IS_GM45(dev)) 6937 dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE; 6938 I915_WRITE(DSPCLK_GATE_D, dspclk_gate); 6939 6940 /* WaDisableRenderCachePipelinedFlush */ 6941 I915_WRITE(CACHE_MODE_0, 6942 _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE)); 6943 6944 /* WaDisable_RenderCache_OperationalFlush:g4x */ 6945 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 6946 6947 g4x_disable_trickle_feed(dev); 6948} 6949 6950static void crestline_init_clock_gating(struct drm_device *dev) 6951{ 6952 struct drm_i915_private *dev_priv = dev->dev_private; 6953 6954 I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE); 6955 I915_WRITE(RENCLK_GATE_D2, 0); 6956 I915_WRITE(DSPCLK_GATE_D, 0); 6957 I915_WRITE(RAMCLK_GATE_D, 0); 6958 I915_WRITE16(DEUC, 0); 6959 I915_WRITE(MI_ARB_STATE, 6960 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); 6961 6962 /* WaDisable_RenderCache_OperationalFlush:gen4 */ 6963 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 6964} 6965 6966static void broadwater_init_clock_gating(struct drm_device *dev) 6967{ 6968 struct drm_i915_private *dev_priv = dev->dev_private; 6969 6970 I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE | 6971 I965_RCC_CLOCK_GATE_DISABLE | 6972 I965_RCPB_CLOCK_GATE_DISABLE | 6973 I965_ISC_CLOCK_GATE_DISABLE | 6974 I965_FBC_CLOCK_GATE_DISABLE); 6975 I915_WRITE(RENCLK_GATE_D2, 0); 6976 I915_WRITE(MI_ARB_STATE, 6977 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); 6978 6979 /* WaDisable_RenderCache_OperationalFlush:gen4 */ 6980 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 6981} 6982 6983static void gen3_init_clock_gating(struct drm_device *dev) 6984{ 6985 struct drm_i915_private *dev_priv = dev->dev_private; 6986 u32 dstate = I915_READ(D_STATE); 6987 6988 dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING | 6989 DSTATE_DOT_CLOCK_GATING; 6990 I915_WRITE(D_STATE, dstate); 6991 6992 if (IS_PINEVIEW(dev)) 6993 I915_WRITE(ECOSKPD, _MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY)); 6994 6995 /* IIR "flip pending" 
means done if this bit is set */ 6996 I915_WRITE(ECOSKPD, _MASKED_BIT_DISABLE(ECO_FLIP_DONE)); 6997 6998 /* interrupts should cause a wake up from C3 */ 6999 I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_AGPBUSY_INT_EN)); 7000 7001 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ 7002 I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE)); 7003 7004 I915_WRITE(MI_ARB_STATE, 7005 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); 7006} 7007 7008static void i85x_init_clock_gating(struct drm_device *dev) 7009{ 7010 struct drm_i915_private *dev_priv = dev->dev_private; 7011 7012 I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE); 7013 7014 /* interrupts should cause a wake up from C3 */ 7015 I915_WRITE(MI_STATE, _MASKED_BIT_ENABLE(MI_AGPBUSY_INT_EN) | 7016 _MASKED_BIT_DISABLE(MI_AGPBUSY_830_MODE)); 7017 7018 I915_WRITE(MEM_MODE, 7019 _MASKED_BIT_ENABLE(MEM_DISPLAY_TRICKLE_FEED_DISABLE)); 7020} 7021 7022static void i830_init_clock_gating(struct drm_device *dev) 7023{ 7024 struct drm_i915_private *dev_priv = dev->dev_private; 7025 7026 I915_WRITE(DSPCLK_GATE_D, OVRUNIT_CLOCK_GATE_DISABLE); 7027 7028 I915_WRITE(MEM_MODE, 7029 _MASKED_BIT_ENABLE(MEM_DISPLAY_A_TRICKLE_FEED_DISABLE) | 7030 _MASKED_BIT_ENABLE(MEM_DISPLAY_B_TRICKLE_FEED_DISABLE)); 7031} 7032 7033void intel_init_clock_gating(struct drm_device *dev) 7034{ 7035 struct drm_i915_private *dev_priv = dev->dev_private; 7036 7037 dev_priv->display.init_clock_gating(dev); 7038} 7039 7040void intel_suspend_hw(struct drm_device *dev) 7041{ 7042 if (HAS_PCH_LPT(dev)) 7043 lpt_suspend_hw(dev); 7044} 7045 7046static void intel_init_fbc(struct drm_i915_private *dev_priv) 7047{ 7048 if (!HAS_FBC(dev_priv)) { 7049 dev_priv->fbc.enabled = false; 7050 return; 7051 } 7052 7053 if (INTEL_INFO(dev_priv)->gen >= 7) { 7054 dev_priv->display.fbc_enabled = ironlake_fbc_enabled; 7055 dev_priv->display.enable_fbc = gen7_enable_fbc; 7056 dev_priv->display.disable_fbc = ironlake_disable_fbc; 7057 } else if (INTEL_INFO(dev_priv)->gen >= 5) { 7058 dev_priv->display.fbc_enabled = ironlake_fbc_enabled; 7059 dev_priv->display.enable_fbc = ironlake_enable_fbc; 7060 dev_priv->display.disable_fbc = ironlake_disable_fbc; 7061 } else if (IS_GM45(dev_priv)) { 7062 dev_priv->display.fbc_enabled = g4x_fbc_enabled; 7063 dev_priv->display.enable_fbc = g4x_enable_fbc; 7064 dev_priv->display.disable_fbc = g4x_disable_fbc; 7065 } else { 7066 dev_priv->display.fbc_enabled = i8xx_fbc_enabled; 7067 dev_priv->display.enable_fbc = i8xx_enable_fbc; 7068 dev_priv->display.disable_fbc = i8xx_disable_fbc; 7069 7070 /* This value was pulled out of someone's hat */ 7071 I915_WRITE(FBC_CONTROL, 500 << FBC_CTL_INTERVAL_SHIFT); 7072 } 7073 7074 dev_priv->fbc.enabled = dev_priv->display.fbc_enabled(dev_priv->dev); 7075} 7076 7077/* Set up chip specific power management-related functions */ 7078void intel_init_pm(struct drm_device *dev) 7079{ 7080 struct drm_i915_private *dev_priv = dev->dev_private; 7081 7082 intel_init_fbc(dev_priv); 7083 7084 /* For cxsr */ 7085 if (IS_PINEVIEW(dev)) 7086 i915_pineview_get_mem_freq(dev); 7087 else if (IS_GEN5(dev)) 7088 i915_ironlake_get_mem_freq(dev); 7089 7090 /* For FIFO watermark updates */ 7091 if (INTEL_INFO(dev)->gen >= 9) { 7092 skl_setup_wm_latency(dev); 7093 7094 dev_priv->display.init_clock_gating = gen9_init_clock_gating; 7095 dev_priv->display.update_wm = skl_update_wm; 7096 dev_priv->display.update_sprite_wm = skl_update_sprite_wm; 7097 } else if (HAS_PCH_SPLIT(dev)) { 7098 ilk_setup_wm_latency(dev); 7099 
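		/*
		 * Only hook up the ILK watermark functions if the latency
		 * values were actually read back: GEN5 keys off the LP1
		 * entries, later gens off WM0. If they are zero, update_wm
		 * is left unset and CxSR effectively stays disabled (see
		 * the debug message below).
		 */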
7100 if ((IS_GEN5(dev) && dev_priv->wm.pri_latency[1] && 7101 dev_priv->wm.spr_latency[1] && dev_priv->wm.cur_latency[1]) || 7102 (!IS_GEN5(dev) && dev_priv->wm.pri_latency[0] && 7103 dev_priv->wm.spr_latency[0] && dev_priv->wm.cur_latency[0])) { 7104 dev_priv->display.update_wm = ilk_update_wm; 7105 dev_priv->display.update_sprite_wm = ilk_update_sprite_wm; 7106 } else { 7107 DRM_DEBUG_KMS("Failed to read display plane latency. " 7108 "Disable CxSR\n"); 7109 } 7110 7111 if (IS_GEN5(dev)) 7112 dev_priv->display.init_clock_gating = ironlake_init_clock_gating; 7113 else if (IS_GEN6(dev)) 7114 dev_priv->display.init_clock_gating = gen6_init_clock_gating; 7115 else if (IS_IVYBRIDGE(dev)) 7116 dev_priv->display.init_clock_gating = ivybridge_init_clock_gating; 7117 else if (IS_HASWELL(dev)) 7118 dev_priv->display.init_clock_gating = haswell_init_clock_gating; 7119 else if (INTEL_INFO(dev)->gen == 8) 7120 dev_priv->display.init_clock_gating = broadwell_init_clock_gating; 7121 } else if (IS_CHERRYVIEW(dev)) { 7122 dev_priv->display.update_wm = cherryview_update_wm; 7123 dev_priv->display.update_sprite_wm = valleyview_update_sprite_wm; 7124 dev_priv->display.init_clock_gating = 7125 cherryview_init_clock_gating; 7126 } else if (IS_VALLEYVIEW(dev)) { 7127 dev_priv->display.update_wm = valleyview_update_wm; 7128 dev_priv->display.update_sprite_wm = valleyview_update_sprite_wm; 7129 dev_priv->display.init_clock_gating = 7130 valleyview_init_clock_gating; 7131 } else if (IS_PINEVIEW(dev)) { 7132 if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev), 7133 dev_priv->is_ddr3, 7134 dev_priv->fsb_freq, 7135 dev_priv->mem_freq)) { 7136 DRM_INFO("failed to find known CxSR latency " 7137 "(found ddr%s fsb freq %d, mem freq %d), " 7138 "disabling CxSR\n", 7139 (dev_priv->is_ddr3 == 1) ? 
"3" : "2", 7140 dev_priv->fsb_freq, dev_priv->mem_freq); 7141 /* Disable CxSR and never update its watermark again */ 7142 intel_set_memory_cxsr(dev_priv, false); 7143 dev_priv->display.update_wm = NULL; 7144 } else 7145 dev_priv->display.update_wm = pineview_update_wm; 7146 dev_priv->display.init_clock_gating = gen3_init_clock_gating; 7147 } else if (IS_G4X(dev)) { 7148 dev_priv->display.update_wm = g4x_update_wm; 7149 dev_priv->display.init_clock_gating = g4x_init_clock_gating; 7150 } else if (IS_GEN4(dev)) { 7151 dev_priv->display.update_wm = i965_update_wm; 7152 if (IS_CRESTLINE(dev)) 7153 dev_priv->display.init_clock_gating = crestline_init_clock_gating; 7154 else if (IS_BROADWATER(dev)) 7155 dev_priv->display.init_clock_gating = broadwater_init_clock_gating; 7156 } else if (IS_GEN3(dev)) { 7157 dev_priv->display.update_wm = i9xx_update_wm; 7158 dev_priv->display.get_fifo_size = i9xx_get_fifo_size; 7159 dev_priv->display.init_clock_gating = gen3_init_clock_gating; 7160 } else if (IS_GEN2(dev)) { 7161 if (INTEL_INFO(dev)->num_pipes == 1) { 7162 dev_priv->display.update_wm = i845_update_wm; 7163 dev_priv->display.get_fifo_size = i845_get_fifo_size; 7164 } else { 7165 dev_priv->display.update_wm = i9xx_update_wm; 7166 dev_priv->display.get_fifo_size = i830_get_fifo_size; 7167 } 7168 7169 if (IS_I85X(dev) || IS_I865G(dev)) 7170 dev_priv->display.init_clock_gating = i85x_init_clock_gating; 7171 else 7172 dev_priv->display.init_clock_gating = i830_init_clock_gating; 7173 } else { 7174 DRM_ERROR("unexpected fall-through in intel_init_pm\n"); 7175 } 7176} 7177 7178int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val) 7179{ 7180 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 7181 7182 if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) { 7183 DRM_DEBUG_DRIVER("warning: pcode (read) mailbox access failed\n"); 7184 return -EAGAIN; 7185 } 7186 7187 I915_WRITE(GEN6_PCODE_DATA, *val); 7188 I915_WRITE(GEN6_PCODE_DATA1, 0); 7189 I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox); 7190 7191 if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0, 7192 500)) { 7193 DRM_ERROR("timeout waiting for pcode read (%d) to finish\n", mbox); 7194 return -ETIMEDOUT; 7195 } 7196 7197 *val = I915_READ(GEN6_PCODE_DATA); 7198 I915_WRITE(GEN6_PCODE_DATA, 0); 7199 7200 return 0; 7201} 7202 7203int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u32 mbox, u32 val) 7204{ 7205 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 7206 7207 if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) { 7208 DRM_DEBUG_DRIVER("warning: pcode (write) mailbox access failed\n"); 7209 return -EAGAIN; 7210 } 7211 7212 I915_WRITE(GEN6_PCODE_DATA, val); 7213 I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox); 7214 7215 if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0, 7216 500)) { 7217 DRM_ERROR("timeout waiting for pcode write (%d) to finish\n", mbox); 7218 return -ETIMEDOUT; 7219 } 7220 7221 I915_WRITE(GEN6_PCODE_DATA, 0); 7222 7223 return 0; 7224} 7225 7226static int vlv_gpu_freq_div(unsigned int czclk_freq) 7227{ 7228 switch (czclk_freq) { 7229 case 200: 7230 return 10; 7231 case 267: 7232 return 12; 7233 case 320: 7234 case 333: 7235 return 16; 7236 case 400: 7237 return 20; 7238 default: 7239 return -1; 7240 } 7241} 7242 7243static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val) 7244{ 7245 int div, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->mem_freq, 4); 7246 7247 div = vlv_gpu_freq_div(czclk_freq); 7248 if (div < 0) 7249 return div; 7250 
7251 return DIV_ROUND_CLOSEST(czclk_freq * (val + 6 - 0xbd), div); 7252} 7253 7254static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val) 7255{ 7256 int mul, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->mem_freq, 4); 7257 7258 mul = vlv_gpu_freq_div(czclk_freq); 7259 if (mul < 0) 7260 return mul; 7261 7262 return DIV_ROUND_CLOSEST(mul * val, czclk_freq) + 0xbd - 6; 7263} 7264 7265static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val) 7266{ 7267 int div, czclk_freq = dev_priv->rps.cz_freq; 7268 7269 div = vlv_gpu_freq_div(czclk_freq) / 2; 7270 if (div < 0) 7271 return div; 7272 7273 return DIV_ROUND_CLOSEST(czclk_freq * val, 2 * div) / 2; 7274} 7275 7276static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val) 7277{ 7278 int mul, czclk_freq = dev_priv->rps.cz_freq; 7279 7280 mul = vlv_gpu_freq_div(czclk_freq) / 2; 7281 if (mul < 0) 7282 return mul; 7283 7284 /* CHV needs even values */ 7285 return DIV_ROUND_CLOSEST(val * 2 * mul, czclk_freq) * 2; 7286} 7287 7288int vlv_gpu_freq(struct drm_i915_private *dev_priv, int val) 7289{ 7290 int ret = -1; 7291 7292 if (IS_CHERRYVIEW(dev_priv->dev)) 7293 ret = chv_gpu_freq(dev_priv, val); 7294 else if (IS_VALLEYVIEW(dev_priv->dev)) 7295 ret = byt_gpu_freq(dev_priv, val); 7296 7297 return ret; 7298} 7299 7300int vlv_freq_opcode(struct drm_i915_private *dev_priv, int val) 7301{ 7302 int ret = -1; 7303 7304 if (IS_CHERRYVIEW(dev_priv->dev)) 7305 ret = chv_freq_opcode(dev_priv, val); 7306 else if (IS_VALLEYVIEW(dev_priv->dev)) 7307 ret = byt_freq_opcode(dev_priv, val); 7308 7309 return ret; 7310} 7311 7312void intel_pm_setup(struct drm_device *dev) 7313{ 7314 struct drm_i915_private *dev_priv = dev->dev_private; 7315 7316 mutex_init(&dev_priv->rps.hw_lock); 7317 7318 INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work, 7319 intel_gen6_powersave_work); 7320 7321 dev_priv->pm.suspended = false; 7322}
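
/*
 * A minimal, illustrative sketch (compiled out with #if 0, not part of the
 * driver proper) of how the conversion helpers above pair up: for
 * frequencies the CZ clock can represent, vlv_freq_opcode() inverts
 * vlv_gpu_freq() (note CHV rounds opcodes to even values). For scale: with
 * a 320 MHz CZ clock, vlv_gpu_freq_div() returns 16, so each Baytrail
 * opcode step is 320 / 16 = 20 MHz. The opcode 0xc0 below is just a
 * hypothetical example value.
 */
#if 0
static void vlv_freq_roundtrip_sanity(struct drm_i915_private *dev_priv)
{
	int opcode = 0xc0; /* hypothetical PUNIT frequency opcode */
	int mhz = vlv_gpu_freq(dev_priv, opcode);

	/* mhz < 0 means vlv_gpu_freq_div() didn't recognize the CZ clock */
	if (mhz < 0)
		return;

	WARN_ON(vlv_freq_opcode(dev_priv, mhz) != opcode);
}
#endif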