/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"

/**
 * DOC: fence register handling
 *
 * Important to avoid confusion: "fences" in the i915 driver are not execution
 * fences used to track command completion but hardware detiler objects which
 * wrap a given range of the global GTT. Each platform has only a fairly limited
 * set of these objects.
 *
 * Fences are used to detile GTT memory mappings. They're also connected to the
 * hardware frontbuffer render tracking and hence interact with frontbuffer
 * compression. Furthermore on older platforms fences are required for tiled
 * objects used by the display engine. They can also be used by the render
 * engine - they're required for blitter commands and are optional for render
 * commands. But on gen4+ both display (with the exception of fbc) and rendering
 * have their own tiling state bits and don't need fences.
 *
 * Also note that fences only support X and Y tiling and hence can't be used for
 * the fancier new tiling formats like W, Ys and Yf.
 *
 * Finally note that because fences are such a restricted resource they're
 * dynamically associated with objects. Furthermore fence state is committed to
 * the hardware lazily to avoid unnecessary stalls on gen2/3. Therefore code must
 * explicitly call i915_vma_pin_fence() to synchronize fencing status
 * for CPU access. Also note that some code wants an unfenced view, for those
 * cases the fence can be removed forcefully with i915_vma_put_fence().
 *
 * Internally these functions will synchronize with userspace access by removing
 * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed.
 */
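
/*
 * A minimal caller-side sketch of how a dynamically assigned fence is used:
 * pin it for the duration of the fenced GTT access and drop it afterwards.
 * This is illustrative only, assuming i915_vma_pin_fence() and
 * i915_vma_unpin_fence() as declared in i915_vma.h, a vma already bound in
 * the mappable GGTT, and a caller holding struct_mutex and a runtime PM
 * wakeref.
 */
static inline int example_fenced_gtt_access(struct i915_vma *vma)
{
	int err;

	/* Associate (or reuse) a fence register for this tiled vma. */
	err = i915_vma_pin_fence(vma);
	if (err)
		return err;

	/*
	 * Detiled CPU access through the GTT mmap would happen here; the
	 * fence cannot be stolen while its pin_count is raised.
	 */

	i915_vma_unpin_fence(vma);
	return 0;
}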

#define pipelined 0

static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
				 struct i915_vma *vma)
{
	i915_reg_t fence_reg_lo, fence_reg_hi;
	int fence_pitch_shift;
	u64 val;

	if (INTEL_GEN(fence->i915) >= 6) {
		fence_reg_lo = FENCE_REG_GEN6_LO(fence->id);
		fence_reg_hi = FENCE_REG_GEN6_HI(fence->id);
		fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT;

	} else {
		fence_reg_lo = FENCE_REG_965_LO(fence->id);
		fence_reg_hi = FENCE_REG_965_HI(fence->id);
		fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
	}

	val = 0;
	if (vma) {
		unsigned int stride = i915_gem_object_get_stride(vma->obj);

		GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
		GEM_BUG_ON(!IS_ALIGNED(vma->node.start, I965_FENCE_PAGE));
		GEM_BUG_ON(!IS_ALIGNED(vma->fence_size, I965_FENCE_PAGE));
		GEM_BUG_ON(!IS_ALIGNED(stride, 128));

		val = (vma->node.start + vma->fence_size - I965_FENCE_PAGE) << 32;
		val |= vma->node.start;
		val |= (u64)((stride / 128) - 1) << fence_pitch_shift;
		if (i915_gem_object_get_tiling(vma->obj) == I915_TILING_Y)
			val |= BIT(I965_FENCE_TILING_Y_SHIFT);
		val |= I965_FENCE_REG_VALID;
	}

	if (!pipelined) {
		struct drm_i915_private *dev_priv = fence->i915;

		/* To w/a incoherency with non-atomic 64-bit register updates,
		 * we split the 64-bit update into two 32-bit writes. In order
		 * for a partial fence not to be evaluated between writes, we
		 * precede the update with a write to turn off the fence
		 * register, and only enable the fence as the last step.
		 *
		 * For extra levels of paranoia, we make sure each step lands
		 * before applying the next step.
		 */
		I915_WRITE(fence_reg_lo, 0);
		POSTING_READ(fence_reg_lo);

		I915_WRITE(fence_reg_hi, upper_32_bits(val));
		I915_WRITE(fence_reg_lo, lower_32_bits(val));
		POSTING_READ(fence_reg_lo);
	}
}

static void i915_write_fence_reg(struct drm_i915_fence_reg *fence,
				 struct i915_vma *vma)
{
	u32 val;

	val = 0;
	if (vma) {
		unsigned int tiling = i915_gem_object_get_tiling(vma->obj);
		bool is_y_tiled = tiling == I915_TILING_Y;
		unsigned int stride = i915_gem_object_get_stride(vma->obj);

		GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
		GEM_BUG_ON(vma->node.start & ~I915_FENCE_START_MASK);
		GEM_BUG_ON(!is_power_of_2(vma->fence_size));
		GEM_BUG_ON(!IS_ALIGNED(vma->node.start, vma->fence_size));

		if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence->i915))
			stride /= 128;
		else
			stride /= 512;
		GEM_BUG_ON(!is_power_of_2(stride));

		val = vma->node.start;
		if (is_y_tiled)
			val |= BIT(I830_FENCE_TILING_Y_SHIFT);
		val |= I915_FENCE_SIZE_BITS(vma->fence_size);
		val |= ilog2(stride) << I830_FENCE_PITCH_SHIFT;

		val |= I830_FENCE_REG_VALID;
	}

	if (!pipelined) {
		struct drm_i915_private *dev_priv = fence->i915;
		i915_reg_t reg = FENCE_REG(fence->id);

		I915_WRITE(reg, val);
		POSTING_READ(reg);
	}
}

static void i830_write_fence_reg(struct drm_i915_fence_reg *fence,
				 struct i915_vma *vma)
{
	u32 val;

	val = 0;
	if (vma) {
		unsigned int stride = i915_gem_object_get_stride(vma->obj);

		GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
		GEM_BUG_ON(vma->node.start & ~I830_FENCE_START_MASK);
		GEM_BUG_ON(!is_power_of_2(vma->fence_size));
		GEM_BUG_ON(!is_power_of_2(stride / 128));
		GEM_BUG_ON(!IS_ALIGNED(vma->node.start, vma->fence_size));

		val = vma->node.start;
		if (i915_gem_object_get_tiling(vma->obj) == I915_TILING_Y)
			val |= BIT(I830_FENCE_TILING_Y_SHIFT);
		val |= I830_FENCE_SIZE_BITS(vma->fence_size);
		val |= ilog2(stride / 128) << I830_FENCE_PITCH_SHIFT;
		val |= I830_FENCE_REG_VALID;
	}

	if (!pipelined) {
		struct drm_i915_private *dev_priv = fence->i915;
		i915_reg_t reg = FENCE_REG(fence->id);

		I915_WRITE(reg, val);
		POSTING_READ(reg);
	}
}

static void fence_write(struct drm_i915_fence_reg *fence,
			struct i915_vma *vma)
{
	/* Previous access through the fence register is marshalled by
	 * the mb() inside the fault handlers (i915_gem_release_mmap())
	 * and explicitly managed for internal users.
	 */

	if (IS_GEN2(fence->i915))
		i830_write_fence_reg(fence, vma);
	else if (IS_GEN3(fence->i915))
		i915_write_fence_reg(fence, vma);
	else
		i965_write_fence_reg(fence, vma);

	/* Access through the fenced region afterwards is
	 * ordered by the posting reads whilst writing the registers.
	 */

	fence->dirty = false;
}

static int fence_update(struct drm_i915_fence_reg *fence,
			struct i915_vma *vma)
{
	int ret;

	if (vma) {
		if (!i915_vma_is_map_and_fenceable(vma))
			return -EINVAL;

		if (WARN(!i915_gem_object_get_stride(vma->obj) ||
			 !i915_gem_object_get_tiling(vma->obj),
			 "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
			 i915_gem_object_get_stride(vma->obj),
			 i915_gem_object_get_tiling(vma->obj)))
			return -EINVAL;

		ret = i915_gem_active_retire(&vma->last_fence,
					     &vma->obj->base.dev->struct_mutex);
		if (ret)
			return ret;
	}

	if (fence->vma) {
		struct i915_vma *old = fence->vma;

		ret = i915_gem_active_retire(&old->last_fence,
					     &old->obj->base.dev->struct_mutex);
		if (ret)
			return ret;

		i915_vma_flush_writes(old);
	}

	if (fence->vma && fence->vma != vma) {
		/* Ensure that all userspace CPU access is completed before
		 * stealing the fence.
		 */
		GEM_BUG_ON(fence->vma->fence != fence);
		i915_vma_revoke_mmap(fence->vma);

		fence->vma->fence = NULL;
		fence->vma = NULL;

		list_move(&fence->link, &fence->i915->mm.fence_list);
	}

	/* We only need to update the register itself if the device is awake.
	 * If the device is currently powered down, we will defer the write
	 * to the runtime resume, see i915_gem_restore_fences().
	 */
	if (intel_runtime_pm_get_if_in_use(fence->i915)) {
		fence_write(fence, vma);
		intel_runtime_pm_put(fence->i915);
	}

	if (vma) {
		if (fence->vma != vma) {
			vma->fence = fence;
			fence->vma = vma;
		}

		list_move_tail(&fence->link, &fence->i915->mm.fence_list);
	}

	return 0;
}

/**
 * i915_vma_put_fence - force-remove fence for a VMA
 * @vma: vma to map linearly (not through a fence reg)
 *
 * This function force-removes any fence from the given object, which is useful
 * if the kernel wants to do untiled GTT access.
 *
 * Returns:
 *
 * 0 on success, negative error code on failure.
 */
int i915_vma_put_fence(struct i915_vma *vma)
{
	struct drm_i915_fence_reg *fence = vma->fence;

	if (!fence)
		return 0;

	if (fence->pin_count)
		return -EBUSY;

	return fence_update(fence, NULL);
}

static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv)
{
	struct drm_i915_fence_reg *fence;

	list_for_each_entry(fence, &dev_priv->mm.fence_list, link) {
		GEM_BUG_ON(fence->vma && fence->vma->fence != fence);

		if (fence->pin_count)
			continue;

		return fence;
	}

	/* Wait for completion of pending flips which consume fences */
	if (intel_has_pending_fb_unpin(dev_priv))
		return ERR_PTR(-EAGAIN);

	return ERR_PTR(-EDEADLK);
}

/**
 * i915_vma_pin_fence - set up fencing for a vma
 * @vma: vma to map through a fence reg
 *
 * When mapping objects through the GTT, userspace wants to be able to write
 * to them without having to worry about swizzling if the object is tiled.
 * This function walks the fence regs looking for a free one for @obj,
 * stealing one if it can't find any.
 *
 * It then sets up the reg based on the object's properties: address, pitch
 * and tiling format.
 *
 * For an untiled surface, this removes any existing fence.
 *
 * Returns:
 *
 * 0 on success, negative error code on failure.
 */
int
i915_vma_pin_fence(struct i915_vma *vma)
{
	struct drm_i915_fence_reg *fence;
	struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
	int err;

	/* Note that we revoke fences on runtime suspend. Therefore the user
	 * must keep the device awake whilst using the fence.
	 */
	assert_rpm_wakelock_held(vma->vm->i915);

	/* Just update our place in the LRU if our fence is getting reused. */
	if (vma->fence) {
		fence = vma->fence;
		GEM_BUG_ON(fence->vma != vma);
		fence->pin_count++;
		if (!fence->dirty) {
			list_move_tail(&fence->link,
				       &fence->i915->mm.fence_list);
			return 0;
		}
	} else if (set) {
		fence = fence_find(vma->vm->i915);
		if (IS_ERR(fence))
			return PTR_ERR(fence);

		GEM_BUG_ON(fence->pin_count);
		fence->pin_count++;
	} else
		return 0;

	err = fence_update(fence, set);
	if (err)
		goto out_unpin;

	GEM_BUG_ON(fence->vma != set);
	GEM_BUG_ON(vma->fence != (set ? fence : NULL));

	if (set)
		return 0;

out_unpin:
	fence->pin_count--;
	return err;
}

/**
 * i915_reserve_fence - Reserve a fence for vGPU
 * @dev_priv: i915 device private
 *
 * This function walks the fence regs looking for a free one and removes
 * it from the fence_list. It is used to reserve a fence for vGPU to use.
 */
struct drm_i915_fence_reg *
i915_reserve_fence(struct drm_i915_private *dev_priv)
{
	struct drm_i915_fence_reg *fence;
	int count;
	int ret;

	lockdep_assert_held(&dev_priv->drm.struct_mutex);

	/* Keep at least one fence available for the display engine. */
	count = 0;
	list_for_each_entry(fence, &dev_priv->mm.fence_list, link)
		count += !fence->pin_count;
	if (count <= 1)
		return ERR_PTR(-ENOSPC);

	fence = fence_find(dev_priv);
	if (IS_ERR(fence))
		return fence;

	if (fence->vma) {
		/* Force-remove fence from VMA */
		ret = fence_update(fence, NULL);
		if (ret)
			return ERR_PTR(ret);
	}

	list_del(&fence->link);
	return fence;
}

/**
 * i915_unreserve_fence - Reclaim a reserved fence
 * @fence: the fence reg
 *
 * This function adds a fence register previously reserved for vGPU back to
 * the fence_list.
 */
void i915_unreserve_fence(struct drm_i915_fence_reg *fence)
{
	lockdep_assert_held(&fence->i915->drm.struct_mutex);

	list_add(&fence->link, &fence->i915->mm.fence_list);
}

/**
 * i915_gem_revoke_fences - revoke fence state
 * @dev_priv: i915 device private
 *
 * Removes all GTT mmappings via the fence registers. This forces any user
 * of the fence to reacquire that fence before continuing with their access.
 * One use is during GPU reset where the fence register is lost and we need to
 * revoke concurrent userspace access via GTT mmaps until the hardware has been
 * reset and the fence registers have been restored.
 */
void i915_gem_revoke_fences(struct drm_i915_private *dev_priv)
{
	int i;

	lockdep_assert_held(&dev_priv->drm.struct_mutex);

	for (i = 0; i < dev_priv->num_fence_regs; i++) {
		struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];

		GEM_BUG_ON(fence->vma && fence->vma->fence != fence);

		if (fence->vma)
			i915_vma_revoke_mmap(fence->vma);
	}
}

/**
 * i915_gem_restore_fences - restore fence state
 * @dev_priv: i915 device private
 *
 * Restore the hw fence state to match the software tracking again, to be called
 * after a gpu reset and on resume. Note that on runtime suspend we only cancel
 * the fences, to be reacquired by the user later.
 */
void i915_gem_restore_fences(struct drm_i915_private *dev_priv)
{
	int i;

	for (i = 0; i < dev_priv->num_fence_regs; i++) {
		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
		struct i915_vma *vma = reg->vma;

		GEM_BUG_ON(vma && vma->fence != reg);

		/*
		 * Commit delayed tiling changes if we have an object still
		 * attached to the fence, otherwise just clear the fence.
		 */
		if (vma && !i915_gem_object_is_tiled(vma->obj)) {
			GEM_BUG_ON(!reg->dirty);
			GEM_BUG_ON(i915_vma_has_userfault(vma));

			list_move(&reg->link, &dev_priv->mm.fence_list);
			vma->fence = NULL;
			vma = NULL;
		}

		fence_write(reg, vma);
		reg->vma = vma;
	}
}

/**
 * DOC: tiling swizzling details
 *
 * The idea behind tiling is to increase cache hit rates by rearranging
 * pixel data so that a group of pixel accesses are in the same cacheline.
 * Performance improvements from doing this on the back/depth buffer are on
 * the order of 30%.
 *
 * Intel architectures make this somewhat more complicated, though, by
 * adjustments made to addressing of data when the memory is in interleaved
 * mode (matched pairs of DIMMS) to improve memory bandwidth.
 * For interleaved memory, the CPU sends every sequential 64 bytes
 * to an alternate memory channel so it can get the bandwidth from both.
 *
 * The GPU also rearranges its accesses for increased bandwidth to interleaved
 * memory, and it matches what the CPU does for non-tiled. However, when tiled
 * it does it a little differently, since one walks addresses not just in the
 * X direction but also Y. So, along with alternating channels when bit
 * 6 of the address flips, it also alternates when other bits flip -- Bits 9
 * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines)
 * are common to both the 915 and 965-class hardware.
 *
 * The CPU also sometimes XORs in higher bits as well, to improve
 * bandwidth doing strided access like we do so frequently in graphics. This
 * is called "Channel XOR Randomization" in the MCH documentation. The result
 * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address
 * decode.
 *
 * All of this bit 6 XORing has an effect on our memory management,
 * as we need to make sure that the 3d driver can correctly address object
 * contents.
 *
 * If we don't have interleaved memory, all tiling is safe and no swizzling is
 * required.
 *
 * When bit 17 is XORed in, we simply refuse to tile at all. Bit
 * 17 is not just a page offset, so as we page an object out and back in,
 * individual pages in it will have different bit 17 addresses, resulting in
 * each 64 bytes being swapped with its neighbor!
 *
 * Otherwise, if interleaved, we have to tell the 3d driver what address
 * swizzling it needs to do, since it's writing with the CPU to the pages
 * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the
 * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling
 * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order
 * to match what the GPU expects.
 */
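
/*
 * Worked example of the swizzle math described above (illustrative only, not
 * used by the driver): for I915_BIT_6_SWIZZLE_9_10, bit 6 of a linear offset
 * is flipped by the XOR of bits 9 and 10, so a CPU view of GPU-tiled data has
 * to apply the same transformation to land on the right 64-byte half.
 */
static inline u32 example_swizzle_bit_6_9_10(u32 offset)
{
	/* Shift bits 9 and 10 down to bit 6 and fold their XOR into bit 6. */
	return offset ^ (((offset >> 3) ^ (offset >> 4)) & BIT(6));
}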

/**
 * i915_gem_detect_bit_6_swizzle - detect bit 6 swizzling pattern
 * @dev_priv: i915 device private
 *
 * Detects bit 6 swizzling of address lookup between IGD access and CPU
 * access through main memory.
 */
void
i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv)
{
	uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
	uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;

	if (INTEL_GEN(dev_priv) >= 8 || IS_VALLEYVIEW(dev_priv)) {
		/*
		 * On BDW+, swizzling is not used. We leave the CPU memory
		 * controller in charge of optimizing memory accesses without
		 * the extra address manipulation GPU side.
		 *
		 * VLV and CHV don't have GPU swizzling.
		 */
		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
	} else if (INTEL_GEN(dev_priv) >= 6) {
		if (dev_priv->preserve_bios_swizzle) {
			if (I915_READ(DISP_ARB_CTL) &
			    DISP_TILE_SURFACE_SWIZZLING) {
				swizzle_x = I915_BIT_6_SWIZZLE_9_10;
				swizzle_y = I915_BIT_6_SWIZZLE_9;
			} else {
				swizzle_x = I915_BIT_6_SWIZZLE_NONE;
				swizzle_y = I915_BIT_6_SWIZZLE_NONE;
			}
		} else {
			uint32_t dimm_c0, dimm_c1;
			dimm_c0 = I915_READ(MAD_DIMM_C0);
			dimm_c1 = I915_READ(MAD_DIMM_C1);
			dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
			dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
			/* Enable swizzling when the channels are populated
			 * with identically sized dimms. We don't need to check
			 * the 3rd channel because no cpu with gpu attached
			 * ships in that configuration. Also, swizzling only
			 * makes sense for 2 channels anyway.
			 */
			if (dimm_c0 == dimm_c1) {
				swizzle_x = I915_BIT_6_SWIZZLE_9_10;
				swizzle_y = I915_BIT_6_SWIZZLE_9;
			} else {
				swizzle_x = I915_BIT_6_SWIZZLE_NONE;
				swizzle_y = I915_BIT_6_SWIZZLE_NONE;
			}
		}
	} else if (IS_GEN5(dev_priv)) {
		/* On Ironlake, the GPU always does the same swizzling setup
		 * regardless of the DRAM configuration.
		 */
		swizzle_x = I915_BIT_6_SWIZZLE_9_10;
		swizzle_y = I915_BIT_6_SWIZZLE_9;
	} else if (IS_GEN2(dev_priv)) {
		/* As far as we know, the 865 doesn't have these bit 6
		 * swizzling issues.
		 */
		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
	} else if (IS_MOBILE(dev_priv) ||
		   IS_I915G(dev_priv) || IS_I945G(dev_priv)) {
		uint32_t dcc;

		/* On 9xx chipsets, channel interleave by the CPU is
		 * determined by DCC. For single-channel, neither the CPU
		 * nor the GPU do swizzling. For dual channel interleaved,
		 * the GPU's interleave is bit 9 and 10 for X tiled, and bit
		 * 9 for Y tiled. The CPU's interleave is independent, and
		 * can be based on either bit 11 (haven't seen this yet) or
		 * bit 17 (common).
		 */
		dcc = I915_READ(DCC);
		switch (dcc & DCC_ADDRESSING_MODE_MASK) {
		case DCC_ADDRESSING_MODE_SINGLE_CHANNEL:
		case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC:
			swizzle_x = I915_BIT_6_SWIZZLE_NONE;
			swizzle_y = I915_BIT_6_SWIZZLE_NONE;
			break;
		case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED:
			if (dcc & DCC_CHANNEL_XOR_DISABLE) {
				/* This is the base swizzling by the GPU for
				 * tiled buffers.
				 */
				swizzle_x = I915_BIT_6_SWIZZLE_9_10;
				swizzle_y = I915_BIT_6_SWIZZLE_9;
			} else if ((dcc & DCC_CHANNEL_XOR_BIT_17) == 0) {
				/* Bit 11 swizzling by the CPU in addition. */
				swizzle_x = I915_BIT_6_SWIZZLE_9_10_11;
				swizzle_y = I915_BIT_6_SWIZZLE_9_11;
			} else {
				/* Bit 17 swizzling by the CPU in addition. */
				swizzle_x = I915_BIT_6_SWIZZLE_9_10_17;
				swizzle_y = I915_BIT_6_SWIZZLE_9_17;
			}
			break;
		}

		/* check for L-shaped memory aka modified enhanced addressing */
		if (IS_GEN4(dev_priv) &&
		    !(I915_READ(DCC2) & DCC2_MODIFIED_ENHANCED_DISABLE)) {
			swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
			swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
		}

		if (dcc == 0xffffffff) {
			DRM_ERROR("Couldn't read from MCHBAR. "
				  "Disabling tiling.\n");
			swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
			swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
		}
	} else {
		/* The 965, G33, and newer, have a very flexible memory
		 * configuration. It will enable dual-channel mode
		 * (interleaving) on as much memory as it can, and the GPU
		 * will additionally sometimes enable different bit 6
		 * swizzling for tiled objects from the CPU.
		 *
		 * Here's what I found on the G965:
		 *    slot fill         memory size  swizzling
		 * 0A   0B   1A   1B    1-ch   2-ch
		 * 512  0    0    0     512    0     O
		 * 512  0    512  0     16     1008  X
		 * 512  0    0    512   16     1008  X
		 * 0    512  0    512   16     1008  X
		 * 1024 1024 1024 0     2048   1024  O
		 *
		 * We could probably detect this based on either the DRB
		 * matching, which was the case for the swizzling required in
		 * the table above, or from the 1-ch value being less than
		 * the minimum size of a rank.
		 *
		 * Reports indicate that the swizzling actually
		 * varies depending upon page placement inside the
		 * channels, i.e. we see swizzled pages where the
		 * banks of memory are paired and unswizzled on the
		 * uneven portion, so leave that as unknown.
		 */
		if (I915_READ16(C0DRB3) == I915_READ16(C1DRB3)) {
			swizzle_x = I915_BIT_6_SWIZZLE_9_10;
			swizzle_y = I915_BIT_6_SWIZZLE_9;
		}
	}

	if (swizzle_x == I915_BIT_6_SWIZZLE_UNKNOWN ||
	    swizzle_y == I915_BIT_6_SWIZZLE_UNKNOWN) {
		/* Userspace likes to explode if it sees unknown swizzling,
		 * so lie. We will finish the lie when reporting through
		 * the get-tiling-ioctl by reporting the physical swizzle
		 * mode as unknown instead.
		 *
		 * As we don't strictly know what the swizzling is, it may be
		 * bit17 dependent, and so we need to also prevent the pages
		 * from being moved.
		 */
		dev_priv->quirks |= QUIRK_PIN_SWIZZLED_PAGES;
		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
	}

	dev_priv->mm.bit_6_swizzle_x = swizzle_x;
	dev_priv->mm.bit_6_swizzle_y = swizzle_y;
}

/*
 * Swap every 64 bytes of this page around, to account for it having a new
 * bit 17 of its physical address and therefore being interpreted differently
 * by the GPU.
 */
static void
i915_gem_swizzle_page(struct page *page)
{
	char temp[64];
	char *vaddr;
	int i;

	vaddr = kmap(page);

	for (i = 0; i < PAGE_SIZE; i += 128) {
		memcpy(temp, &vaddr[i], 64);
		memcpy(&vaddr[i], &vaddr[i + 64], 64);
		memcpy(&vaddr[i + 64], temp, 64);
	}

	kunmap(page);
}

/**
 * i915_gem_object_do_bit_17_swizzle - fixup bit 17 swizzling
 * @obj: i915 GEM buffer object
 * @pages: the scattergather list of physical pages
 *
 * This function fixes up the swizzling in case any page frame number for this
 * object has changed in bit 17 since that state has been saved with
 * i915_gem_object_save_bit_17_swizzle().
 *
 * This is called when pinning backing storage again, since the kernel is free
 * to move unpinned backing storage around (either by directly moving pages or
 * by swapping them out and back in again).
 */
void
i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj,
				  struct sg_table *pages)
{
	struct sgt_iter sgt_iter;
	struct page *page;
	int i;

	if (obj->bit_17 == NULL)
		return;

	i = 0;
	for_each_sgt_page(page, sgt_iter, pages) {
		char new_bit_17 = page_to_phys(page) >> 17;
		if ((new_bit_17 & 0x1) != (test_bit(i, obj->bit_17) != 0)) {
			i915_gem_swizzle_page(page);
			set_page_dirty(page);
		}
		i++;
	}
}

/**
 * i915_gem_object_save_bit_17_swizzle - save bit 17 swizzling
 * @obj: i915 GEM buffer object
 * @pages: the scattergather list of physical pages
 *
 * This function saves the bit 17 of each page frame number so that swizzling
 * can be fixed up later on with i915_gem_object_do_bit_17_swizzle(). This must
 * be called before the backing storage can be unpinned.
 */
void
i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj,
				    struct sg_table *pages)
{
	const unsigned int page_count = obj->base.size >> PAGE_SHIFT;
	struct sgt_iter sgt_iter;
	struct page *page;
	int i;

	if (obj->bit_17 == NULL) {
		obj->bit_17 = kcalloc(BITS_TO_LONGS(page_count),
				      sizeof(long), GFP_KERNEL);
		if (obj->bit_17 == NULL) {
			DRM_ERROR("Failed to allocate memory for bit 17 "
				  "record\n");
			return;
		}
	}

	i = 0;

	for_each_sgt_page(page, sgt_iter, pages) {
		if (page_to_phys(page) & (1 << 17))
			__set_bit(i, obj->bit_17);
		else
			__clear_bit(i, obj->bit_17);
		i++;
	}
}
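
/*
 * Rough sketch (illustrative only, not a real call site) of how the two
 * helpers above pair up around the backing-storage lifecycle: save bit 17
 * before the pages may be released or swapped, and fix the contents up again
 * once fresh pages are pinned. The actual callers live in the get_pages/
 * put_pages paths; the gating condition below just mirrors the swizzle mode
 * this file detects.
 */
static inline void example_bit_17_swizzle_pairing(struct drm_i915_gem_object *obj,
						  struct sg_table *pages)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	/* Only tiled objects on bit-17 swizzled memory need this fixup. */
	if (i915->mm.bit_6_swizzle_x != I915_BIT_6_SWIZZLE_9_10_17 ||
	    !i915_gem_object_is_tiled(obj))
		return;

	/* Before unpinning: remember bit 17 of each page's physical address. */
	i915_gem_object_save_bit_17_swizzle(obj, pages);

	/* After re-pinning: swap 64-byte halves on pages whose bit 17 flipped. */
	i915_gem_object_do_bit_17_swizzle(obj, pages);
}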