Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/nv20: Add Z compression support.

Signed-off-by: Francisco Jerez <currojerez@riseup.net>
Tested-by: Xavier Chantry <chantry.xavier@gmail.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>

Authored by Francisco Jerez and committed by Ben Skeggs.
87a326a3 a5cf68b0

+106 -14
+3
drivers/gpu/drm/nouveau/nouveau_drv.h
··· 70 70 uint32_t addr; 71 71 uint32_t limit; 72 72 uint32_t pitch; 73 + uint32_t zcomp; 74 + struct drm_mm_node *tag_mem; 73 75 struct nouveau_fence *fence; 74 76 }; 75 77 ··· 308 306 309 307 struct nouveau_fb_engine { 310 308 int num_tiles; 309 + struct drm_mm tag_heap; 311 310 312 311 int (*init)(struct drm_device *dev); 313 312 void (*takedown)(struct drm_device *dev);
+6
drivers/gpu/drm/nouveau/nouveau_reg.h
··· 45 45 # define NV04_PFB_REF_CMD_REFRESH (1 << 0) 46 46 #define NV04_PFB_PRE 0x001002d4 47 47 # define NV04_PFB_PRE_CMD_PRECHARGE (1 << 0) 48 + #define NV20_PFB_ZCOMP(i) (0x00100300 + 4*(i)) 49 + # define NV20_PFB_ZCOMP_MODE_32 (4 << 24) 50 + # define NV20_PFB_ZCOMP_EN (1 << 31) 51 + # define NV25_PFB_ZCOMP_MODE_16 (1 << 20) 52 + # define NV25_PFB_ZCOMP_MODE_32 (2 << 20) 48 53 #define NV10_PFB_CLOSE_PAGE2 0x0010033c 49 54 #define NV04_PFB_SCRAMBLE(i) (0x00100400 + 4 * (i)) 50 55 #define NV40_PFB_TILE(i) (0x00100600 + (i*16)) ··· 384 379 #define NV20_PGRAPH_TLIMIT(i) (0x00400904 + (i*16)) 385 380 #define NV20_PGRAPH_TSIZE(i) (0x00400908 + (i*16)) 386 381 #define NV20_PGRAPH_TSTATUS(i) (0x0040090C + (i*16)) 382 + #define NV20_PGRAPH_ZCOMP(i) (0x00400980 + 4*(i)) 387 383 #define NV10_PGRAPH_TILE(i) (0x00400B00 + (i*16)) 388 384 #define NV10_PGRAPH_TLIMIT(i) (0x00400B04 + (i*16)) 389 385 #define NV10_PGRAPH_TSIZE(i) (0x00400B08 + (i*16))
+86 -4
drivers/gpu/drm/nouveau/nv10_fb.c
··· 3 3 #include "nouveau_drv.h" 4 4 #include "nouveau_drm.h" 5 5 6 + static struct drm_mm_node * 7 + nv20_fb_alloc_tag(struct drm_device *dev, uint32_t size) 8 + { 9 + struct drm_nouveau_private *dev_priv = dev->dev_private; 10 + struct nouveau_fb_engine *pfb = &dev_priv->engine.fb; 11 + struct drm_mm_node *mem; 12 + int ret; 13 + 14 + ret = drm_mm_pre_get(&pfb->tag_heap); 15 + if (ret) 16 + return NULL; 17 + 18 + spin_lock(&dev_priv->tile.lock); 19 + mem = drm_mm_search_free(&pfb->tag_heap, size, 0, 0); 20 + if (mem) 21 + mem = drm_mm_get_block_atomic(mem, size, 0); 22 + spin_unlock(&dev_priv->tile.lock); 23 + 24 + return mem; 25 + } 26 + 27 + static void 28 + nv20_fb_free_tag(struct drm_device *dev, struct drm_mm_node *mem) 29 + { 30 + struct drm_nouveau_private *dev_priv = dev->dev_private; 31 + 32 + spin_lock(&dev_priv->tile.lock); 33 + drm_mm_put_block(mem); 34 + spin_unlock(&dev_priv->tile.lock); 35 + } 36 + 6 37 void 7 38 nv10_fb_init_tile_region(struct drm_device *dev, int i, uint32_t addr, 8 39 uint32_t size, uint32_t pitch, uint32_t flags) 9 40 { 10 41 struct drm_nouveau_private *dev_priv = dev->dev_private; 11 42 struct nouveau_tile_reg *tile = &dev_priv->tile.reg[i]; 43 + int bpp = (flags & NOUVEAU_GEM_TILE_32BPP ? 32 : 16); 12 44 13 45 tile->addr = addr; 14 46 tile->limit = max(1u, addr + size) - 1; 15 47 tile->pitch = pitch; 16 48 17 - if (dev_priv->card_type == NV_20) 18 - tile->addr |= 1; 19 - else 49 + if (dev_priv->card_type == NV_20) { 50 + if (flags & NOUVEAU_GEM_TILE_ZETA) { 51 + /* 52 + * Allocate some of the on-die tag memory, 53 + * used to store Z compression meta-data (most 54 + * likely just a bitmap determining if a given 55 + * tile is compressed or not). 56 + */ 57 + tile->tag_mem = nv20_fb_alloc_tag(dev, size / 256); 58 + 59 + if (tile->tag_mem) { 60 + /* Enable Z compression */ 61 + if (dev_priv->chipset >= 0x25) 62 + tile->zcomp = tile->tag_mem->start | 63 + (bpp == 16 ? 
64 + NV25_PFB_ZCOMP_MODE_16 : 65 + NV25_PFB_ZCOMP_MODE_32); 66 + else 67 + tile->zcomp = tile->tag_mem->start | 68 + NV20_PFB_ZCOMP_EN | 69 + (bpp == 16 ? 0 : 70 + NV20_PFB_ZCOMP_MODE_32); 71 + } 72 + 73 + tile->addr |= 3; 74 + } else { 75 + tile->addr |= 1; 76 + } 77 + 78 + } else { 20 79 tile->addr |= 1 << 31; 80 + } 21 81 } 22 82 23 83 void ··· 86 26 struct drm_nouveau_private *dev_priv = dev->dev_private; 87 27 struct nouveau_tile_reg *tile = &dev_priv->tile.reg[i]; 88 28 89 - tile->addr = tile->limit = tile->pitch = 0; 29 + if (tile->tag_mem) { 30 + nv20_fb_free_tag(dev, tile->tag_mem); 31 + tile->tag_mem = NULL; 32 + } 33 + 34 + tile->addr = tile->limit = tile->pitch = tile->zcomp = 0; 90 35 } 91 36 92 37 void ··· 103 38 nv_wr32(dev, NV10_PFB_TLIMIT(i), tile->limit); 104 39 nv_wr32(dev, NV10_PFB_TSIZE(i), tile->pitch); 105 40 nv_wr32(dev, NV10_PFB_TILE(i), tile->addr); 41 + 42 + if (dev_priv->card_type == NV_20) 43 + nv_wr32(dev, NV20_PFB_ZCOMP(i), tile->zcomp); 106 44 } 107 45 108 46 int ··· 117 49 118 50 pfb->num_tiles = NV10_PFB_TILE__SIZE; 119 51 52 + if (dev_priv->card_type == NV_20) 53 + drm_mm_init(&pfb->tag_heap, 0, 54 + (dev_priv->chipset >= 0x25 ? 55 + 64 * 1024 : 32 * 1024)); 56 + 120 57 /* Turn all the tiling regions off. */ 121 58 for (i = 0; i < pfb->num_tiles; i++) 122 59 pfb->set_tile_region(dev, i); ··· 132 59 void 133 60 nv10_fb_takedown(struct drm_device *dev) 134 61 { 62 + struct drm_nouveau_private *dev_priv = dev->dev_private; 63 + struct nouveau_fb_engine *pfb = &dev_priv->engine.fb; 64 + int i; 65 + 66 + for (i = 0; i < pfb->num_tiles; i++) 67 + pfb->free_tile_region(dev, i); 68 + 69 + if (dev_priv->card_type == NV_20) 70 + drm_mm_takedown(&pfb->tag_heap); 135 71 }
+11 -10
drivers/gpu/drm/nouveau/nv20_graph.c
··· 526 526 nv_wr32(dev, NV10_PGRAPH_RDI_DATA, tile->pitch); 527 527 nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0010 + 4 * i); 528 528 nv_wr32(dev, NV10_PGRAPH_RDI_DATA, tile->addr); 529 + 530 + if (dev_priv->card_type == NV_20) { 531 + nv_wr32(dev, NV20_PGRAPH_ZCOMP(i), tile->zcomp); 532 + nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00ea0090 + 4 * i); 533 + nv_wr32(dev, NV10_PGRAPH_RDI_DATA, tile->zcomp); 534 + } 529 535 } 530 536 531 537 int ··· 595 589 nv_wr32(dev, 0x40009C , 0x00000040); 596 590 597 591 if (dev_priv->chipset >= 0x25) { 598 - nv_wr32(dev, 0x400890, 0x00080000); 592 + nv_wr32(dev, 0x400890, 0x00a8cfff); 599 593 nv_wr32(dev, 0x400610, 0x304B1FB6); 600 - nv_wr32(dev, 0x400B80, 0x18B82880); 594 + nv_wr32(dev, 0x400B80, 0x1cbd3883); 601 595 nv_wr32(dev, 0x400B84, 0x44000000); 602 596 nv_wr32(dev, 0x400098, 0x40000080); 603 597 nv_wr32(dev, 0x400B88, 0x000000ff); 598 + 604 599 } else { 605 - nv_wr32(dev, 0x400880, 0x00080000); /* 0x0008c7df */ 600 + nv_wr32(dev, 0x400880, 0x0008c7df); 606 601 nv_wr32(dev, 0x400094, 0x00000005); 607 - nv_wr32(dev, 0x400B80, 0x45CAA208); /* 0x45eae20e */ 602 + nv_wr32(dev, 0x400B80, 0x45eae20e); 608 603 nv_wr32(dev, 0x400B84, 0x24000000); 609 604 nv_wr32(dev, 0x400098, 0x00000040); 610 605 nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00E00038); ··· 618 611 for (i = 0; i < NV10_PFB_TILE__SIZE; i++) 619 612 nv20_graph_set_tile_region(dev, i); 620 613 621 - for (i = 0; i < 8; i++) { 622 - nv_wr32(dev, 0x400980 + i * 4, nv_rd32(dev, 0x100300 + i * 4)); 623 - nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0090 + i * 4); 624 - nv_wr32(dev, NV10_PGRAPH_RDI_DATA, 625 - nv_rd32(dev, 0x100300 + i * 4)); 626 - } 627 614 nv_wr32(dev, 0x4009a0, nv_rd32(dev, 0x100324)); 628 615 nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA000C); 629 616 nv_wr32(dev, NV10_PGRAPH_RDI_DATA, nv_rd32(dev, 0x100324));