Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/radeon/kms: add a CS ioctl flag not to rewrite tiling flags in the CS

This adds a new optional chunk to the CS ioctl that specifies optional flags
to the CS parser. Why this is useful is explained below. Note that some regs
no longer need the NOP relocation packet if this feature is enabled.
Tested on r300g and r600g with this flag disabled and enabled.

Assume there are two contexts sharing the same mipmapped tiled texture.
One context wants to render into the first mipmap and the other one
wants to render into the last mipmap. As you probably know, the hardware
has a MACRO_SWITCH feature, which turns off macro tiling for small mipmaps,
but that only applies to samplers (at least on r300-r500, though later
hardware likely behaves the same). When rendering, the small mipmap
therefore needs different tiling flags than the large one.

So we want to just re-set the tiling flags before rendering (writing
packets), right? ... No. The contexts run in parallel, so they may
set the tiling flags simultaneously and then fire their command streams
also simultaneously. The last one setting the flags wins, the other one
loses.

Another problem is when one context wants to render into the first and
the last mipmap in one CS. Impossible. It must flush before changing
tiling flags and do the rendering into the smaller mipmaps in another CS.

Yet another problem is that writing copy_blit in userspace would be a mess
involving re-setting tiling flags to please the kernel, and causing races
with other contexts at the same time.

The only way out of this is to send tiling flags with each CS, ideally
with each relocation. But userspace already does that through the register
values it writes in the CS. So let's just keep what is in the registers
instead of having the kernel rewrite it.

Signed-off-by: Marek Olšák <maraeo@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>

Authored by Marek Olšák and committed by Dave Airlie
e70f224c 6991b8f2

+136 -99
+50 -42
drivers/gpu/drm/radeon/evergreen_cs.c
··· 480 480 } 481 481 break; 482 482 case DB_Z_INFO: 483 - r = evergreen_cs_packet_next_reloc(p, &reloc); 484 - if (r) { 485 - dev_warn(p->dev, "bad SET_CONTEXT_REG " 486 - "0x%04X\n", reg); 487 - return -EINVAL; 488 - } 489 483 track->db_z_info = radeon_get_ib_value(p, idx); 490 - ib[idx] &= ~Z_ARRAY_MODE(0xf); 491 - track->db_z_info &= ~Z_ARRAY_MODE(0xf); 492 - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) { 493 - ib[idx] |= Z_ARRAY_MODE(ARRAY_2D_TILED_THIN1); 494 - track->db_z_info |= Z_ARRAY_MODE(ARRAY_2D_TILED_THIN1); 495 - } else { 496 - ib[idx] |= Z_ARRAY_MODE(ARRAY_1D_TILED_THIN1); 497 - track->db_z_info |= Z_ARRAY_MODE(ARRAY_1D_TILED_THIN1); 484 + if (!p->keep_tiling_flags) { 485 + r = evergreen_cs_packet_next_reloc(p, &reloc); 486 + if (r) { 487 + dev_warn(p->dev, "bad SET_CONTEXT_REG " 488 + "0x%04X\n", reg); 489 + return -EINVAL; 490 + } 491 + ib[idx] &= ~Z_ARRAY_MODE(0xf); 492 + track->db_z_info &= ~Z_ARRAY_MODE(0xf); 493 + if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) { 494 + ib[idx] |= Z_ARRAY_MODE(ARRAY_2D_TILED_THIN1); 495 + track->db_z_info |= Z_ARRAY_MODE(ARRAY_2D_TILED_THIN1); 496 + } else { 497 + ib[idx] |= Z_ARRAY_MODE(ARRAY_1D_TILED_THIN1); 498 + track->db_z_info |= Z_ARRAY_MODE(ARRAY_1D_TILED_THIN1); 499 + } 498 500 } 499 501 break; 500 502 case DB_STENCIL_INFO: ··· 609 607 case CB_COLOR5_INFO: 610 608 case CB_COLOR6_INFO: 611 609 case CB_COLOR7_INFO: 612 - r = evergreen_cs_packet_next_reloc(p, &reloc); 613 - if (r) { 614 - dev_warn(p->dev, "bad SET_CONTEXT_REG " 615 - "0x%04X\n", reg); 616 - return -EINVAL; 617 - } 618 610 tmp = (reg - CB_COLOR0_INFO) / 0x3c; 619 611 track->cb_color_info[tmp] = radeon_get_ib_value(p, idx); 620 - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) { 621 - ib[idx] |= CB_ARRAY_MODE(ARRAY_2D_TILED_THIN1); 622 - track->cb_color_info[tmp] |= CB_ARRAY_MODE(ARRAY_2D_TILED_THIN1); 623 - } else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) { 624 - ib[idx] |= CB_ARRAY_MODE(ARRAY_1D_TILED_THIN1); 625 
- track->cb_color_info[tmp] |= CB_ARRAY_MODE(ARRAY_1D_TILED_THIN1); 612 + if (!p->keep_tiling_flags) { 613 + r = evergreen_cs_packet_next_reloc(p, &reloc); 614 + if (r) { 615 + dev_warn(p->dev, "bad SET_CONTEXT_REG " 616 + "0x%04X\n", reg); 617 + return -EINVAL; 618 + } 619 + if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) { 620 + ib[idx] |= CB_ARRAY_MODE(ARRAY_2D_TILED_THIN1); 621 + track->cb_color_info[tmp] |= CB_ARRAY_MODE(ARRAY_2D_TILED_THIN1); 622 + } else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) { 623 + ib[idx] |= CB_ARRAY_MODE(ARRAY_1D_TILED_THIN1); 624 + track->cb_color_info[tmp] |= CB_ARRAY_MODE(ARRAY_1D_TILED_THIN1); 625 + } 626 626 } 627 627 break; 628 628 case CB_COLOR8_INFO: 629 629 case CB_COLOR9_INFO: 630 630 case CB_COLOR10_INFO: 631 631 case CB_COLOR11_INFO: 632 - r = evergreen_cs_packet_next_reloc(p, &reloc); 633 - if (r) { 634 - dev_warn(p->dev, "bad SET_CONTEXT_REG " 635 - "0x%04X\n", reg); 636 - return -EINVAL; 637 - } 638 632 tmp = ((reg - CB_COLOR8_INFO) / 0x1c) + 8; 639 633 track->cb_color_info[tmp] = radeon_get_ib_value(p, idx); 640 - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) { 641 - ib[idx] |= CB_ARRAY_MODE(ARRAY_2D_TILED_THIN1); 642 - track->cb_color_info[tmp] |= CB_ARRAY_MODE(ARRAY_2D_TILED_THIN1); 643 - } else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) { 644 - ib[idx] |= CB_ARRAY_MODE(ARRAY_1D_TILED_THIN1); 645 - track->cb_color_info[tmp] |= CB_ARRAY_MODE(ARRAY_1D_TILED_THIN1); 634 + if (!p->keep_tiling_flags) { 635 + r = evergreen_cs_packet_next_reloc(p, &reloc); 636 + if (r) { 637 + dev_warn(p->dev, "bad SET_CONTEXT_REG " 638 + "0x%04X\n", reg); 639 + return -EINVAL; 640 + } 641 + if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) { 642 + ib[idx] |= CB_ARRAY_MODE(ARRAY_2D_TILED_THIN1); 643 + track->cb_color_info[tmp] |= CB_ARRAY_MODE(ARRAY_2D_TILED_THIN1); 644 + } else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) { 645 + ib[idx] |= CB_ARRAY_MODE(ARRAY_1D_TILED_THIN1); 646 + 
track->cb_color_info[tmp] |= CB_ARRAY_MODE(ARRAY_1D_TILED_THIN1); 647 + } 646 648 } 647 649 break; 648 650 case CB_COLOR0_PITCH: ··· 1317 1311 return -EINVAL; 1318 1312 } 1319 1313 ib[idx+1+(i*8)+2] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); 1320 - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) 1321 - ib[idx+1+(i*8)+1] |= TEX_ARRAY_MODE(ARRAY_2D_TILED_THIN1); 1322 - else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) 1323 - ib[idx+1+(i*8)+1] |= TEX_ARRAY_MODE(ARRAY_1D_TILED_THIN1); 1314 + if (!p->keep_tiling_flags) { 1315 + if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) 1316 + ib[idx+1+(i*8)+1] |= TEX_ARRAY_MODE(ARRAY_2D_TILED_THIN1); 1317 + else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) 1318 + ib[idx+1+(i*8)+1] |= TEX_ARRAY_MODE(ARRAY_1D_TILED_THIN1); 1319 + } 1324 1320 texture = reloc->robj; 1325 1321 /* tex mip base */ 1326 1322 r = evergreen_cs_packet_next_reloc(p, &reloc);
+52 -44
drivers/gpu/drm/radeon/r300.c
··· 701 701 return r; 702 702 } 703 703 704 - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) 705 - tile_flags |= R300_TXO_MACRO_TILE; 706 - if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) 707 - tile_flags |= R300_TXO_MICRO_TILE; 708 - else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO_SQUARE) 709 - tile_flags |= R300_TXO_MICRO_TILE_SQUARE; 704 + if (p->keep_tiling_flags) { 705 + ib[idx] = (idx_value & 31) | /* keep the 1st 5 bits */ 706 + ((idx_value & ~31) + (u32)reloc->lobj.gpu_offset); 707 + } else { 708 + if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) 709 + tile_flags |= R300_TXO_MACRO_TILE; 710 + if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) 711 + tile_flags |= R300_TXO_MICRO_TILE; 712 + else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO_SQUARE) 713 + tile_flags |= R300_TXO_MICRO_TILE_SQUARE; 710 714 711 - tmp = idx_value + ((u32)reloc->lobj.gpu_offset); 712 - tmp |= tile_flags; 713 - ib[idx] = tmp; 715 + tmp = idx_value + ((u32)reloc->lobj.gpu_offset); 716 + tmp |= tile_flags; 717 + ib[idx] = tmp; 718 + } 714 719 track->textures[i].robj = reloc->robj; 715 720 track->tex_dirty = true; 716 721 break; ··· 765 760 /* RB3D_COLORPITCH1 */ 766 761 /* RB3D_COLORPITCH2 */ 767 762 /* RB3D_COLORPITCH3 */ 768 - r = r100_cs_packet_next_reloc(p, &reloc); 769 - if (r) { 770 - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 771 - idx, reg); 772 - r100_cs_dump_packet(p, pkt); 773 - return r; 763 + if (!p->keep_tiling_flags) { 764 + r = r100_cs_packet_next_reloc(p, &reloc); 765 + if (r) { 766 + DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 767 + idx, reg); 768 + r100_cs_dump_packet(p, pkt); 769 + return r; 770 + } 771 + 772 + if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) 773 + tile_flags |= R300_COLOR_TILE_ENABLE; 774 + if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) 775 + tile_flags |= R300_COLOR_MICROTILE_ENABLE; 776 + else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO_SQUARE) 777 + tile_flags |= R300_COLOR_MICROTILE_SQUARE_ENABLE; 778 + 
779 + tmp = idx_value & ~(0x7 << 16); 780 + tmp |= tile_flags; 781 + ib[idx] = tmp; 774 782 } 775 - 776 - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) 777 - tile_flags |= R300_COLOR_TILE_ENABLE; 778 - if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) 779 - tile_flags |= R300_COLOR_MICROTILE_ENABLE; 780 - else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO_SQUARE) 781 - tile_flags |= R300_COLOR_MICROTILE_SQUARE_ENABLE; 782 - 783 - tmp = idx_value & ~(0x7 << 16); 784 - tmp |= tile_flags; 785 - ib[idx] = tmp; 786 783 i = (reg - 0x4E38) >> 2; 787 784 track->cb[i].pitch = idx_value & 0x3FFE; 788 785 switch (((idx_value >> 21) & 0xF)) { ··· 850 843 break; 851 844 case 0x4F24: 852 845 /* ZB_DEPTHPITCH */ 853 - r = r100_cs_packet_next_reloc(p, &reloc); 854 - if (r) { 855 - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 856 - idx, reg); 857 - r100_cs_dump_packet(p, pkt); 858 - return r; 846 + if (!p->keep_tiling_flags) { 847 + r = r100_cs_packet_next_reloc(p, &reloc); 848 + if (r) { 849 + DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 850 + idx, reg); 851 + r100_cs_dump_packet(p, pkt); 852 + return r; 853 + } 854 + 855 + if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) 856 + tile_flags |= R300_DEPTHMACROTILE_ENABLE; 857 + if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) 858 + tile_flags |= R300_DEPTHMICROTILE_TILED; 859 + else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO_SQUARE) 860 + tile_flags |= R300_DEPTHMICROTILE_TILED_SQUARE; 861 + 862 + tmp = idx_value & ~(0x7 << 16); 863 + tmp |= tile_flags; 864 + ib[idx] = tmp; 859 865 } 860 - 861 - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) 862 - tile_flags |= R300_DEPTHMACROTILE_ENABLE; 863 - if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) 864 - tile_flags |= R300_DEPTHMICROTILE_TILED; 865 - else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO_SQUARE) 866 - tile_flags |= R300_DEPTHMICROTILE_TILED_SQUARE; 867 - 868 - tmp = idx_value & ~(0x7 << 16); 869 - tmp |= tile_flags; 870 - ib[idx] = tmp; 871 - 
872 866 track->zb.pitch = idx_value & 0x3FFC; 873 867 track->zb_dirty = true; 874 868 break;
+16 -10
drivers/gpu/drm/radeon/r600_cs.c
··· 941 941 track->db_depth_control = radeon_get_ib_value(p, idx); 942 942 break; 943 943 case R_028010_DB_DEPTH_INFO: 944 - if (r600_cs_packet_next_is_pkt3_nop(p)) { 944 + if (!p->keep_tiling_flags && 945 + r600_cs_packet_next_is_pkt3_nop(p)) { 945 946 r = r600_cs_packet_next_reloc(p, &reloc); 946 947 if (r) { 947 948 dev_warn(p->dev, "bad SET_CONTEXT_REG " ··· 993 992 case R_0280B4_CB_COLOR5_INFO: 994 993 case R_0280B8_CB_COLOR6_INFO: 995 994 case R_0280BC_CB_COLOR7_INFO: 996 - if (r600_cs_packet_next_is_pkt3_nop(p)) { 995 + if (!p->keep_tiling_flags && 996 + r600_cs_packet_next_is_pkt3_nop(p)) { 997 997 r = r600_cs_packet_next_reloc(p, &reloc); 998 998 if (r) { 999 999 dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg); ··· 1293 1291 mip_offset <<= 8; 1294 1292 1295 1293 word0 = radeon_get_ib_value(p, idx + 0); 1296 - if (tiling_flags & RADEON_TILING_MACRO) 1297 - word0 |= S_038000_TILE_MODE(V_038000_ARRAY_2D_TILED_THIN1); 1298 - else if (tiling_flags & RADEON_TILING_MICRO) 1299 - word0 |= S_038000_TILE_MODE(V_038000_ARRAY_1D_TILED_THIN1); 1294 + if (!p->keep_tiling_flags) { 1295 + if (tiling_flags & RADEON_TILING_MACRO) 1296 + word0 |= S_038000_TILE_MODE(V_038000_ARRAY_2D_TILED_THIN1); 1297 + else if (tiling_flags & RADEON_TILING_MICRO) 1298 + word0 |= S_038000_TILE_MODE(V_038000_ARRAY_1D_TILED_THIN1); 1299 + } 1300 1300 word1 = radeon_get_ib_value(p, idx + 1); 1301 1301 w0 = G_038000_TEX_WIDTH(word0) + 1; 1302 1302 h0 = G_038004_TEX_HEIGHT(word1) + 1; ··· 1625 1621 return -EINVAL; 1626 1622 } 1627 1623 base_offset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); 1628 - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) 1629 - ib[idx+1+(i*7)+0] |= S_038000_TILE_MODE(V_038000_ARRAY_2D_TILED_THIN1); 1630 - else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) 1631 - ib[idx+1+(i*7)+0] |= S_038000_TILE_MODE(V_038000_ARRAY_1D_TILED_THIN1); 1624 + if (!p->keep_tiling_flags) { 1625 + if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) 1626 + 
ib[idx+1+(i*7)+0] |= S_038000_TILE_MODE(V_038000_ARRAY_2D_TILED_THIN1); 1627 + else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) 1628 + ib[idx+1+(i*7)+0] |= S_038000_TILE_MODE(V_038000_ARRAY_1D_TILED_THIN1); 1629 + } 1632 1630 texture = reloc->robj; 1633 1631 /* tex mip base */ 1634 1632 r = r600_cs_packet_next_reloc(p, &reloc);
+2 -1
drivers/gpu/drm/radeon/radeon.h
··· 611 611 struct radeon_ib *ib; 612 612 void *track; 613 613 unsigned family; 614 - int parser_error; 614 + int parser_error; 615 + bool keep_tiling_flags; 615 616 }; 616 617 617 618 extern int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx);
+10 -1
drivers/gpu/drm/radeon/radeon_cs.c
··· 93 93 { 94 94 struct drm_radeon_cs *cs = data; 95 95 uint64_t *chunk_array_ptr; 96 - unsigned size, i; 96 + unsigned size, i, flags = 0; 97 97 98 98 if (!cs->num_chunks) { 99 99 return 0; ··· 140 140 if (p->chunks[i].length_dw == 0) 141 141 return -EINVAL; 142 142 } 143 + if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS && 144 + !p->chunks[i].length_dw) { 145 + return -EINVAL; 146 + } 143 147 144 148 p->chunks[i].length_dw = user_chunk.length_dw; 145 149 p->chunks[i].user_ptr = (void __user *)(unsigned long)user_chunk.chunk_data; ··· 158 154 if (DRM_COPY_FROM_USER(p->chunks[i].kdata, 159 155 p->chunks[i].user_ptr, size)) { 160 156 return -EFAULT; 157 + } 158 + if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) { 159 + flags = p->chunks[i].kdata[0]; 161 160 } 162 161 } else { 163 162 p->chunks[i].kpage[0] = kmalloc(PAGE_SIZE, GFP_KERNEL); ··· 181 174 p->chunks[p->chunk_ib_idx].length_dw); 182 175 return -EINVAL; 183 176 } 177 + 178 + p->keep_tiling_flags = (flags & RADEON_CS_KEEP_TILING_FLAGS) != 0; 184 179 return 0; 185 180 } 186 181
+2 -1
drivers/gpu/drm/radeon/radeon_drv.c
··· 53 53 * 2.9.0 - r600 tiling (s3tc,rgtc) working, SET_PREDICATION packet 3 on r600 + eg, backend query 54 54 * 2.10.0 - fusion 2D tiling 55 55 * 2.11.0 - backend map, initial compute support for the CS checker 56 + * 2.12.0 - RADEON_CS_KEEP_TILING_FLAGS 56 57 */ 57 58 #define KMS_DRIVER_MAJOR 2 58 - #define KMS_DRIVER_MINOR 11 59 + #define KMS_DRIVER_MINOR 12 59 60 #define KMS_DRIVER_PATCHLEVEL 0 60 61 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); 61 62 int radeon_driver_unload_kms(struct drm_device *dev);
+4
include/drm/radeon_drm.h
··· 874 874 875 875 #define RADEON_CHUNK_ID_RELOCS 0x01 876 876 #define RADEON_CHUNK_ID_IB 0x02 877 + #define RADEON_CHUNK_ID_FLAGS 0x03 878 + 879 + /* The first dword of RADEON_CHUNK_ID_FLAGS is a uint32 of these flags: */ 880 + #define RADEON_CS_KEEP_TILING_FLAGS 0x01 877 881 878 882 struct drm_radeon_cs_chunk { 879 883 uint32_t chunk_id;