Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/radeon/kms: remove r6xx+ blit copy routines

No longer used now that we use the async dma engines or
CP DMA for bo copies.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

+33 -1764
+2 -2
drivers/gpu/drm/radeon/Makefile
··· 72 72 radeon_cs.o radeon_bios.o radeon_benchmark.o r100.o r300.o r420.o \ 73 73 rs400.o rs600.o rs690.o rv515.o r520.o r600.o rv770.o radeon_test.o \ 74 74 r200.o radeon_legacy_tv.o r600_cs.o r600_blit_shaders.o \ 75 - r600_blit_kms.o radeon_pm.o atombios_dp.o r600_audio.o r600_hdmi.o \ 76 - evergreen.o evergreen_cs.o evergreen_blit_shaders.o evergreen_blit_kms.o \ 75 + radeon_pm.o atombios_dp.o r600_audio.o r600_hdmi.o \ 76 + evergreen.o evergreen_cs.o evergreen_blit_shaders.o \ 77 77 evergreen_hdmi.o radeon_trace_points.o ni.o cayman_blit_shaders.o \ 78 78 atombios_encoders.o radeon_semaphore.o radeon_sa.o atombios_i2c.o si.o \ 79 79 si_blit_shaders.o radeon_prime.o radeon_uvd.o cik.o cik_blit_shaders.o \
-54
drivers/gpu/drm/radeon/cayman_blit_shaders.c
··· 317 317 0x00000010, /* */ 318 318 }; 319 319 320 - const u32 cayman_vs[] = 321 - { 322 - 0x00000004, 323 - 0x80400400, 324 - 0x0000a03c, 325 - 0x95000688, 326 - 0x00004000, 327 - 0x15000688, 328 - 0x00000000, 329 - 0x88000000, 330 - 0x04000000, 331 - 0x67961001, 332 - #ifdef __BIG_ENDIAN 333 - 0x00020000, 334 - #else 335 - 0x00000000, 336 - #endif 337 - 0x00000000, 338 - 0x04000000, 339 - 0x67961000, 340 - #ifdef __BIG_ENDIAN 341 - 0x00020008, 342 - #else 343 - 0x00000008, 344 - #endif 345 - 0x00000000, 346 - }; 347 - 348 - const u32 cayman_ps[] = 349 - { 350 - 0x00000004, 351 - 0xa00c0000, 352 - 0x00000008, 353 - 0x80400000, 354 - 0x00000000, 355 - 0x95000688, 356 - 0x00000000, 357 - 0x88000000, 358 - 0x00380400, 359 - 0x00146b10, 360 - 0x00380000, 361 - 0x20146b10, 362 - 0x00380400, 363 - 0x40146b00, 364 - 0x80380000, 365 - 0x60146b00, 366 - 0x00000010, 367 - 0x000d1000, 368 - 0xb0800000, 369 - 0x00000000, 370 - }; 371 - 372 - const u32 cayman_ps_size = ARRAY_SIZE(cayman_ps); 373 - const u32 cayman_vs_size = ARRAY_SIZE(cayman_vs); 374 320 const u32 cayman_default_size = ARRAY_SIZE(cayman_default_state);
-8
drivers/gpu/drm/radeon/evergreen.c
··· 5144 5144 } 5145 5145 evergreen_gpu_init(rdev); 5146 5146 5147 - r = evergreen_blit_init(rdev); 5148 - if (r) { 5149 - r600_blit_fini(rdev); 5150 - rdev->asic->copy.copy = NULL; 5151 - dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r); 5152 - } 5153 - 5154 5147 /* allocate rlc buffers */ 5155 5148 if (rdev->flags & RADEON_IS_IGP) { 5156 5149 rdev->rlc.reg_list = sumo_rlc_save_restore_register_list; ··· 5413 5420 void evergreen_fini(struct radeon_device *rdev) 5414 5421 { 5415 5422 r600_audio_fini(rdev); 5416 - r600_blit_fini(rdev); 5417 5423 r700_cp_fini(rdev); 5418 5424 r600_dma_fini(rdev); 5419 5425 r600_irq_fini(rdev);
-729
drivers/gpu/drm/radeon/evergreen_blit_kms.c
··· 1 - /* 2 - * Copyright 2010 Advanced Micro Devices, Inc. 3 - * 4 - * Permission is hereby granted, free of charge, to any person obtaining a 5 - * copy of this software and associated documentation files (the "Software"), 6 - * to deal in the Software without restriction, including without limitation 7 - * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 - * and/or sell copies of the Software, and to permit persons to whom the 9 - * Software is furnished to do so, subject to the following conditions: 10 - * 11 - * The above copyright notice and this permission notice (including the next 12 - * paragraph) shall be included in all copies or substantial portions of the 13 - * Software. 14 - * 15 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 - * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 - * DEALINGS IN THE SOFTWARE. 22 - * 23 - * Authors: 24 - * Alex Deucher <alexander.deucher@amd.com> 25 - */ 26 - 27 - #include <drm/drmP.h> 28 - #include <drm/radeon_drm.h> 29 - #include "radeon.h" 30 - 31 - #include "evergreend.h" 32 - #include "evergreen_blit_shaders.h" 33 - #include "cayman_blit_shaders.h" 34 - #include "radeon_blit_common.h" 35 - 36 - /* emits 17 */ 37 - static void 38 - set_render_target(struct radeon_device *rdev, int format, 39 - int w, int h, u64 gpu_addr) 40 - { 41 - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 42 - u32 cb_color_info; 43 - int pitch, slice; 44 - 45 - h = ALIGN(h, 8); 46 - if (h < 8) 47 - h = 8; 48 - 49 - cb_color_info = CB_FORMAT(format) | 50 - CB_SOURCE_FORMAT(CB_SF_EXPORT_NORM) | 51 - CB_ARRAY_MODE(ARRAY_1D_TILED_THIN1); 52 - pitch = (w / 8) - 1; 53 - slice = ((w * h) / 64) - 1; 54 - 55 - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 15)); 56 - radeon_ring_write(ring, (CB_COLOR0_BASE - PACKET3_SET_CONTEXT_REG_START) >> 2); 57 - radeon_ring_write(ring, gpu_addr >> 8); 58 - radeon_ring_write(ring, pitch); 59 - radeon_ring_write(ring, slice); 60 - radeon_ring_write(ring, 0); 61 - radeon_ring_write(ring, cb_color_info); 62 - radeon_ring_write(ring, 0); 63 - radeon_ring_write(ring, (w - 1) | ((h - 1) << 16)); 64 - radeon_ring_write(ring, 0); 65 - radeon_ring_write(ring, 0); 66 - radeon_ring_write(ring, 0); 67 - radeon_ring_write(ring, 0); 68 - radeon_ring_write(ring, 0); 69 - radeon_ring_write(ring, 0); 70 - radeon_ring_write(ring, 0); 71 - radeon_ring_write(ring, 0); 72 - } 73 - 74 - /* emits 5dw */ 75 - static void 76 - cp_set_surface_sync(struct radeon_device *rdev, 77 - u32 sync_type, u32 size, 78 - u64 mc_addr) 79 - { 80 - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 81 - u32 cp_coher_size; 82 - 83 - if (size == 0xffffffff) 84 - cp_coher_size = 0xffffffff; 85 - else 86 - cp_coher_size = ((size + 255) >> 8); 87 - 88 - if (rdev->family >= CHIP_CAYMAN) { 89 - /* CP_COHER_CNTL2 has to be set manually when submitting a surface_sync 90 - * to the RB directly. For IBs, the CP programs this as part of the 91 - * surface_sync packet. 92 - */ 93 - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 94 - radeon_ring_write(ring, (0x85e8 - PACKET3_SET_CONFIG_REG_START) >> 2); 95 - radeon_ring_write(ring, 0); /* CP_COHER_CNTL2 */ 96 - } 97 - radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); 98 - radeon_ring_write(ring, sync_type); 99 - radeon_ring_write(ring, cp_coher_size); 100 - radeon_ring_write(ring, mc_addr >> 8); 101 - radeon_ring_write(ring, 10); /* poll interval */ 102 - } 103 - 104 - /* emits 11dw + 1 surface sync = 16dw */ 105 - static void 106 - set_shaders(struct radeon_device *rdev) 107 - { 108 - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 109 - u64 gpu_addr; 110 - 111 - /* VS */ 112 - gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset; 113 - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 3)); 114 - radeon_ring_write(ring, (SQ_PGM_START_VS - PACKET3_SET_CONTEXT_REG_START) >> 2); 115 - radeon_ring_write(ring, gpu_addr >> 8); 116 - radeon_ring_write(ring, 2); 117 - radeon_ring_write(ring, 0); 118 - 119 - /* PS */ 120 - gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.ps_offset; 121 - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 4)); 122 - radeon_ring_write(ring, (SQ_PGM_START_PS - PACKET3_SET_CONTEXT_REG_START) >> 2); 123 - radeon_ring_write(ring, gpu_addr >> 8); 124 - radeon_ring_write(ring, 1); 125 - radeon_ring_write(ring, 0); 126 - radeon_ring_write(ring, 2); 127 - 128 - gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset; 129 - cp_set_surface_sync(rdev, PACKET3_SH_ACTION_ENA, 512, gpu_addr); 130 - } 131 - 132 - /* emits 10 + 1 sync (5) = 15 */ 133 - static void 134 - set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr) 135 - { 136 - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 137 - u32 sq_vtx_constant_word2, sq_vtx_constant_word3; 138 - 139 - /* high addr, stride */ 140 - sq_vtx_constant_word2 = SQ_VTXC_BASE_ADDR_HI(upper_32_bits(gpu_addr) & 0xff) | 141 - SQ_VTXC_STRIDE(16); 142 - #ifdef __BIG_ENDIAN 143 - sq_vtx_constant_word2 |= SQ_VTXC_ENDIAN_SWAP(SQ_ENDIAN_8IN32); 144 - #endif 145 - /* xyzw swizzles */ 146 - sq_vtx_constant_word3 = SQ_VTCX_SEL_X(SQ_SEL_X) | 147 - SQ_VTCX_SEL_Y(SQ_SEL_Y) | 148 - SQ_VTCX_SEL_Z(SQ_SEL_Z) | 149 - SQ_VTCX_SEL_W(SQ_SEL_W); 150 - 151 - radeon_ring_write(ring, PACKET3(PACKET3_SET_RESOURCE, 8)); 152 - radeon_ring_write(ring, 0x580); 153 - radeon_ring_write(ring, gpu_addr & 0xffffffff); 154 - radeon_ring_write(ring, 48 - 1); /* size */ 155 - radeon_ring_write(ring, sq_vtx_constant_word2); 156 - radeon_ring_write(ring, sq_vtx_constant_word3); 157 - radeon_ring_write(ring, 0); 158 - radeon_ring_write(ring, 0); 159 - radeon_ring_write(ring, 0); 160 - radeon_ring_write(ring, S__SQ_CONSTANT_TYPE(SQ_TEX_VTX_VALID_BUFFER)); 161 - 162 - if ((rdev->family == CHIP_CEDAR) || 163 - (rdev->family == CHIP_PALM) || 164 - (rdev->family == CHIP_SUMO) || 165 - (rdev->family == CHIP_SUMO2) || 166 - (rdev->family == CHIP_CAICOS)) 167 - cp_set_surface_sync(rdev, 168 - PACKET3_TC_ACTION_ENA, 48, gpu_addr); 169 - else 170 - cp_set_surface_sync(rdev, 171 - PACKET3_VC_ACTION_ENA, 48, gpu_addr); 172 - 173 - } 174 - 175 - /* emits 10 */ 176 - static void 177 - set_tex_resource(struct radeon_device *rdev, 178 - int format, int w, int h, int pitch, 179 - u64 gpu_addr, u32 size) 180 - { 181 - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 182 - u32 sq_tex_resource_word0, sq_tex_resource_word1; 183 - u32 sq_tex_resource_word4, sq_tex_resource_word7; 184 - 185 - if (h < 1) 186 - h = 1; 187 - 188 - sq_tex_resource_word0 = TEX_DIM(SQ_TEX_DIM_2D); 189 - sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 6) | 190 - ((w - 1) << 18)); 191 - sq_tex_resource_word1 = ((h - 1) << 0) | 192 - TEX_ARRAY_MODE(ARRAY_1D_TILED_THIN1); 193 - /* xyzw swizzles */ 194 - sq_tex_resource_word4 = TEX_DST_SEL_X(SQ_SEL_X) | 195 - TEX_DST_SEL_Y(SQ_SEL_Y) | 196 - TEX_DST_SEL_Z(SQ_SEL_Z) | 197 - TEX_DST_SEL_W(SQ_SEL_W); 198 - 199 - sq_tex_resource_word7 = format | 200 - S__SQ_CONSTANT_TYPE(SQ_TEX_VTX_VALID_TEXTURE); 201 - 202 - cp_set_surface_sync(rdev, 203 - PACKET3_TC_ACTION_ENA, size, gpu_addr); 204 - 205 - radeon_ring_write(ring, PACKET3(PACKET3_SET_RESOURCE, 8)); 206 - radeon_ring_write(ring, 0); 207 - radeon_ring_write(ring, sq_tex_resource_word0); 208 - radeon_ring_write(ring, sq_tex_resource_word1); 209 - radeon_ring_write(ring, gpu_addr >> 8); 210 - radeon_ring_write(ring, gpu_addr >> 8); 211 - radeon_ring_write(ring, sq_tex_resource_word4); 212 - radeon_ring_write(ring, 0); 213 - radeon_ring_write(ring, 0); 214 - radeon_ring_write(ring, sq_tex_resource_word7); 215 - } 216 - 217 - /* emits 12 */ 218 - static void 219 - set_scissors(struct radeon_device *rdev, int x1, int y1, 220 - int x2, int y2) 221 - { 222 - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 223 - /* workaround some hw bugs */ 224 - if (x2 == 0) 225 - x1 = 1; 226 - if (y2 == 0) 227 - y1 = 1; 228 - if (rdev->family >= CHIP_CAYMAN) { 229 - if ((x2 == 1) && (y2 == 1)) 230 - x2 = 2; 231 - } 232 - 233 - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); 234 - radeon_ring_write(ring, (PA_SC_SCREEN_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2); 235 - radeon_ring_write(ring, (x1 << 0) | (y1 << 16)); 236 - radeon_ring_write(ring, (x2 << 0) | (y2 << 16)); 237 - 238 - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); 239 - radeon_ring_write(ring, (PA_SC_GENERIC_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2); 240 - radeon_ring_write(ring, (x1 << 0) | (y1 << 16) | (1 << 31)); 241 - radeon_ring_write(ring, (x2 << 0) | (y2 << 16)); 242 - 243 - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); 244 - radeon_ring_write(ring, (PA_SC_WINDOW_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2); 245 - radeon_ring_write(ring, (x1 << 0) | (y1 << 16) | (1 << 31)); 246 - radeon_ring_write(ring, (x2 << 0) | (y2 << 16)); 247 - } 248 - 249 - /* emits 10 */ 250 - static void 251 - draw_auto(struct radeon_device *rdev) 252 - { 253 - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 254 - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 255 - radeon_ring_write(ring, (VGT_PRIMITIVE_TYPE - PACKET3_SET_CONFIG_REG_START) >> 2); 256 - radeon_ring_write(ring, DI_PT_RECTLIST); 257 - 258 - radeon_ring_write(ring, PACKET3(PACKET3_INDEX_TYPE, 0)); 259 - radeon_ring_write(ring, 260 - #ifdef __BIG_ENDIAN 261 - (2 << 2) | 262 - #endif 263 - DI_INDEX_SIZE_16_BIT); 264 - 265 - radeon_ring_write(ring, PACKET3(PACKET3_NUM_INSTANCES, 0)); 266 - radeon_ring_write(ring, 1); 267 - 268 - radeon_ring_write(ring, PACKET3(PACKET3_DRAW_INDEX_AUTO, 1)); 269 - radeon_ring_write(ring, 3); 270 - radeon_ring_write(ring, DI_SRC_SEL_AUTO_INDEX); 271 - 272 - } 273 - 274 - /* emits 39 */ 275 - static void 276 - set_default_state(struct radeon_device *rdev) 277 - { 278 - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 279 - u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2, sq_gpr_resource_mgmt_3; 280 - u32 sq_thread_resource_mgmt, sq_thread_resource_mgmt_2; 281 - u32 sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2, sq_stack_resource_mgmt_3; 282 - int num_ps_gprs, num_vs_gprs, num_temp_gprs; 283 - int num_gs_gprs, num_es_gprs, num_hs_gprs, num_ls_gprs; 284 - int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads; 285 - int num_hs_threads, num_ls_threads; 286 - int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries; 287 - int num_hs_stack_entries, num_ls_stack_entries; 288 - u64 gpu_addr; 289 - int dwords; 290 - 291 - /* set clear context state */ 292 - radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 293 - radeon_ring_write(ring, 0); 294 - 295 - if (rdev->family < CHIP_CAYMAN) { 296 - switch (rdev->family) { 297 - case CHIP_CEDAR: 298 - default: 299 - num_ps_gprs = 93; 300 - num_vs_gprs = 46; 301 - num_temp_gprs = 4; 302 - num_gs_gprs = 31; 303 - num_es_gprs = 31; 304 - num_hs_gprs = 23; 305 - num_ls_gprs = 23; 306 - num_ps_threads = 96; 307 - num_vs_threads = 16; 308 - num_gs_threads = 16; 309 - num_es_threads = 16; 310 - num_hs_threads = 16; 311 - num_ls_threads = 16; 312 - num_ps_stack_entries = 42; 313 - num_vs_stack_entries = 42; 314 - num_gs_stack_entries = 42; 315 - num_es_stack_entries = 42; 316 - num_hs_stack_entries = 42; 317 - num_ls_stack_entries = 42; 318 - break; 319 - case CHIP_REDWOOD: 320 - num_ps_gprs = 93; 321 - num_vs_gprs = 46; 322 - num_temp_gprs = 4; 323 - num_gs_gprs = 31; 324 - num_es_gprs = 31; 325 - num_hs_gprs = 23; 326 - num_ls_gprs = 23; 327 - num_ps_threads = 128; 328 - num_vs_threads = 20; 329 - num_gs_threads = 20; 330 - num_es_threads = 20; 331 - num_hs_threads = 20; 332 - num_ls_threads = 20; 333 - num_ps_stack_entries = 42; 334 - num_vs_stack_entries = 42; 335 - num_gs_stack_entries = 42; 336 - num_es_stack_entries = 42; 337 - num_hs_stack_entries = 42; 338 - num_ls_stack_entries = 42; 339 - break; 340 - case CHIP_JUNIPER: 341 - num_ps_gprs = 93; 342 - num_vs_gprs = 46; 343 - num_temp_gprs = 4; 344 - num_gs_gprs = 31; 345 - num_es_gprs = 31; 346 - num_hs_gprs = 23; 347 - num_ls_gprs = 23; 348 - num_ps_threads = 128; 349 - num_vs_threads = 20; 350 - num_gs_threads = 20; 351 - num_es_threads = 20; 352 - num_hs_threads = 20; 353 - num_ls_threads = 20; 354 - num_ps_stack_entries = 85; 355 - num_vs_stack_entries = 85; 356 - num_gs_stack_entries = 85; 357 - num_es_stack_entries = 85; 358 - num_hs_stack_entries = 85; 359 - num_ls_stack_entries = 85; 360 - break; 361 - case CHIP_CYPRESS: 362 - case CHIP_HEMLOCK: 363 - num_ps_gprs = 93; 364 - num_vs_gprs = 46; 365 - num_temp_gprs = 4; 366 - num_gs_gprs = 31; 367 - num_es_gprs = 31; 368 - num_hs_gprs = 23; 369 - num_ls_gprs = 23; 370 - num_ps_threads = 128; 371 - num_vs_threads = 20; 372 - num_gs_threads = 20; 373 - num_es_threads = 20; 374 - num_hs_threads = 20; 375 - num_ls_threads = 20; 376 - num_ps_stack_entries = 85; 377 - num_vs_stack_entries = 85; 378 - num_gs_stack_entries = 85; 379 - num_es_stack_entries = 85; 380 - num_hs_stack_entries = 85; 381 - num_ls_stack_entries = 85; 382 - break; 383 - case CHIP_PALM: 384 - num_ps_gprs = 93; 385 - num_vs_gprs = 46; 386 - num_temp_gprs = 4; 387 - num_gs_gprs = 31; 388 - num_es_gprs = 31; 389 - num_hs_gprs = 23; 390 - num_ls_gprs = 23; 391 - num_ps_threads = 96; 392 - num_vs_threads = 16; 393 - num_gs_threads = 16; 394 - num_es_threads = 16; 395 - num_hs_threads = 16; 396 - num_ls_threads = 16; 397 - num_ps_stack_entries = 42; 398 - num_vs_stack_entries = 42; 399 - num_gs_stack_entries = 42; 400 - num_es_stack_entries = 42; 401 - num_hs_stack_entries = 42; 402 - num_ls_stack_entries = 42; 403 - break; 404 - case CHIP_SUMO: 405 - num_ps_gprs = 93; 406 - num_vs_gprs = 46; 407 - num_temp_gprs = 4; 408 - num_gs_gprs = 31; 409 - num_es_gprs = 31; 410 - num_hs_gprs = 23; 411 - num_ls_gprs = 23; 412 - num_ps_threads = 96; 413 - num_vs_threads = 25; 414 - num_gs_threads = 25; 415 - num_es_threads = 25; 416 - num_hs_threads = 25; 417 - num_ls_threads = 25; 418 - num_ps_stack_entries = 42; 419 - num_vs_stack_entries = 42; 420 - num_gs_stack_entries = 42; 421 - num_es_stack_entries = 42; 422 - num_hs_stack_entries = 42; 423 - num_ls_stack_entries = 42; 424 - break; 425 - case CHIP_SUMO2: 426 - num_ps_gprs = 93; 427 - num_vs_gprs = 46; 428 - num_temp_gprs = 4; 429 - num_gs_gprs = 31; 430 - num_es_gprs = 31; 431 - num_hs_gprs = 23; 432 - num_ls_gprs = 23; 433 - num_ps_threads = 96; 434 - num_vs_threads = 25; 435 - num_gs_threads = 25; 436 - num_es_threads = 25; 437 - num_hs_threads = 25; 438 - num_ls_threads = 25; 439 - num_ps_stack_entries = 85; 440 - num_vs_stack_entries = 85; 441 - num_gs_stack_entries = 85; 442 - num_es_stack_entries = 85; 443 - num_hs_stack_entries = 85; 444 - num_ls_stack_entries = 85; 445 - break; 446 - case CHIP_BARTS: 447 - num_ps_gprs = 93; 448 - num_vs_gprs = 46; 449 - num_temp_gprs = 4; 450 - num_gs_gprs = 31; 451 - num_es_gprs = 31; 452 - num_hs_gprs = 23; 453 - num_ls_gprs = 23; 454 - num_ps_threads = 128; 455 - num_vs_threads = 20; 456 - num_gs_threads = 20; 457 - num_es_threads = 20; 458 - num_hs_threads = 20; 459 - num_ls_threads = 20; 460 - num_ps_stack_entries = 85; 461 - num_vs_stack_entries = 85; 462 - num_gs_stack_entries = 85; 463 - num_es_stack_entries = 85; 464 - num_hs_stack_entries = 85; 465 - num_ls_stack_entries = 85; 466 - break; 467 - case CHIP_TURKS: 468 - num_ps_gprs = 93; 469 - num_vs_gprs = 46; 470 - num_temp_gprs = 4; 471 - num_gs_gprs = 31; 472 - num_es_gprs = 31; 473 - num_hs_gprs = 23; 474 - num_ls_gprs = 23; 475 - num_ps_threads = 128; 476 - num_vs_threads = 20; 477 - num_gs_threads = 20; 478 - num_es_threads = 20; 479 - num_hs_threads = 20; 480 - num_ls_threads = 20; 481 - num_ps_stack_entries = 42; 482 - num_vs_stack_entries = 42; 483 - num_gs_stack_entries = 42; 484 - num_es_stack_entries = 42; 485 - num_hs_stack_entries = 42; 486 - num_ls_stack_entries = 42; 487 - break; 488 - case CHIP_CAICOS: 489 - num_ps_gprs = 93; 490 - num_vs_gprs = 46; 491 - num_temp_gprs = 4; 492 - num_gs_gprs = 31; 493 - num_es_gprs = 31; 494 - num_hs_gprs = 23; 495 - num_ls_gprs = 23; 496 - num_ps_threads = 128; 497 - num_vs_threads = 10; 498 - num_gs_threads = 10; 499 - num_es_threads = 10; 500 - num_hs_threads = 10; 501 - num_ls_threads = 10; 502 - num_ps_stack_entries = 42; 503 - num_vs_stack_entries = 42; 504 - num_gs_stack_entries = 42; 505 - num_es_stack_entries = 42; 506 - num_hs_stack_entries = 42; 507 - num_ls_stack_entries = 42; 508 - break; 509 - } 510 - 511 - if ((rdev->family == CHIP_CEDAR) || 512 - (rdev->family == CHIP_PALM) || 513 - (rdev->family == CHIP_SUMO) || 514 - (rdev->family == CHIP_SUMO2) || 515 - (rdev->family == CHIP_CAICOS)) 516 - sq_config = 0; 517 - else 518 - sq_config = VC_ENABLE; 519 - 520 - sq_config |= (EXPORT_SRC_C | 521 - CS_PRIO(0) | 522 - LS_PRIO(0) | 523 - HS_PRIO(0) | 524 - PS_PRIO(0) | 525 - VS_PRIO(1) | 526 - GS_PRIO(2) | 527 - ES_PRIO(3)); 528 - 529 - sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) | 530 - NUM_VS_GPRS(num_vs_gprs) | 531 - NUM_CLAUSE_TEMP_GPRS(num_temp_gprs)); 532 - sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) | 533 - NUM_ES_GPRS(num_es_gprs)); 534 - sq_gpr_resource_mgmt_3 = (NUM_HS_GPRS(num_hs_gprs) | 535 - NUM_LS_GPRS(num_ls_gprs)); 536 - sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) | 537 - NUM_VS_THREADS(num_vs_threads) | 538 - NUM_GS_THREADS(num_gs_threads) | 539 - NUM_ES_THREADS(num_es_threads)); 540 - sq_thread_resource_mgmt_2 = (NUM_HS_THREADS(num_hs_threads) | 541 - NUM_LS_THREADS(num_ls_threads)); 542 - sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) | 543 - NUM_VS_STACK_ENTRIES(num_vs_stack_entries)); 544 - sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) | 545 - NUM_ES_STACK_ENTRIES(num_es_stack_entries)); 546 - sq_stack_resource_mgmt_3 = (NUM_HS_STACK_ENTRIES(num_hs_stack_entries) | 547 - NUM_LS_STACK_ENTRIES(num_ls_stack_entries)); 548 - 549 - /* disable dyn gprs */ 550 - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 551 - radeon_ring_write(ring, (SQ_DYN_GPR_CNTL_PS_FLUSH_REQ - PACKET3_SET_CONFIG_REG_START) >> 2); 552 - radeon_ring_write(ring, 0); 553 - 554 - /* setup LDS */ 555 - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 556 - radeon_ring_write(ring, (SQ_LDS_RESOURCE_MGMT - PACKET3_SET_CONFIG_REG_START) >> 2); 557 - radeon_ring_write(ring, 0x10001000); 558 - 559 - /* SQ config */ 560 - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 11)); 561 - radeon_ring_write(ring, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_START) >> 2); 562 - radeon_ring_write(ring, sq_config); 563 - radeon_ring_write(ring, sq_gpr_resource_mgmt_1); 564 - radeon_ring_write(ring, sq_gpr_resource_mgmt_2); 565 - radeon_ring_write(ring, sq_gpr_resource_mgmt_3); 566 - radeon_ring_write(ring, 0); 567 - radeon_ring_write(ring, 0); 568 - radeon_ring_write(ring, sq_thread_resource_mgmt); 569 - radeon_ring_write(ring, sq_thread_resource_mgmt_2); 570 - radeon_ring_write(ring, sq_stack_resource_mgmt_1); 571 - radeon_ring_write(ring, sq_stack_resource_mgmt_2); 572 - radeon_ring_write(ring, sq_stack_resource_mgmt_3); 573 - } 574 - 575 - /* CONTEXT_CONTROL */ 576 - radeon_ring_write(ring, 0xc0012800); 577 - radeon_ring_write(ring, 0x80000000); 578 - radeon_ring_write(ring, 0x80000000); 579 - 580 - /* SQ_VTX_BASE_VTX_LOC */ 581 - radeon_ring_write(ring, 0xc0026f00); 582 - radeon_ring_write(ring, 0x00000000); 583 - radeon_ring_write(ring, 0x00000000); 584 - radeon_ring_write(ring, 0x00000000); 585 - 586 - /* SET_SAMPLER */ 587 - radeon_ring_write(ring, 0xc0036e00); 588 - radeon_ring_write(ring, 0x00000000); 589 - radeon_ring_write(ring, 0x00000012); 590 - radeon_ring_write(ring, 0x00000000); 591 - radeon_ring_write(ring, 0x00000000); 592 - 593 - /* set to DX10/11 mode */ 594 - radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0)); 595 - radeon_ring_write(ring, 1); 596 - 597 - /* emit an IB pointing at default state */ 598 - dwords = ALIGN(rdev->r600_blit.state_len, 0x10); 599 - gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.state_offset; 600 - radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 601 - radeon_ring_write(ring, gpu_addr & 0xFFFFFFFC); 602 - radeon_ring_write(ring, upper_32_bits(gpu_addr) & 0xFF); 603 - radeon_ring_write(ring, dwords); 604 - 605 - } 606 - 607 - int evergreen_blit_init(struct radeon_device *rdev) 608 - { 609 - u32 obj_size; 610 - int i, r, dwords; 611 - void *ptr; 612 - u32 packet2s[16]; 613 - int num_packet2s = 0; 614 - 615 - rdev->r600_blit.primitives.set_render_target = set_render_target; 616 - rdev->r600_blit.primitives.cp_set_surface_sync = cp_set_surface_sync; 617 - rdev->r600_blit.primitives.set_shaders = set_shaders; 618 - rdev->r600_blit.primitives.set_vtx_resource = set_vtx_resource; 619 - rdev->r600_blit.primitives.set_tex_resource = set_tex_resource; 620 - rdev->r600_blit.primitives.set_scissors = set_scissors; 621 - rdev->r600_blit.primitives.draw_auto = draw_auto; 622 - rdev->r600_blit.primitives.set_default_state = set_default_state; 623 - 624 - rdev->r600_blit.ring_size_common = 8; /* sync semaphore */ 625 - rdev->r600_blit.ring_size_common += 55; /* shaders + def state */ 626 - rdev->r600_blit.ring_size_common += 16; /* fence emit for VB IB */ 627 - rdev->r600_blit.ring_size_common += 5; /* done copy */ 628 - rdev->r600_blit.ring_size_common += 16; /* fence emit for done copy */ 629 - 630 - rdev->r600_blit.ring_size_per_loop = 74; 631 - if (rdev->family >= CHIP_CAYMAN) 632 - rdev->r600_blit.ring_size_per_loop += 9; /* additional DWs for surface sync */ 633 - 634 - rdev->r600_blit.max_dim = 16384; 635 - 636 - rdev->r600_blit.state_offset = 0; 637 - 638 - if (rdev->family < CHIP_CAYMAN) 639 - rdev->r600_blit.state_len = evergreen_default_size; 640 - else 641 - rdev->r600_blit.state_len = cayman_default_size; 642 - 643 - dwords = rdev->r600_blit.state_len; 644 - while (dwords & 0xf) { 645 - packet2s[num_packet2s++] = cpu_to_le32(PACKET2(0)); 646 - dwords++; 647 - } 648 - 649 - obj_size = dwords * 4; 650 - obj_size = ALIGN(obj_size, 256); 651 - 652 - rdev->r600_blit.vs_offset = obj_size; 653 - if (rdev->family < CHIP_CAYMAN) 654 - obj_size += evergreen_vs_size * 4; 655 - else 656 - obj_size += cayman_vs_size * 4; 657 - obj_size = ALIGN(obj_size, 256); 658 - 659 - rdev->r600_blit.ps_offset = obj_size; 660 - if (rdev->family < CHIP_CAYMAN) 661 - obj_size += evergreen_ps_size * 4; 662 - else 663 - obj_size += cayman_ps_size * 4; 664 - obj_size = ALIGN(obj_size, 256); 665 - 666 - /* pin copy shader into vram if not already initialized */ 667 - if (!rdev->r600_blit.shader_obj) { 668 - r = radeon_bo_create(rdev, obj_size, PAGE_SIZE, true, 669 - RADEON_GEM_DOMAIN_VRAM, 670 - NULL, &rdev->r600_blit.shader_obj); 671 - if (r) { 672 - DRM_ERROR("evergreen failed to allocate shader\n"); 673 - return r; 674 - } 675 - 676 - r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); 677 - if (unlikely(r != 0)) 678 - return r; 679 - r = radeon_bo_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM, 680 - &rdev->r600_blit.shader_gpu_addr); 681 - radeon_bo_unreserve(rdev->r600_blit.shader_obj); 682 - if (r) { 683 - dev_err(rdev->dev, "(%d) pin blit object failed\n", r); 684 - return r; 685 - } 686 - } 687 - 688 - DRM_DEBUG("evergreen blit allocated bo %08x vs %08x ps %08x\n", 689 - obj_size, 690 - rdev->r600_blit.vs_offset, rdev->r600_blit.ps_offset); 691 - 692 - r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); 693 - if (unlikely(r != 0)) 694 - return r; 695 - r = radeon_bo_kmap(rdev->r600_blit.shader_obj, &ptr); 696 - if (r) { 697 - DRM_ERROR("failed to map blit object %d\n", r); 698 - return r; 699 - } 700 - 701 - if (rdev->family < CHIP_CAYMAN) { 702 - memcpy_toio(ptr + rdev->r600_blit.state_offset, 703 - evergreen_default_state, rdev->r600_blit.state_len * 4); 704 - 705 - if (num_packet2s) 706 - memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4), 707 - packet2s, num_packet2s * 4); 708 - for (i = 0; i < evergreen_vs_size; i++) 709 - *(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(evergreen_vs[i]); 710 - for (i = 0; i < evergreen_ps_size; i++) 711 - *(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(evergreen_ps[i]); 712 - } else { 713 - memcpy_toio(ptr + rdev->r600_blit.state_offset, 714 - cayman_default_state, rdev->r600_blit.state_len * 4); 715 - 716 - if (num_packet2s) 717 - memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4), 718 - packet2s, num_packet2s * 4); 719 - for (i = 0; i < cayman_vs_size; i++) 720 - *(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(cayman_vs[i]); 721 - for (i = 0; i < cayman_ps_size; i++) 722 - *(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(cayman_ps[i]); 723 - } 724 - radeon_bo_kunmap(rdev->r600_blit.shader_obj); 725 - radeon_bo_unreserve(rdev->r600_blit.shader_obj); 726 - 727 - radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); 728 - return 0; 729 - }
-54
drivers/gpu/drm/radeon/evergreen_blit_shaders.c
··· 300 300 0x00000010, /* */ 301 301 }; 302 302 303 - const u32 evergreen_vs[] = 304 - { 305 - 0x00000004, 306 - 0x80800400, 307 - 0x0000a03c, 308 - 0x95000688, 309 - 0x00004000, 310 - 0x15200688, 311 - 0x00000000, 312 - 0x00000000, 313 - 0x3c000000, 314 - 0x67961001, 315 - #ifdef __BIG_ENDIAN 316 - 0x000a0000, 317 - #else 318 - 0x00080000, 319 - #endif 320 - 0x00000000, 321 - 0x1c000000, 322 - 0x67961000, 323 - #ifdef __BIG_ENDIAN 324 - 0x00020008, 325 - #else 326 - 0x00000008, 327 - #endif 328 - 0x00000000, 329 - }; 330 - 331 - const u32 evergreen_ps[] = 332 - { 333 - 0x00000003, 334 - 0xa00c0000, 335 - 0x00000008, 336 - 0x80400000, 337 - 0x00000000, 338 - 0x95200688, 339 - 0x00380400, 340 - 0x00146b10, 341 - 0x00380000, 342 - 0x20146b10, 343 - 0x00380400, 344 - 0x40146b00, 345 - 0x80380000, 346 - 0x60146b00, 347 - 0x00000000, 348 - 0x00000000, 349 - 0x00000010, 350 - 0x000d1000, 351 - 0xb0800000, 352 - 0x00000000, 353 - }; 354 - 355 - const u32 evergreen_ps_size = ARRAY_SIZE(evergreen_ps); 356 - const u32 evergreen_vs_size = ARRAY_SIZE(evergreen_vs); 357 303 const u32 evergreen_default_size = ARRAY_SIZE(evergreen_default_state);
-8
drivers/gpu/drm/radeon/ni.c
··· 2118 2118 return r; 2119 2119 cayman_gpu_init(rdev); 2120 2120 2121 - r = evergreen_blit_init(rdev); 2122 - if (r) { 2123 - r600_blit_fini(rdev); 2124 - rdev->asic->copy.copy = NULL; 2125 - dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r); 2126 - } 2127 - 2128 2121 /* allocate rlc buffers */ 2129 2122 if (rdev->flags & RADEON_IS_IGP) { 2130 2123 rdev->rlc.reg_list = tn_rlc_save_restore_register_list; ··· 2406 2413 2407 2414 void cayman_fini(struct radeon_device *rdev) 2408 2415 { 2409 - r600_blit_fini(rdev); 2410 2416 cayman_cp_fini(rdev); 2411 2417 cayman_dma_fini(rdev); 2412 2418 r600_irq_fini(rdev);
-26
drivers/gpu/drm/radeon/r600.c
··· 3136 3136 radeon_ring_write(ring, emit_wait ? 1 : 0); 3137 3137 } 3138 3138 3139 - int r600_copy_blit(struct radeon_device *rdev, 3140 - uint64_t src_offset, 3141 - uint64_t dst_offset, 3142 - unsigned num_gpu_pages, 3143 - struct radeon_fence **fence) 3144 - { 3145 - struct radeon_semaphore *sem = NULL; 3146 - struct radeon_sa_bo *vb = NULL; 3147 - int r; 3148 - 3149 - r = r600_blit_prepare_copy(rdev, num_gpu_pages, fence, &vb, &sem); 3150 - if (r) { 3151 - return r; 3152 - } 3153 - r600_kms_blit_copy(rdev, src_offset, dst_offset, num_gpu_pages, vb); 3154 - r600_blit_done_copy(rdev, fence, vb, sem); 3155 - return 0; 3156 - } 3157 - 3158 3139 /** 3159 3140 * r600_copy_cpdma - copy pages using the CP DMA engine 3160 3141 * ··· 3337 3356 return r; 3338 3357 } 3339 3358 r600_gpu_init(rdev); 3340 - r = r600_blit_init(rdev); 3341 - if (r) { 3342 - r600_blit_fini(rdev); 3343 - rdev->asic->copy.copy = NULL; 3344 - dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r); 3345 - } 3346 3359 3347 3360 /* allocate wb buffer */ 3348 3361 r = radeon_wb_init(rdev); ··· 3549 3574 void r600_fini(struct radeon_device *rdev) 3550 3575 { 3551 3576 r600_audio_fini(rdev); 3552 - r600_blit_fini(rdev); 3553 3577 r600_cp_fini(rdev); 3554 3578 r600_dma_fini(rdev); 3555 3579 r600_irq_fini(rdev);
+31
drivers/gpu/drm/radeon/r600_blit.c
··· 31 31 32 32 #include "r600_blit_shaders.h" 33 33 34 + /* 23 bits of float fractional data */ 35 + #define I2F_FRAC_BITS 23 36 + #define I2F_MASK ((1 << I2F_FRAC_BITS) - 1) 37 + 38 + /* 39 + * Converts unsigned integer into 32-bit IEEE floating point representation. 40 + * Will be exact from 0 to 2^24. Above that, we round towards zero 41 + * as the fractional bits will not fit in a float. (It would be better to 42 + * round towards even as the fpu does, but that is slower.) 43 + */ 44 + static __pure uint32_t int2float(uint32_t x) 45 + { 46 + uint32_t msb, exponent, fraction; 47 + 48 + /* Zero is special */ 49 + if (!x) return 0; 50 + 51 + /* Get location of the most significant bit */ 52 + msb = __fls(x); 53 + 54 + /* 55 + * Use a rotate instead of a shift because that works both leftwards 56 + * and rightwards due to the mod(32) behaviour. This means we don't 57 + * need to check to see if we are above 2^24 or not. 58 + */ 59 + fraction = ror32(x, (msb - I2F_FRAC_BITS) & 0x1f) & I2F_MASK; 60 + exponent = (127 + msb) << I2F_FRAC_BITS; 61 + 62 + return fraction + exponent; 63 + } 64 + 34 65 #define DI_PT_RECTLIST 0x11 35 66 #define DI_INDEX_SIZE_16_BIT 0x0 36 67 #define DI_SRC_SEL_AUTO_INDEX 0x2
-785
drivers/gpu/drm/radeon/r600_blit_kms.c
··· 1 - /* 2 - * Copyright 2009 Advanced Micro Devices, Inc. 3 - * Copyright 2009 Red Hat Inc. 4 - * 5 - * Permission is hereby granted, free of charge, to any person obtaining a 6 - * copy of this software and associated documentation files (the "Software"), 7 - * to deal in the Software without restriction, including without limitation 8 - * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 - * and/or sell copies of the Software, and to permit persons to whom the 10 - * Software is furnished to do so, subject to the following conditions: 11 - * 12 - * The above copyright notice and this permission notice (including the next 13 - * paragraph) shall be included in all copies or substantial portions of the 14 - * Software. 15 - * 16 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 - * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 22 - * DEALINGS IN THE SOFTWARE. 23 - * 24 - */ 25 - 26 - #include <drm/drmP.h> 27 - #include <drm/radeon_drm.h> 28 - #include "radeon.h" 29 - 30 - #include "r600d.h" 31 - #include "r600_blit_shaders.h" 32 - #include "radeon_blit_common.h" 33 - 34 - /* 23 bits of float fractional data */ 35 - #define I2F_FRAC_BITS 23 36 - #define I2F_MASK ((1 << I2F_FRAC_BITS) - 1) 37 - 38 - /* 39 - * Converts unsigned integer into 32-bit IEEE floating point representation. 40 - * Will be exact from 0 to 2^24. Above that, we round towards zero 41 - * as the fractional bits will not fit in a float. (It would be better to 42 - * round towards even as the fpu does, but that is slower.) 43 - */ 44 - __pure uint32_t int2float(uint32_t x) 45 - { 46 - uint32_t msb, exponent, fraction; 47 - 48 - /* Zero is special */ 49 - if (!x) return 0; 50 - 51 - /* Get location of the most significant bit */ 52 - msb = __fls(x); 53 - 54 - /* 55 - * Use a rotate instead of a shift because that works both leftwards 56 - * and rightwards due to the mod(32) behaviour. This means we don't 57 - * need to check to see if we are above 2^24 or not. 58 - */ 59 - fraction = ror32(x, (msb - I2F_FRAC_BITS) & 0x1f) & I2F_MASK; 60 - exponent = (127 + msb) << I2F_FRAC_BITS; 61 - 62 - return fraction + exponent; 63 - } 64 - 65 - /* emits 21 on rv770+, 23 on r600 */ 66 - static void 67 - set_render_target(struct radeon_device *rdev, int format, 68 - int w, int h, u64 gpu_addr) 69 - { 70 - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 71 - u32 cb_color_info; 72 - int pitch, slice; 73 - 74 - h = ALIGN(h, 8); 75 - if (h < 8) 76 - h = 8; 77 - 78 - cb_color_info = CB_FORMAT(format) | 79 - CB_SOURCE_FORMAT(CB_SF_EXPORT_NORM) | 80 - CB_ARRAY_MODE(ARRAY_1D_TILED_THIN1); 81 - pitch = (w / 8) - 1; 82 - slice = ((w * h) / 64) - 1; 83 - 84 - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); 85 - radeon_ring_write(ring, (CB_COLOR0_BASE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); 86 - radeon_ring_write(ring, gpu_addr >> 8); 87 - 88 - if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770) { 89 - radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_BASE_UPDATE, 0)); 90 - radeon_ring_write(ring, 2 << 0); 91 - } 92 - 93 - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); 94 - radeon_ring_write(ring, (CB_COLOR0_SIZE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); 95 - radeon_ring_write(ring, (pitch << 0) | (slice << 10)); 96 - 97 - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); 98 - radeon_ring_write(ring, (CB_COLOR0_VIEW - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); 99 - radeon_ring_write(ring, 0); 100 - 101 - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); 102 - radeon_ring_write(ring, (CB_COLOR0_INFO - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); 103 - radeon_ring_write(ring, cb_color_info); 104 - 105 - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); 106 - radeon_ring_write(ring, (CB_COLOR0_TILE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); 107 - radeon_ring_write(ring, 0); 108 - 109 - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); 110 - radeon_ring_write(ring, (CB_COLOR0_FRAG - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); 111 - radeon_ring_write(ring, 0); 112 - 113 - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); 114 - radeon_ring_write(ring, (CB_COLOR0_MASK - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); 115 - radeon_ring_write(ring, 0); 116 - } 117 - 118 - /* emits 5dw */ 119 - static void 120 - cp_set_surface_sync(struct radeon_device *rdev, 121 - u32 sync_type, u32 size, 122 - u64 mc_addr) 123 - { 124 - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 125 - u32 cp_coher_size; 126 - 127 - if (size == 0xffffffff) 128 - cp_coher_size = 0xffffffff; 129 - else 130 - cp_coher_size = ((size + 255) >> 8); 131 - 132 - radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); 133 - radeon_ring_write(ring, sync_type); 134 - radeon_ring_write(ring, cp_coher_size); 135 - radeon_ring_write(ring, mc_addr >> 8); 136 - radeon_ring_write(ring, 10); /* poll interval */ 137 - } 138 - 139 - /* emits 21dw + 1 surface sync = 26dw */ 140 - static void 141 - set_shaders(struct radeon_device *rdev) 142 - { 143 - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 144 - u64 gpu_addr; 145 - u32 sq_pgm_resources; 146 - 147 - /* setup shader regs */ 148 - sq_pgm_resources = (1 << 0); 149 - 150 - /* VS */ 151 - gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset; 152 - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); 153 - radeon_ring_write(ring, (SQ_PGM_START_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); 154 - radeon_ring_write(ring, gpu_addr >> 8); 155 - 156 - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); 157 - radeon_ring_write(ring, (SQ_PGM_RESOURCES_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); 158 - radeon_ring_write(ring, sq_pgm_resources); 159 - 160 - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); 161 - radeon_ring_write(ring, (SQ_PGM_CF_OFFSET_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); 162 - radeon_ring_write(ring, 0); 163 - 164 - /* PS */ 165 - gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.ps_offset; 166 - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); 167 - radeon_ring_write(ring, (SQ_PGM_START_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); 168 - radeon_ring_write(ring, gpu_addr >> 8); 169 - 170 - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); 171 - radeon_ring_write(ring, (SQ_PGM_RESOURCES_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); 172 - radeon_ring_write(ring, sq_pgm_resources | (1 << 28)); 173 - 174 - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); 175 - radeon_ring_write(ring, (SQ_PGM_EXPORTS_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); 176 - radeon_ring_write(ring, 2); 177 - 178 - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); 179 - radeon_ring_write(ring, (SQ_PGM_CF_OFFSET_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); 180 - radeon_ring_write(ring, 0); 181 - 182 - gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset; 183 - cp_set_surface_sync(rdev, PACKET3_SH_ACTION_ENA, 512, gpu_addr); 184 - } 185 - 186 - /* emits 9 + 1 sync (5) = 14*/ 187 - static void 188 - set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr) 189 - { 190 - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 191 - u32 sq_vtx_constant_word2; 192 - 193 - sq_vtx_constant_word2 = SQ_VTXC_BASE_ADDR_HI(upper_32_bits(gpu_addr) & 0xff) | 194 - SQ_VTXC_STRIDE(16); 195 - #ifdef __BIG_ENDIAN 196 - sq_vtx_constant_word2 |= SQ_VTXC_ENDIAN_SWAP(SQ_ENDIAN_8IN32); 197 - #endif 198 - 199 - radeon_ring_write(ring, PACKET3(PACKET3_SET_RESOURCE, 7)); 200 - radeon_ring_write(ring, 0x460); 201 - radeon_ring_write(ring, gpu_addr & 0xffffffff); 202 - radeon_ring_write(ring, 48 - 1); 203 - radeon_ring_write(ring, sq_vtx_constant_word2); 204 - radeon_ring_write(ring, 1 << 0); 205 - radeon_ring_write(ring, 0); 206 - radeon_ring_write(ring, 0); 207 - radeon_ring_write(ring, SQ_TEX_VTX_VALID_BUFFER << 30); 208 - 209 - if ((rdev->family == CHIP_RV610) || 210 - (rdev->family == CHIP_RV620) || 211 - (rdev->family == CHIP_RS780) || 212 - (rdev->family == CHIP_RS880) || 213 - (rdev->family == CHIP_RV710)) 214 - cp_set_surface_sync(rdev, 215 - PACKET3_TC_ACTION_ENA, 48, gpu_addr); 216 - else 217 - cp_set_surface_sync(rdev, 218 - PACKET3_VC_ACTION_ENA, 48, gpu_addr); 219 - } 220 - 221 - /* emits 9 */ 222 - static void 223 - set_tex_resource(struct radeon_device *rdev, 224 - int format, int w, int h, int pitch, 225 - u64 gpu_addr, u32 size) 226 - { 227 - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 228 - uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4; 229 - 230 - if (h < 1) 231 - h = 1; 232 - 233 - sq_tex_resource_word0 = S_038000_DIM(V_038000_SQ_TEX_DIM_2D) | 234 - S_038000_TILE_MODE(V_038000_ARRAY_1D_TILED_THIN1); 235 - sq_tex_resource_word0 |= S_038000_PITCH((pitch >> 3) - 1) | 236 - S_038000_TEX_WIDTH(w - 1); 237 - 238 - sq_tex_resource_word1 = S_038004_DATA_FORMAT(format); 239 - sq_tex_resource_word1 |= S_038004_TEX_HEIGHT(h - 1); 240 - 241 - sq_tex_resource_word4 = S_038010_REQUEST_SIZE(1) | 242 - S_038010_DST_SEL_X(SQ_SEL_X) | 243 - S_038010_DST_SEL_Y(SQ_SEL_Y) | 244 - S_038010_DST_SEL_Z(SQ_SEL_Z) | 245 - S_038010_DST_SEL_W(SQ_SEL_W); 246 - 247 - cp_set_surface_sync(rdev, 248 - PACKET3_TC_ACTION_ENA, size, gpu_addr); 249 - 250 - radeon_ring_write(ring, PACKET3(PACKET3_SET_RESOURCE, 7)); 251 - radeon_ring_write(ring, 0); 252 - radeon_ring_write(ring, sq_tex_resource_word0); 253 - radeon_ring_write(ring, sq_tex_resource_word1); 254 - radeon_ring_write(ring, gpu_addr >> 8); 255 - radeon_ring_write(ring, gpu_addr >> 8); 256 - radeon_ring_write(ring, sq_tex_resource_word4); 257 - radeon_ring_write(ring, 0); 258 - radeon_ring_write(ring, SQ_TEX_VTX_VALID_TEXTURE << 30); 259 - } 260 - 261 - /* emits 12 */ 262 - static void 263 - set_scissors(struct radeon_device *rdev, int x1, int y1, 264 - int x2, int y2) 265 - { 266 - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 267 - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); 268 - radeon_ring_write(ring, (PA_SC_SCREEN_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); 269 - radeon_ring_write(ring, (x1 << 0) | (y1 << 16)); 270 - radeon_ring_write(ring, (x2 << 0) | (y2 << 16)); 271 - 272 - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); 273 - radeon_ring_write(ring, (PA_SC_GENERIC_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); 274 - radeon_ring_write(ring, (x1 << 0) | (y1 << 16) | (1 << 31)); 275 - radeon_ring_write(ring, (x2 << 0) | (y2 << 16)); 276 - 277 - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); 278 - radeon_ring_write(ring, (PA_SC_WINDOW_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); 279 - radeon_ring_write(ring, (x1 << 0) | (y1 << 16) | (1 << 31)); 280 - radeon_ring_write(ring, (x2 << 0) | (y2 << 16)); 281 - } 282 - 283 - /* emits 10 */ 284 - static void 285 - draw_auto(struct radeon_device *rdev) 286 - { 287 - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 288 - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 289 - radeon_ring_write(ring, (VGT_PRIMITIVE_TYPE - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); 290 - radeon_ring_write(ring, DI_PT_RECTLIST); 291 - 292 - radeon_ring_write(ring, PACKET3(PACKET3_INDEX_TYPE, 0)); 293 - radeon_ring_write(ring, 294 - #ifdef __BIG_ENDIAN 295 - (2 << 2) | 296 - #endif 297 - DI_INDEX_SIZE_16_BIT); 298 - 299 - radeon_ring_write(ring, PACKET3(PACKET3_NUM_INSTANCES, 0)); 300 - radeon_ring_write(ring, 1); 301 - 302 - radeon_ring_write(ring, PACKET3(PACKET3_DRAW_INDEX_AUTO, 1)); 303 - radeon_ring_write(ring, 3); 304 - radeon_ring_write(ring, DI_SRC_SEL_AUTO_INDEX); 305 - 306 - } 307 - 308 - /* emits 14 */ 309 - static void 310 - set_default_state(struct radeon_device *rdev) 311 - { 312 - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 313 - u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2; 314 - u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2; 315 - int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs; 316 - int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads; 317 - int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries; 318 - u64 gpu_addr; 319 - int dwords; 320 - 321 - switch (rdev->family) { 322 - case CHIP_R600: 323 - num_ps_gprs = 192; 324 - num_vs_gprs = 56; 325 - num_temp_gprs = 4; 326 - num_gs_gprs = 0; 327 - num_es_gprs = 0; 328 - num_ps_threads = 136; 329 - num_vs_threads = 48; 330 - num_gs_threads = 4; 331 - num_es_threads = 4; 332 - num_ps_stack_entries = 128; 333 - num_vs_stack_entries = 128; 334 - num_gs_stack_entries = 0; 335 - num_es_stack_entries = 0; 336 - break; 337 - case CHIP_RV630: 338 - case CHIP_RV635: 339 - num_ps_gprs = 84; 340 - num_vs_gprs = 36; 341 - num_temp_gprs = 4; 342 - num_gs_gprs = 0; 343 - num_es_gprs = 0; 344 - num_ps_threads = 144; 345 - num_vs_threads = 40; 346 - num_gs_threads = 4; 347 - num_es_threads = 4; 348 - num_ps_stack_entries = 40; 349 - num_vs_stack_entries = 40; 350 - num_gs_stack_entries = 32; 351 - num_es_stack_entries = 16; 352 - break; 353 - case CHIP_RV610: 354 - case CHIP_RV620: 355 - case CHIP_RS780: 356 - case CHIP_RS880: 357 - default: 358 - num_ps_gprs = 84; 359 - num_vs_gprs = 36; 360 - num_temp_gprs = 4; 361 - num_gs_gprs = 0; 362 - num_es_gprs = 0; 363 - num_ps_threads = 136; 364 - num_vs_threads = 48; 365 - num_gs_threads = 4; 366 - num_es_threads = 4; 367 - num_ps_stack_entries = 40; 368 - num_vs_stack_entries = 40; 369 - num_gs_stack_entries = 32; 370 - num_es_stack_entries = 16; 371 - break; 372 - case CHIP_RV670: 373 - num_ps_gprs = 144; 374 - num_vs_gprs = 40; 375 - num_temp_gprs = 4; 376 - num_gs_gprs = 0; 377 - num_es_gprs = 0; 378 - num_ps_threads = 136; 379 - num_vs_threads = 48; 380 - num_gs_threads = 4; 381 - num_es_threads = 4; 382 - num_ps_stack_entries = 40; 383 - num_vs_stack_entries = 40; 384 - num_gs_stack_entries = 32; 385 - num_es_stack_entries = 16; 386 - break; 387 - case CHIP_RV770: 388 - num_ps_gprs = 192; 389 - num_vs_gprs = 56; 390 - num_temp_gprs = 4; 391 - num_gs_gprs = 0; 392 - num_es_gprs = 0; 393 - num_ps_threads = 188; 394 - num_vs_threads = 60; 395 - num_gs_threads = 0; 396 - num_es_threads = 0; 397 - num_ps_stack_entries = 256; 398 - num_vs_stack_entries = 256; 399 - num_gs_stack_entries = 0; 400 - num_es_stack_entries = 0; 401 - break; 402 - case CHIP_RV730: 403 - case CHIP_RV740: 404 - num_ps_gprs = 84; 405 - num_vs_gprs = 36; 406 - num_temp_gprs = 4; 407 - num_gs_gprs = 0; 408 - num_es_gprs = 0; 409 - num_ps_threads = 188; 410 - num_vs_threads = 60; 411 - num_gs_threads = 0; 412 - num_es_threads = 0; 413 - num_ps_stack_entries = 128; 414 - num_vs_stack_entries = 128; 415 - num_gs_stack_entries = 0; 416 - num_es_stack_entries = 0; 417 - break; 418 - case CHIP_RV710: 419 - num_ps_gprs = 192; 420 - num_vs_gprs = 56; 421 - num_temp_gprs = 4; 422 - num_gs_gprs = 0; 423 - num_es_gprs = 0; 424 - num_ps_threads = 144; 425 - num_vs_threads = 48; 426 - num_gs_threads = 0; 427 - num_es_threads = 0; 428 - num_ps_stack_entries = 128; 429 - num_vs_stack_entries = 128; 430 - num_gs_stack_entries = 0; 431 - num_es_stack_entries = 0; 432 - break; 433 - } 434 - 435 - if ((rdev->family == CHIP_RV610) || 436 - (rdev->family == CHIP_RV620) || 437 - (rdev->family == CHIP_RS780) || 438 - (rdev->family == CHIP_RS880) || 439 - (rdev->family == CHIP_RV710)) 440 - sq_config = 0; 441 - else 442 - sq_config = VC_ENABLE; 443 - 444 - sq_config |= (DX9_CONSTS | 445 - ALU_INST_PREFER_VECTOR | 446 - PS_PRIO(0) | 447 - VS_PRIO(1) | 448 - GS_PRIO(2) | 449 - ES_PRIO(3)); 450 - 451 - sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) | 452 - NUM_VS_GPRS(num_vs_gprs) | 453 - NUM_CLAUSE_TEMP_GPRS(num_temp_gprs)); 454 - sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) | 455 - NUM_ES_GPRS(num_es_gprs)); 456 - sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) | 457 - NUM_VS_THREADS(num_vs_threads) | 458 - NUM_GS_THREADS(num_gs_threads) | 459 - NUM_ES_THREADS(num_es_threads)); 460 - sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) | 461 - NUM_VS_STACK_ENTRIES(num_vs_stack_entries)); 462 - sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) | 463 - NUM_ES_STACK_ENTRIES(num_es_stack_entries)); 464 - 465 - /* emit an IB pointing at default state */ 466 - dwords = ALIGN(rdev->r600_blit.state_len, 0x10); 467 - gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.state_offset; 468 - radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 469 - radeon_ring_write(ring, 470 - #ifdef __BIG_ENDIAN 471 - (2 << 0) | 472 - #endif 473 - (gpu_addr & 0xFFFFFFFC)); 474 - radeon_ring_write(ring, upper_32_bits(gpu_addr) & 0xFF); 475 - radeon_ring_write(ring, dwords); 476 - 477 - /* SQ config */ 478 - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 6)); 479 - radeon_ring_write(ring, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); 480 - radeon_ring_write(ring, sq_config); 481 - radeon_ring_write(ring, sq_gpr_resource_mgmt_1); 482 - radeon_ring_write(ring, sq_gpr_resource_mgmt_2); 483 - radeon_ring_write(ring, sq_thread_resource_mgmt); 484 - radeon_ring_write(ring, sq_stack_resource_mgmt_1); 485 - radeon_ring_write(ring, sq_stack_resource_mgmt_2); 486 - } 487 - 488 - int r600_blit_init(struct radeon_device *rdev) 489 - { 490 - u32 obj_size; 491 - int i, r, dwords; 492 - void *ptr; 493 - u32 packet2s[16]; 494 - int num_packet2s = 0; 495 - 496 - rdev->r600_blit.primitives.set_render_target = set_render_target; 497 - rdev->r600_blit.primitives.cp_set_surface_sync = cp_set_surface_sync; 498 - rdev->r600_blit.primitives.set_shaders = set_shaders; 499 - rdev->r600_blit.primitives.set_vtx_resource = set_vtx_resource; 500 - rdev->r600_blit.primitives.set_tex_resource = set_tex_resource; 501 - rdev->r600_blit.primitives.set_scissors = set_scissors; 502 - rdev->r600_blit.primitives.draw_auto = draw_auto; 503 - rdev->r600_blit.primitives.set_default_state = set_default_state; 504 - 505 - rdev->r600_blit.ring_size_common = 8; /* sync semaphore */ 506 - rdev->r600_blit.ring_size_common += 40; /* shaders + def state */ 507 - rdev->r600_blit.ring_size_common += 5; /* done copy */ 508 - rdev->r600_blit.ring_size_common += 16; /* fence emit for done copy */ 509 - 510 - rdev->r600_blit.ring_size_per_loop = 76; 511 - /* set_render_target emits 2 extra dwords on rv6xx */ 512 - if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770) 513 - rdev->r600_blit.ring_size_per_loop += 2; 514 - 515 - rdev->r600_blit.max_dim = 8192; 516 - 517 - rdev->r600_blit.state_offset = 0; 518 - 519 - if (rdev->family >= CHIP_RV770) 520 - rdev->r600_blit.state_len = r7xx_default_size; 521 - else 522 - rdev->r600_blit.state_len = r6xx_default_size; 523 - 524 - dwords = rdev->r600_blit.state_len; 525 - while (dwords & 0xf) { 526 - packet2s[num_packet2s++] = cpu_to_le32(PACKET2(0)); 527 - dwords++; 528 - } 529 - 530 - obj_size = dwords * 4; 531 - obj_size = ALIGN(obj_size, 256); 532 - 533 - rdev->r600_blit.vs_offset = obj_size; 534 - obj_size += r6xx_vs_size * 4; 535 - obj_size = ALIGN(obj_size, 256); 536 - 537 - rdev->r600_blit.ps_offset = obj_size; 538 - obj_size += r6xx_ps_size * 4; 539 - obj_size = ALIGN(obj_size, 256); 540 - 541 - /* pin copy shader into vram if not already initialized */ 542 - if (rdev->r600_blit.shader_obj == NULL) { 543 - r = radeon_bo_create(rdev, obj_size, PAGE_SIZE, true, 544 - RADEON_GEM_DOMAIN_VRAM, 545 - NULL, &rdev->r600_blit.shader_obj); 546 - if (r) { 547 - DRM_ERROR("r600 failed to allocate shader\n"); 548 - return r; 549 - } 550 - 551 - r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); 552 - if (unlikely(r != 0)) 553 - return r; 554 - r = radeon_bo_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM, 555 - &rdev->r600_blit.shader_gpu_addr); 556 - radeon_bo_unreserve(rdev->r600_blit.shader_obj); 557 - if (r) { 558 - dev_err(rdev->dev, "(%d) pin blit object failed\n", r); 559 - return r; 560 - } 561 - } 562 - 563 - DRM_DEBUG("r6xx blit allocated bo %08x vs %08x ps %08x\n", 564 - obj_size, 565 - rdev->r600_blit.vs_offset, rdev->r600_blit.ps_offset); 566 - 567 - r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); 568 - if (unlikely(r != 0)) 569 - return r; 570 - r = radeon_bo_kmap(rdev->r600_blit.shader_obj, &ptr); 571 - if (r) { 572 - DRM_ERROR("failed to map blit object %d\n", r); 573 - return r; 574 - } 575 - if (rdev->family >= CHIP_RV770) 576 - memcpy_toio(ptr + rdev->r600_blit.state_offset, 577 - r7xx_default_state, rdev->r600_blit.state_len * 4); 578 - else 579 - memcpy_toio(ptr + rdev->r600_blit.state_offset, 580 - r6xx_default_state, rdev->r600_blit.state_len * 4); 581 - if (num_packet2s) 582 - memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4), 583 - packet2s, num_packet2s * 4); 584 - for (i = 0; i < r6xx_vs_size; i++) 585 - *(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(r6xx_vs[i]); 586 - for (i = 0; i < r6xx_ps_size; i++) 587 - *(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(r6xx_ps[i]); 588 - radeon_bo_kunmap(rdev->r600_blit.shader_obj); 589 - radeon_bo_unreserve(rdev->r600_blit.shader_obj); 590 - 591 - radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); 592 - return 0; 593 - } 594 - 595 - void r600_blit_fini(struct radeon_device *rdev) 596 - { 597 - int r; 598 - 599 - radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); 600 - if (rdev->r600_blit.shader_obj == NULL) 601 - return; 602 - /* If we can't reserve the bo, unref should be enough to destroy 603 - * it when it becomes idle. 604 - */ 605 - r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); 606 - if (!r) { 607 - radeon_bo_unpin(rdev->r600_blit.shader_obj); 608 - radeon_bo_unreserve(rdev->r600_blit.shader_obj); 609 - } 610 - radeon_bo_unref(&rdev->r600_blit.shader_obj); 611 - } 612 - 613 - static unsigned r600_blit_create_rect(unsigned num_gpu_pages, 614 - int *width, int *height, int max_dim) 615 - { 616 - unsigned max_pages; 617 - unsigned pages = num_gpu_pages; 618 - int w, h; 619 - 620 - if (num_gpu_pages == 0) { 621 - /* not supposed to be called with no pages, but just in case */ 622 - h = 0; 623 - w = 0; 624 - pages = 0; 625 - WARN_ON(1); 626 - } else { 627 - int rect_order = 2; 628 - h = RECT_UNIT_H; 629 - while (num_gpu_pages / rect_order) { 630 - h *= 2; 631 - rect_order *= 4; 632 - if (h >= max_dim) { 633 - h = max_dim; 634 - break; 635 - } 636 - } 637 - max_pages = (max_dim * h) / (RECT_UNIT_W * RECT_UNIT_H); 638 - if (pages > max_pages) 639 - pages = max_pages; 640 - w = (pages * RECT_UNIT_W * RECT_UNIT_H) / h; 641 - w = (w / RECT_UNIT_W) * RECT_UNIT_W; 642 - pages = (w * h) / (RECT_UNIT_W * RECT_UNIT_H); 643 - BUG_ON(pages == 0); 644 - } 645 - 646 - 647 - DRM_DEBUG("blit_rectangle: h=%d, w=%d, pages=%d\n", h, w, pages); 648 - 649 - /* return width and height only of the caller wants it */ 650 - if (height) 651 - *height = h; 652 - if (width) 653 - *width = w; 654 - 655 - return pages; 656 - } 657 - 658 - 659 - int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages, 660 - struct radeon_fence **fence, struct radeon_sa_bo **vb, 661 - struct radeon_semaphore **sem) 662 - { 663 - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 664 - int r; 665 - int ring_size; 666 - int num_loops = 0; 667 - int dwords_per_loop = rdev->r600_blit.ring_size_per_loop; 668 - 669 - /* num loops */ 670 - while (num_gpu_pages) { 671 - num_gpu_pages -= 672 - r600_blit_create_rect(num_gpu_pages, NULL, NULL, 673 - rdev->r600_blit.max_dim); 674 - num_loops++; 675 - } 676 - 677 - /* 48 bytes for vertex per loop */ 678 - r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, vb, 679 - (num_loops*48)+256, 256, true); 680 - if (r) { 681 - return r; 682 - } 683 - 684 - r = radeon_semaphore_create(rdev, sem); 685 - if (r) { 686 - radeon_sa_bo_free(rdev, vb, NULL); 687 - return r; 688 - } 689 - 690 - /* calculate number of loops correctly */ 691 - ring_size = num_loops * dwords_per_loop; 692 - ring_size += rdev->r600_blit.ring_size_common; 693 - r = radeon_ring_lock(rdev, ring, ring_size); 694 - if (r) { 695 - radeon_sa_bo_free(rdev, vb, NULL); 696 - radeon_semaphore_free(rdev, sem, NULL); 697 - return r; 698 - } 699 - 700 - if (radeon_fence_need_sync(*fence, RADEON_RING_TYPE_GFX_INDEX)) { 701 - radeon_semaphore_sync_rings(rdev, *sem, (*fence)->ring, 702 - RADEON_RING_TYPE_GFX_INDEX); 703 - radeon_fence_note_sync(*fence, RADEON_RING_TYPE_GFX_INDEX); 704 - } else { 705 - radeon_semaphore_free(rdev, sem, NULL); 706 - } 707 - 708 - rdev->r600_blit.primitives.set_default_state(rdev); 709 - rdev->r600_blit.primitives.set_shaders(rdev); 710 - return 0; 711 - } 712 - 713 - void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence **fence, 714 - struct radeon_sa_bo *vb, struct radeon_semaphore *sem) 715 - { 716 - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 717 - int r; 718 - 719 - r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX); 720 - if (r) { 721 - radeon_ring_unlock_undo(rdev, ring); 722 - return; 723 - } 724 - 725 - radeon_ring_unlock_commit(rdev, ring); 726 - radeon_sa_bo_free(rdev, &vb, *fence); 727 - radeon_semaphore_free(rdev, &sem, *fence); 728 - } 729 - 730 - void r600_kms_blit_copy(struct radeon_device *rdev, 731 - u64 src_gpu_addr, u64 dst_gpu_addr, 732 - unsigned num_gpu_pages, 733 - struct radeon_sa_bo *vb) 734 - { 735 - u64 vb_gpu_addr; 736 - u32 *vb_cpu_addr; 737 - 738 - DRM_DEBUG("emitting copy %16llx %16llx %d\n", 739 - src_gpu_addr, dst_gpu_addr, num_gpu_pages); 740 - vb_cpu_addr = (u32 *)radeon_sa_bo_cpu_addr(vb); 741 - vb_gpu_addr = radeon_sa_bo_gpu_addr(vb); 742 - 743 - while (num_gpu_pages) { 744 - int w, h; 745 - unsigned size_in_bytes; 746 - unsigned pages_per_loop = 747 - r600_blit_create_rect(num_gpu_pages, &w, &h, 748 - rdev->r600_blit.max_dim); 749 - 750 - size_in_bytes = pages_per_loop * RADEON_GPU_PAGE_SIZE; 751 - DRM_DEBUG("rectangle w=%d h=%d\n", w, h); 752 - 753 - vb_cpu_addr[0] = 0; 754 - vb_cpu_addr[1] = 0; 755 - vb_cpu_addr[2] = 0; 756 - vb_cpu_addr[3] = 0; 757 - 758 - vb_cpu_addr[4] = 0; 759 - vb_cpu_addr[5] = int2float(h); 760 - vb_cpu_addr[6] = 0; 761 - vb_cpu_addr[7] = int2float(h); 762 - 763 - vb_cpu_addr[8] = int2float(w); 764 - vb_cpu_addr[9] = int2float(h); 765 - vb_cpu_addr[10] = int2float(w); 766 - vb_cpu_addr[11] = int2float(h); 767 - 768 - rdev->r600_blit.primitives.set_tex_resource(rdev, FMT_8_8_8_8, 769 - w, h, w, src_gpu_addr, size_in_bytes); 770 - rdev->r600_blit.primitives.set_render_target(rdev, COLOR_8_8_8_8, 771 - w, h, dst_gpu_addr); 772 - rdev->r600_blit.primitives.set_scissors(rdev, 0, 0, w, h); 773 - rdev->r600_blit.primitives.set_vtx_resource(rdev, vb_gpu_addr); 774 - rdev->r600_blit.primitives.draw_auto(rdev); 775 - rdev->r600_blit.primitives.cp_set_surface_sync(rdev, 776 - PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA, 777 - size_in_bytes, dst_gpu_addr); 778 - 779 - vb_cpu_addr += 12; 780 - vb_gpu_addr += 4*12; 781 - src_gpu_addr += size_in_bytes; 782 - dst_gpu_addr += size_in_bytes; 783 - num_gpu_pages -= pages_per_loop; 784 - } 785 - }
-1
drivers/gpu/drm/radeon/r600_blit_shaders.h
··· 35 35 extern const u32 r6xx_ps_size, r6xx_vs_size; 36 36 extern const u32 r6xx_default_size, r7xx_default_size; 37 37 38 - __pure uint32_t int2float(uint32_t x); 39 38 #endif
-30
drivers/gpu/drm/radeon/radeon.h
··· 844 844 bool enabled; 845 845 }; 846 846 847 - struct r600_blit_cp_primitives { 848 - void (*set_render_target)(struct radeon_device *rdev, int format, 849 - int w, int h, u64 gpu_addr); 850 - void (*cp_set_surface_sync)(struct radeon_device *rdev, 851 - u32 sync_type, u32 size, 852 - u64 mc_addr); 853 - void (*set_shaders)(struct radeon_device *rdev); 854 - void (*set_vtx_resource)(struct radeon_device *rdev, u64 gpu_addr); 855 - void (*set_tex_resource)(struct radeon_device *rdev, 856 - int format, int w, int h, int pitch, 857 - u64 gpu_addr, u32 size); 858 - void (*set_scissors)(struct radeon_device *rdev, int x1, int y1, 859 - int x2, int y2); 860 - void (*draw_auto)(struct radeon_device *rdev); 861 - void (*set_default_state)(struct radeon_device *rdev); 862 - }; 863 - 864 - struct r600_blit { 865 - struct radeon_bo *shader_obj; 866 - struct r600_blit_cp_primitives primitives; 867 - int max_dim; 868 - int ring_size_common; 869 - int ring_size_per_loop; 870 - u64 shader_gpu_addr; 871 - u32 vs_offset, ps_offset; 872 - u32 state_offset; 873 - u32 state_len; 874 - }; 875 - 876 847 /* 877 848 * RLC stuff 878 849 */ ··· 2037 2066 const struct firmware *sdma_fw; /* CIK SDMA firmware */ 2038 2067 const struct firmware *smc_fw; /* SMC firmware */ 2039 2068 const struct firmware *uvd_fw; /* UVD firmware */ 2040 - struct r600_blit r600_blit; 2041 2069 struct r600_vram_scratch vram_scratch; 2042 2070 int msi_enabled; /* msi enabled */ 2043 2071 struct r600_ih ih; /* r6/700 interrupt ring */
-16
drivers/gpu/drm/radeon/radeon_asic.h
··· 337 337 int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *cp); 338 338 int r600_dma_ring_test(struct radeon_device *rdev, struct radeon_ring *cp); 339 339 int r600_uvd_ring_test(struct radeon_device *rdev, struct radeon_ring *ring); 340 - int r600_copy_blit(struct radeon_device *rdev, 341 - uint64_t src_offset, uint64_t dst_offset, 342 - unsigned num_gpu_pages, struct radeon_fence **fence); 343 340 int r600_copy_cpdma(struct radeon_device *rdev, 344 341 uint64_t src_offset, uint64_t dst_offset, 345 342 unsigned num_gpu_pages, struct radeon_fence **fence); ··· 368 371 int r600_mc_wait_for_idle(struct radeon_device *rdev); 369 372 int r600_pcie_gart_init(struct radeon_device *rdev); 370 373 void r600_scratch_init(struct radeon_device *rdev); 371 - int r600_blit_init(struct radeon_device *rdev); 372 - void r600_blit_fini(struct radeon_device *rdev); 373 374 int r600_init_microcode(struct radeon_device *rdev); 374 375 /* r600 irq */ 375 376 int r600_irq_process(struct radeon_device *rdev); ··· 386 391 void r600_hdmi_update_audio_settings(struct drm_encoder *encoder); 387 392 void r600_hdmi_enable(struct drm_encoder *encoder, bool enable); 388 393 void r600_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode *mode); 389 - /* r600 blit */ 390 - int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages, 391 - struct radeon_fence **fence, struct radeon_sa_bo **vb, 392 - struct radeon_semaphore **sem); 393 - void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence **fence, 394 - struct radeon_sa_bo *vb, struct radeon_semaphore *sem); 395 - void r600_kms_blit_copy(struct radeon_device *rdev, 396 - u64 src_gpu_addr, u64 dst_gpu_addr, 397 - unsigned num_gpu_pages, 398 - struct radeon_sa_bo *vb); 399 394 int r600_mc_wait_for_idle(struct radeon_device *rdev); 400 395 u32 r600_get_xclk(struct radeon_device *rdev); 401 396 uint64_t r600_get_gpu_clock_counter(struct radeon_device *rdev); ··· 515 530 extern void evergreen_post_page_flip(struct radeon_device *rdev, int crtc); 516 531 extern void dce4_wait_for_vblank(struct radeon_device *rdev, int crtc); 517 532 void evergreen_disable_interrupt_state(struct radeon_device *rdev); 518 - int evergreen_blit_init(struct radeon_device *rdev); 519 533 int evergreen_mc_wait_for_idle(struct radeon_device *rdev); 520 534 void evergreen_dma_fence_ring_emit(struct radeon_device *rdev, 521 535 struct radeon_fence *fence);
-44
drivers/gpu/drm/radeon/radeon_blit_common.h
··· 1 - /* 2 - * Copyright 2009 Advanced Micro Devices, Inc. 3 - * Copyright 2009 Red Hat Inc. 4 - * Copyright 2012 Alcatel-Lucent, Inc. 5 - * 6 - * Permission is hereby granted, free of charge, to any person obtaining a 7 - * copy of this software and associated documentation files (the "Software"), 8 - * to deal in the Software without restriction, including without limitation 9 - * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 - * and/or sell copies of the Software, and to permit persons to whom the 11 - * Software is furnished to do so, subject to the following conditions: 12 - * 13 - * The above copyright notice and this permission notice (including the next 14 - * paragraph) shall be included in all copies or substantial portions of the 15 - * Software. 16 - * 17 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 - * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 21 - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 22 - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 - * DEALINGS IN THE SOFTWARE. 24 - * 25 - */ 26 - 27 - #ifndef __RADEON_BLIT_COMMON_H__ 28 - 29 - #define DI_PT_RECTLIST 0x11 30 - #define DI_INDEX_SIZE_16_BIT 0x0 31 - #define DI_SRC_SEL_AUTO_INDEX 0x2 32 - 33 - #define FMT_8 0x1 34 - #define FMT_5_6_5 0x8 35 - #define FMT_8_8_8_8 0x1a 36 - #define COLOR_8 0x1 37 - #define COLOR_5_6_5 0x8 38 - #define COLOR_8_8_8_8 0x1a 39 - 40 - #define RECT_UNIT_H 32 41 - #define RECT_UNIT_W (RADEON_GPU_PAGE_SIZE / 4 / RECT_UNIT_H) 42 - 43 - #define __RADEON_BLIT_COMMON_H__ 44 - #endif
-7
drivers/gpu/drm/radeon/rv770.c
··· 1852 1852 } 1853 1853 1854 1854 rv770_gpu_init(rdev); 1855 - r = r600_blit_init(rdev); 1856 - if (r) { 1857 - r600_blit_fini(rdev); 1858 - rdev->asic->copy.copy = NULL; 1859 - dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r); 1860 - } 1861 1855 1862 1856 /* allocate wb buffer */ 1863 1857 r = radeon_wb_init(rdev); ··· 2086 2092 2087 2093 void rv770_fini(struct radeon_device *rdev) 2088 2094 { 2089 - r600_blit_fini(rdev); 2090 2095 r700_cp_fini(rdev); 2091 2096 r600_dma_fini(rdev); 2092 2097 r600_irq_fini(rdev);