Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/radeon/kms: add blit support for cayman (v2)

Allows us to use the 3D engine for memory management
and allows us to use vram beyond the BAR aperture.

v2: fix copy paste typo
Reported-by: Nils Wallménius <nils.wallmenius@gmail.com>

Signed-off-by: Alex Deucher <alexdeucher@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>

Authored by Alex Deucher and committed by Dave Airlie
cb92d452 ac10f81d

+597 -254
+322 -4
drivers/gpu/drm/radeon/cayman_blit_shaders.c
··· 39 39 40 40 const u32 cayman_default_state[] = 41 41 { 42 - /* XXX fill in additional blit state */ 42 + 0xc0066900, 43 + 0x00000000, 44 + 0x00000060, /* DB_RENDER_CONTROL */ 45 + 0x00000000, /* DB_COUNT_CONTROL */ 46 + 0x00000000, /* DB_DEPTH_VIEW */ 47 + 0x0000002a, /* DB_RENDER_OVERRIDE */ 48 + 0x00000000, /* DB_RENDER_OVERRIDE2 */ 49 + 0x00000000, /* DB_HTILE_DATA_BASE */ 43 50 44 51 0xc0026900, 45 - 0x00000316, 46 - 0x0000000e, /* VGT_VERTEX_REUSE_BLOCK_CNTL */ 47 - 0x00000010, /* */ 52 + 0x0000000a, 53 + 0x00000000, /* DB_STENCIL_CLEAR */ 54 + 0x00000000, /* DB_DEPTH_CLEAR */ 55 + 56 + 0xc0036900, 57 + 0x0000000f, 58 + 0x00000000, /* DB_DEPTH_INFO */ 59 + 0x00000000, /* DB_Z_INFO */ 60 + 0x00000000, /* DB_STENCIL_INFO */ 61 + 62 + 0xc0016900, 63 + 0x00000080, 64 + 0x00000000, /* PA_SC_WINDOW_OFFSET */ 65 + 66 + 0xc00d6900, 67 + 0x00000083, 68 + 0x0000ffff, /* PA_SC_CLIPRECT_RULE */ 69 + 0x00000000, /* PA_SC_CLIPRECT_0_TL */ 70 + 0x20002000, /* PA_SC_CLIPRECT_0_BR */ 71 + 0x00000000, 72 + 0x20002000, 73 + 0x00000000, 74 + 0x20002000, 75 + 0x00000000, 76 + 0x20002000, 77 + 0xaaaaaaaa, /* PA_SC_EDGERULE */ 78 + 0x00000000, /* PA_SU_HARDWARE_SCREEN_OFFSET */ 79 + 0x0000000f, /* CB_TARGET_MASK */ 80 + 0x0000000f, /* CB_SHADER_MASK */ 81 + 82 + 0xc0226900, 83 + 0x00000094, 84 + 0x80000000, /* PA_SC_VPORT_SCISSOR_0_TL */ 85 + 0x20002000, /* PA_SC_VPORT_SCISSOR_0_BR */ 86 + 0x80000000, 87 + 0x20002000, 88 + 0x80000000, 89 + 0x20002000, 90 + 0x80000000, 91 + 0x20002000, 92 + 0x80000000, 93 + 0x20002000, 94 + 0x80000000, 95 + 0x20002000, 96 + 0x80000000, 97 + 0x20002000, 98 + 0x80000000, 99 + 0x20002000, 100 + 0x80000000, 101 + 0x20002000, 102 + 0x80000000, 103 + 0x20002000, 104 + 0x80000000, 105 + 0x20002000, 106 + 0x80000000, 107 + 0x20002000, 108 + 0x80000000, 109 + 0x20002000, 110 + 0x80000000, 111 + 0x20002000, 112 + 0x80000000, 113 + 0x20002000, 114 + 0x80000000, 115 + 0x20002000, 116 + 0x00000000, /* PA_SC_VPORT_ZMIN_0 */ 117 + 0x3f800000, /* 
PA_SC_VPORT_ZMAX_0 */ 118 + 119 + 0xc0016900, 120 + 0x000000d4, 121 + 0x00000000, /* SX_MISC */ 48 122 49 123 0xc0026900, 50 124 0x000000d9, 51 125 0x00000000, /* CP_RINGID */ 52 126 0x00000000, /* CP_VMID */ 127 + 128 + 0xc0096900, 129 + 0x00000100, 130 + 0x00ffffff, /* VGT_MAX_VTX_INDX */ 131 + 0x00000000, /* VGT_MIN_VTX_INDX */ 132 + 0x00000000, /* VGT_INDX_OFFSET */ 133 + 0x00000000, /* VGT_MULTI_PRIM_IB_RESET_INDX */ 134 + 0x00000000, /* SX_ALPHA_TEST_CONTROL */ 135 + 0x00000000, /* CB_BLEND_RED */ 136 + 0x00000000, /* CB_BLEND_GREEN */ 137 + 0x00000000, /* CB_BLEND_BLUE */ 138 + 0x00000000, /* CB_BLEND_ALPHA */ 139 + 140 + 0xc0016900, 141 + 0x00000187, 142 + 0x00000100, /* SPI_VS_OUT_ID_0 */ 143 + 144 + 0xc0026900, 145 + 0x00000191, 146 + 0x00000100, /* SPI_PS_INPUT_CNTL_0 */ 147 + 0x00000101, /* SPI_PS_INPUT_CNTL_1 */ 148 + 149 + 0xc0016900, 150 + 0x000001b1, 151 + 0x00000000, /* SPI_VS_OUT_CONFIG */ 152 + 153 + 0xc0106900, 154 + 0x000001b3, 155 + 0x20000001, /* SPI_PS_IN_CONTROL_0 */ 156 + 0x00000000, /* SPI_PS_IN_CONTROL_1 */ 157 + 0x00000000, /* SPI_INTERP_CONTROL_0 */ 158 + 0x00000000, /* SPI_INPUT_Z */ 159 + 0x00000000, /* SPI_FOG_CNTL */ 160 + 0x00100000, /* SPI_BARYC_CNTL */ 161 + 0x00000000, /* SPI_PS_IN_CONTROL_2 */ 162 + 0x00000000, /* SPI_COMPUTE_INPUT_CNTL */ 163 + 0x00000000, /* SPI_COMPUTE_NUM_THREAD_X */ 164 + 0x00000000, /* SPI_COMPUTE_NUM_THREAD_Y */ 165 + 0x00000000, /* SPI_COMPUTE_NUM_THREAD_Z */ 166 + 0x00000000, /* SPI_GPR_MGMT */ 167 + 0x00000000, /* SPI_LDS_MGMT */ 168 + 0x00000000, /* SPI_STACK_MGMT */ 169 + 0x00000000, /* SPI_WAVE_MGMT_1 */ 170 + 0x00000000, /* SPI_WAVE_MGMT_2 */ 171 + 172 + 0xc0016900, 173 + 0x000001e0, 174 + 0x00000000, /* CB_BLEND0_CONTROL */ 175 + 176 + 0xc00e6900, 177 + 0x00000200, 178 + 0x00000000, /* DB_DEPTH_CONTROL */ 179 + 0x00000000, /* DB_EQAA */ 180 + 0x00cc0010, /* CB_COLOR_CONTROL */ 181 + 0x00000210, /* DB_SHADER_CONTROL */ 182 + 0x00010000, /* PA_CL_CLIP_CNTL */ 183 + 0x00000004, /* 
PA_SU_SC_MODE_CNTL */ 184 + 0x00000100, /* PA_CL_VTE_CNTL */ 185 + 0x00000000, /* PA_CL_VS_OUT_CNTL */ 186 + 0x00000000, /* PA_CL_NANINF_CNTL */ 187 + 0x00000000, /* PA_SU_LINE_STIPPLE_CNTL */ 188 + 0x00000000, /* PA_SU_LINE_STIPPLE_SCALE */ 189 + 0x00000000, /* PA_SU_PRIM_FILTER_CNTL */ 190 + 0x00000000, /* */ 191 + 0x00000000, /* */ 192 + 193 + 0xc0026900, 194 + 0x00000229, 195 + 0x00000000, /* SQ_PGM_START_FS */ 196 + 0x00000000, 197 + 198 + 0xc0016900, 199 + 0x0000023b, 200 + 0x00000000, /* SQ_LDS_ALLOC_PS */ 201 + 202 + 0xc0066900, 203 + 0x00000240, 204 + 0x00000000, /* SQ_ESGS_RING_ITEMSIZE */ 205 + 0x00000000, 206 + 0x00000000, 207 + 0x00000000, 208 + 0x00000000, 209 + 0x00000000, 210 + 211 + 0xc0046900, 212 + 0x00000247, 213 + 0x00000000, /* SQ_GS_VERT_ITEMSIZE */ 214 + 0x00000000, 215 + 0x00000000, 216 + 0x00000000, 217 + 218 + 0xc0116900, 219 + 0x00000280, 220 + 0x00000000, /* PA_SU_POINT_SIZE */ 221 + 0x00000000, /* PA_SU_POINT_MINMAX */ 222 + 0x00000008, /* PA_SU_LINE_CNTL */ 223 + 0x00000000, /* PA_SC_LINE_STIPPLE */ 224 + 0x00000000, /* VGT_OUTPUT_PATH_CNTL */ 225 + 0x00000000, /* VGT_HOS_CNTL */ 226 + 0x00000000, 227 + 0x00000000, 228 + 0x00000000, 229 + 0x00000000, 230 + 0x00000000, 231 + 0x00000000, 232 + 0x00000000, 233 + 0x00000000, 234 + 0x00000000, 235 + 0x00000000, 236 + 0x00000000, /* VGT_GS_MODE */ 237 + 238 + 0xc0026900, 239 + 0x00000292, 240 + 0x00000000, /* PA_SC_MODE_CNTL_0 */ 241 + 0x00000000, /* PA_SC_MODE_CNTL_1 */ 242 + 243 + 0xc0016900, 244 + 0x000002a1, 245 + 0x00000000, /* VGT_PRIMITIVEID_EN */ 246 + 247 + 0xc0016900, 248 + 0x000002a5, 249 + 0x00000000, /* VGT_MULTI_PRIM_IB_RESET_EN */ 250 + 251 + 0xc0026900, 252 + 0x000002a8, 253 + 0x00000000, /* VGT_INSTANCE_STEP_RATE_0 */ 254 + 0x00000000, 255 + 256 + 0xc0026900, 257 + 0x000002ad, 258 + 0x00000000, /* VGT_REUSE_OFF */ 259 + 0x00000000, 260 + 261 + 0xc0016900, 262 + 0x000002d5, 263 + 0x00000000, /* VGT_SHADER_STAGES_EN */ 264 + 265 + 0xc0016900, 266 + 0x000002dc, 267 + 
0x0000aa00, /* DB_ALPHA_TO_MASK */ 268 + 269 + 0xc0066900, 270 + 0x000002de, 271 + 0x00000000, /* PA_SU_POLY_OFFSET_DB_FMT_CNTL */ 272 + 0x00000000, 273 + 0x00000000, 274 + 0x00000000, 275 + 0x00000000, 276 + 0x00000000, 277 + 278 + 0xc0026900, 279 + 0x000002e5, 280 + 0x00000000, /* VGT_STRMOUT_CONFIG */ 281 + 0x00000000, 282 + 283 + 0xc01b6900, 284 + 0x000002f5, 285 + 0x76543210, /* PA_SC_CENTROID_PRIORITY_0 */ 286 + 0xfedcba98, /* PA_SC_CENTROID_PRIORITY_1 */ 287 + 0x00000000, /* PA_SC_LINE_CNTL */ 288 + 0x00000000, /* PA_SC_AA_CONFIG */ 289 + 0x00000005, /* PA_SU_VTX_CNTL */ 290 + 0x3f800000, /* PA_CL_GB_VERT_CLIP_ADJ */ 291 + 0x3f800000, /* PA_CL_GB_VERT_DISC_ADJ */ 292 + 0x3f800000, /* PA_CL_GB_HORZ_CLIP_ADJ */ 293 + 0x3f800000, /* PA_CL_GB_HORZ_DISC_ADJ */ 294 + 0x00000000, /* PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */ 295 + 0x00000000, 296 + 0x00000000, 297 + 0x00000000, 298 + 0x00000000, 299 + 0x00000000, 300 + 0x00000000, 301 + 0x00000000, 302 + 0x00000000, 303 + 0x00000000, 304 + 0x00000000, 305 + 0x00000000, 306 + 0x00000000, 307 + 0x00000000, 308 + 0x00000000, 309 + 0x00000000, 310 + 0xffffffff, /* PA_SC_AA_MASK_X0Y0_X1Y0 */ 311 + 0xffffffff, 312 + 313 + 0xc0026900, 314 + 0x00000316, 315 + 0x0000000e, /* VGT_VERTEX_REUSE_BLOCK_CNTL */ 316 + 0x00000010, /* */ 53 317 }; 54 318 319 + const u32 cayman_vs[] = 320 + { 321 + 0x00000004, 322 + 0x80400400, 323 + 0x0000a03c, 324 + 0x95000688, 325 + 0x00004000, 326 + 0x15000688, 327 + 0x00000000, 328 + 0x88000000, 329 + 0x04000000, 330 + 0x67961001, 331 + #ifdef __BIG_ENDIAN 332 + 0x00020000, 333 + #else 334 + 0x00000000, 335 + #endif 336 + 0x00000000, 337 + 0x04000000, 338 + 0x67961000, 339 + #ifdef __BIG_ENDIAN 340 + 0x00020008, 341 + #else 342 + 0x00000008, 343 + #endif 344 + 0x00000000, 345 + }; 346 + 347 + const u32 cayman_ps[] = 348 + { 349 + 0x00000004, 350 + 0xa00c0000, 351 + 0x00000008, 352 + 0x80400000, 353 + 0x00000000, 354 + 0x95000688, 355 + 0x00000000, 356 + 0x88000000, 357 + 0x00380400, 358 + 0x00146b10, 
359 + 0x00380000, 360 + 0x20146b10, 361 + 0x00380400, 362 + 0x40146b00, 363 + 0x80380000, 364 + 0x60146b00, 365 + 0x00000010, 366 + 0x000d1000, 367 + 0xb0800000, 368 + 0x00000000, 369 + }; 370 + 371 + const u32 cayman_ps_size = ARRAY_SIZE(cayman_ps); 372 + const u32 cayman_vs_size = ARRAY_SIZE(cayman_vs); 55 373 const u32 cayman_default_size = ARRAY_SIZE(cayman_default_state);
+3
drivers/gpu/drm/radeon/cayman_blit_shaders.h
··· 25 25 #ifndef CAYMAN_BLIT_SHADERS_H 26 26 #define CAYMAN_BLIT_SHADERS_H 27 27 28 + extern const u32 cayman_ps[]; 29 + extern const u32 cayman_vs[]; 28 30 extern const u32 cayman_default_state[]; 29 31 32 + extern const u32 cayman_ps_size, cayman_vs_size; 30 33 extern const u32 cayman_default_size; 31 34 32 35 #endif
+264 -239
drivers/gpu/drm/radeon/evergreen_blit_kms.c
··· 31 31 32 32 #include "evergreend.h" 33 33 #include "evergreen_blit_shaders.h" 34 + #include "cayman_blit_shaders.h" 34 35 35 36 #define DI_PT_RECTLIST 0x11 36 37 #define DI_INDEX_SIZE_16_BIT 0x0 ··· 266 265 u64 gpu_addr; 267 266 int dwords; 268 267 269 - switch (rdev->family) { 270 - case CHIP_CEDAR: 271 - default: 272 - num_ps_gprs = 93; 273 - num_vs_gprs = 46; 274 - num_temp_gprs = 4; 275 - num_gs_gprs = 31; 276 - num_es_gprs = 31; 277 - num_hs_gprs = 23; 278 - num_ls_gprs = 23; 279 - num_ps_threads = 96; 280 - num_vs_threads = 16; 281 - num_gs_threads = 16; 282 - num_es_threads = 16; 283 - num_hs_threads = 16; 284 - num_ls_threads = 16; 285 - num_ps_stack_entries = 42; 286 - num_vs_stack_entries = 42; 287 - num_gs_stack_entries = 42; 288 - num_es_stack_entries = 42; 289 - num_hs_stack_entries = 42; 290 - num_ls_stack_entries = 42; 291 - break; 292 - case CHIP_REDWOOD: 293 - num_ps_gprs = 93; 294 - num_vs_gprs = 46; 295 - num_temp_gprs = 4; 296 - num_gs_gprs = 31; 297 - num_es_gprs = 31; 298 - num_hs_gprs = 23; 299 - num_ls_gprs = 23; 300 - num_ps_threads = 128; 301 - num_vs_threads = 20; 302 - num_gs_threads = 20; 303 - num_es_threads = 20; 304 - num_hs_threads = 20; 305 - num_ls_threads = 20; 306 - num_ps_stack_entries = 42; 307 - num_vs_stack_entries = 42; 308 - num_gs_stack_entries = 42; 309 - num_es_stack_entries = 42; 310 - num_hs_stack_entries = 42; 311 - num_ls_stack_entries = 42; 312 - break; 313 - case CHIP_JUNIPER: 314 - num_ps_gprs = 93; 315 - num_vs_gprs = 46; 316 - num_temp_gprs = 4; 317 - num_gs_gprs = 31; 318 - num_es_gprs = 31; 319 - num_hs_gprs = 23; 320 - num_ls_gprs = 23; 321 - num_ps_threads = 128; 322 - num_vs_threads = 20; 323 - num_gs_threads = 20; 324 - num_es_threads = 20; 325 - num_hs_threads = 20; 326 - num_ls_threads = 20; 327 - num_ps_stack_entries = 85; 328 - num_vs_stack_entries = 85; 329 - num_gs_stack_entries = 85; 330 - num_es_stack_entries = 85; 331 - num_hs_stack_entries = 85; 332 - num_ls_stack_entries = 85; 333 - break; 
334 - case CHIP_CYPRESS: 335 - case CHIP_HEMLOCK: 336 - num_ps_gprs = 93; 337 - num_vs_gprs = 46; 338 - num_temp_gprs = 4; 339 - num_gs_gprs = 31; 340 - num_es_gprs = 31; 341 - num_hs_gprs = 23; 342 - num_ls_gprs = 23; 343 - num_ps_threads = 128; 344 - num_vs_threads = 20; 345 - num_gs_threads = 20; 346 - num_es_threads = 20; 347 - num_hs_threads = 20; 348 - num_ls_threads = 20; 349 - num_ps_stack_entries = 85; 350 - num_vs_stack_entries = 85; 351 - num_gs_stack_entries = 85; 352 - num_es_stack_entries = 85; 353 - num_hs_stack_entries = 85; 354 - num_ls_stack_entries = 85; 355 - break; 356 - case CHIP_PALM: 357 - num_ps_gprs = 93; 358 - num_vs_gprs = 46; 359 - num_temp_gprs = 4; 360 - num_gs_gprs = 31; 361 - num_es_gprs = 31; 362 - num_hs_gprs = 23; 363 - num_ls_gprs = 23; 364 - num_ps_threads = 96; 365 - num_vs_threads = 16; 366 - num_gs_threads = 16; 367 - num_es_threads = 16; 368 - num_hs_threads = 16; 369 - num_ls_threads = 16; 370 - num_ps_stack_entries = 42; 371 - num_vs_stack_entries = 42; 372 - num_gs_stack_entries = 42; 373 - num_es_stack_entries = 42; 374 - num_hs_stack_entries = 42; 375 - num_ls_stack_entries = 42; 376 - break; 377 - case CHIP_BARTS: 378 - num_ps_gprs = 93; 379 - num_vs_gprs = 46; 380 - num_temp_gprs = 4; 381 - num_gs_gprs = 31; 382 - num_es_gprs = 31; 383 - num_hs_gprs = 23; 384 - num_ls_gprs = 23; 385 - num_ps_threads = 128; 386 - num_vs_threads = 20; 387 - num_gs_threads = 20; 388 - num_es_threads = 20; 389 - num_hs_threads = 20; 390 - num_ls_threads = 20; 391 - num_ps_stack_entries = 85; 392 - num_vs_stack_entries = 85; 393 - num_gs_stack_entries = 85; 394 - num_es_stack_entries = 85; 395 - num_hs_stack_entries = 85; 396 - num_ls_stack_entries = 85; 397 - break; 398 - case CHIP_TURKS: 399 - num_ps_gprs = 93; 400 - num_vs_gprs = 46; 401 - num_temp_gprs = 4; 402 - num_gs_gprs = 31; 403 - num_es_gprs = 31; 404 - num_hs_gprs = 23; 405 - num_ls_gprs = 23; 406 - num_ps_threads = 128; 407 - num_vs_threads = 20; 408 - num_gs_threads = 20; 
409 - num_es_threads = 20; 410 - num_hs_threads = 20; 411 - num_ls_threads = 20; 412 - num_ps_stack_entries = 42; 413 - num_vs_stack_entries = 42; 414 - num_gs_stack_entries = 42; 415 - num_es_stack_entries = 42; 416 - num_hs_stack_entries = 42; 417 - num_ls_stack_entries = 42; 418 - break; 419 - case CHIP_CAICOS: 420 - num_ps_gprs = 93; 421 - num_vs_gprs = 46; 422 - num_temp_gprs = 4; 423 - num_gs_gprs = 31; 424 - num_es_gprs = 31; 425 - num_hs_gprs = 23; 426 - num_ls_gprs = 23; 427 - num_ps_threads = 128; 428 - num_vs_threads = 10; 429 - num_gs_threads = 10; 430 - num_es_threads = 10; 431 - num_hs_threads = 10; 432 - num_ls_threads = 10; 433 - num_ps_stack_entries = 42; 434 - num_vs_stack_entries = 42; 435 - num_gs_stack_entries = 42; 436 - num_es_stack_entries = 42; 437 - num_hs_stack_entries = 42; 438 - num_ls_stack_entries = 42; 439 - break; 440 - } 441 - 442 - if ((rdev->family == CHIP_CEDAR) || 443 - (rdev->family == CHIP_PALM) || 444 - (rdev->family == CHIP_CAICOS)) 445 - sq_config = 0; 446 - else 447 - sq_config = VC_ENABLE; 448 - 449 - sq_config |= (EXPORT_SRC_C | 450 - CS_PRIO(0) | 451 - LS_PRIO(0) | 452 - HS_PRIO(0) | 453 - PS_PRIO(0) | 454 - VS_PRIO(1) | 455 - GS_PRIO(2) | 456 - ES_PRIO(3)); 457 - 458 - sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) | 459 - NUM_VS_GPRS(num_vs_gprs) | 460 - NUM_CLAUSE_TEMP_GPRS(num_temp_gprs)); 461 - sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) | 462 - NUM_ES_GPRS(num_es_gprs)); 463 - sq_gpr_resource_mgmt_3 = (NUM_HS_GPRS(num_hs_gprs) | 464 - NUM_LS_GPRS(num_ls_gprs)); 465 - sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) | 466 - NUM_VS_THREADS(num_vs_threads) | 467 - NUM_GS_THREADS(num_gs_threads) | 468 - NUM_ES_THREADS(num_es_threads)); 469 - sq_thread_resource_mgmt_2 = (NUM_HS_THREADS(num_hs_threads) | 470 - NUM_LS_THREADS(num_ls_threads)); 471 - sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) | 472 - NUM_VS_STACK_ENTRIES(num_vs_stack_entries)); 473 - 
sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) | 474 - NUM_ES_STACK_ENTRIES(num_es_stack_entries)); 475 - sq_stack_resource_mgmt_3 = (NUM_HS_STACK_ENTRIES(num_hs_stack_entries) | 476 - NUM_LS_STACK_ENTRIES(num_ls_stack_entries)); 477 - 478 268 /* set clear context state */ 479 269 radeon_ring_write(rdev, PACKET3(PACKET3_CLEAR_STATE, 0)); 480 270 radeon_ring_write(rdev, 0); 481 271 482 - /* disable dyn gprs */ 483 - radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 484 - radeon_ring_write(rdev, (SQ_DYN_GPR_CNTL_PS_FLUSH_REQ - PACKET3_SET_CONFIG_REG_START) >> 2); 485 - radeon_ring_write(rdev, 0); 272 + if (rdev->family < CHIP_CAYMAN) { 273 + switch (rdev->family) { 274 + case CHIP_CEDAR: 275 + default: 276 + num_ps_gprs = 93; 277 + num_vs_gprs = 46; 278 + num_temp_gprs = 4; 279 + num_gs_gprs = 31; 280 + num_es_gprs = 31; 281 + num_hs_gprs = 23; 282 + num_ls_gprs = 23; 283 + num_ps_threads = 96; 284 + num_vs_threads = 16; 285 + num_gs_threads = 16; 286 + num_es_threads = 16; 287 + num_hs_threads = 16; 288 + num_ls_threads = 16; 289 + num_ps_stack_entries = 42; 290 + num_vs_stack_entries = 42; 291 + num_gs_stack_entries = 42; 292 + num_es_stack_entries = 42; 293 + num_hs_stack_entries = 42; 294 + num_ls_stack_entries = 42; 295 + break; 296 + case CHIP_REDWOOD: 297 + num_ps_gprs = 93; 298 + num_vs_gprs = 46; 299 + num_temp_gprs = 4; 300 + num_gs_gprs = 31; 301 + num_es_gprs = 31; 302 + num_hs_gprs = 23; 303 + num_ls_gprs = 23; 304 + num_ps_threads = 128; 305 + num_vs_threads = 20; 306 + num_gs_threads = 20; 307 + num_es_threads = 20; 308 + num_hs_threads = 20; 309 + num_ls_threads = 20; 310 + num_ps_stack_entries = 42; 311 + num_vs_stack_entries = 42; 312 + num_gs_stack_entries = 42; 313 + num_es_stack_entries = 42; 314 + num_hs_stack_entries = 42; 315 + num_ls_stack_entries = 42; 316 + break; 317 + case CHIP_JUNIPER: 318 + num_ps_gprs = 93; 319 + num_vs_gprs = 46; 320 + num_temp_gprs = 4; 321 + num_gs_gprs = 31; 322 + num_es_gprs = 
31; 323 + num_hs_gprs = 23; 324 + num_ls_gprs = 23; 325 + num_ps_threads = 128; 326 + num_vs_threads = 20; 327 + num_gs_threads = 20; 328 + num_es_threads = 20; 329 + num_hs_threads = 20; 330 + num_ls_threads = 20; 331 + num_ps_stack_entries = 85; 332 + num_vs_stack_entries = 85; 333 + num_gs_stack_entries = 85; 334 + num_es_stack_entries = 85; 335 + num_hs_stack_entries = 85; 336 + num_ls_stack_entries = 85; 337 + break; 338 + case CHIP_CYPRESS: 339 + case CHIP_HEMLOCK: 340 + num_ps_gprs = 93; 341 + num_vs_gprs = 46; 342 + num_temp_gprs = 4; 343 + num_gs_gprs = 31; 344 + num_es_gprs = 31; 345 + num_hs_gprs = 23; 346 + num_ls_gprs = 23; 347 + num_ps_threads = 128; 348 + num_vs_threads = 20; 349 + num_gs_threads = 20; 350 + num_es_threads = 20; 351 + num_hs_threads = 20; 352 + num_ls_threads = 20; 353 + num_ps_stack_entries = 85; 354 + num_vs_stack_entries = 85; 355 + num_gs_stack_entries = 85; 356 + num_es_stack_entries = 85; 357 + num_hs_stack_entries = 85; 358 + num_ls_stack_entries = 85; 359 + break; 360 + case CHIP_PALM: 361 + num_ps_gprs = 93; 362 + num_vs_gprs = 46; 363 + num_temp_gprs = 4; 364 + num_gs_gprs = 31; 365 + num_es_gprs = 31; 366 + num_hs_gprs = 23; 367 + num_ls_gprs = 23; 368 + num_ps_threads = 96; 369 + num_vs_threads = 16; 370 + num_gs_threads = 16; 371 + num_es_threads = 16; 372 + num_hs_threads = 16; 373 + num_ls_threads = 16; 374 + num_ps_stack_entries = 42; 375 + num_vs_stack_entries = 42; 376 + num_gs_stack_entries = 42; 377 + num_es_stack_entries = 42; 378 + num_hs_stack_entries = 42; 379 + num_ls_stack_entries = 42; 380 + break; 381 + case CHIP_BARTS: 382 + num_ps_gprs = 93; 383 + num_vs_gprs = 46; 384 + num_temp_gprs = 4; 385 + num_gs_gprs = 31; 386 + num_es_gprs = 31; 387 + num_hs_gprs = 23; 388 + num_ls_gprs = 23; 389 + num_ps_threads = 128; 390 + num_vs_threads = 20; 391 + num_gs_threads = 20; 392 + num_es_threads = 20; 393 + num_hs_threads = 20; 394 + num_ls_threads = 20; 395 + num_ps_stack_entries = 85; 396 + num_vs_stack_entries = 
85; 397 + num_gs_stack_entries = 85; 398 + num_es_stack_entries = 85; 399 + num_hs_stack_entries = 85; 400 + num_ls_stack_entries = 85; 401 + break; 402 + case CHIP_TURKS: 403 + num_ps_gprs = 93; 404 + num_vs_gprs = 46; 405 + num_temp_gprs = 4; 406 + num_gs_gprs = 31; 407 + num_es_gprs = 31; 408 + num_hs_gprs = 23; 409 + num_ls_gprs = 23; 410 + num_ps_threads = 128; 411 + num_vs_threads = 20; 412 + num_gs_threads = 20; 413 + num_es_threads = 20; 414 + num_hs_threads = 20; 415 + num_ls_threads = 20; 416 + num_ps_stack_entries = 42; 417 + num_vs_stack_entries = 42; 418 + num_gs_stack_entries = 42; 419 + num_es_stack_entries = 42; 420 + num_hs_stack_entries = 42; 421 + num_ls_stack_entries = 42; 422 + break; 423 + case CHIP_CAICOS: 424 + num_ps_gprs = 93; 425 + num_vs_gprs = 46; 426 + num_temp_gprs = 4; 427 + num_gs_gprs = 31; 428 + num_es_gprs = 31; 429 + num_hs_gprs = 23; 430 + num_ls_gprs = 23; 431 + num_ps_threads = 128; 432 + num_vs_threads = 10; 433 + num_gs_threads = 10; 434 + num_es_threads = 10; 435 + num_hs_threads = 10; 436 + num_ls_threads = 10; 437 + num_ps_stack_entries = 42; 438 + num_vs_stack_entries = 42; 439 + num_gs_stack_entries = 42; 440 + num_es_stack_entries = 42; 441 + num_hs_stack_entries = 42; 442 + num_ls_stack_entries = 42; 443 + break; 444 + } 486 445 487 - /* SQ config */ 488 - radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 11)); 489 - radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_START) >> 2); 490 - radeon_ring_write(rdev, sq_config); 491 - radeon_ring_write(rdev, sq_gpr_resource_mgmt_1); 492 - radeon_ring_write(rdev, sq_gpr_resource_mgmt_2); 493 - radeon_ring_write(rdev, sq_gpr_resource_mgmt_3); 494 - radeon_ring_write(rdev, 0); 495 - radeon_ring_write(rdev, 0); 496 - radeon_ring_write(rdev, sq_thread_resource_mgmt); 497 - radeon_ring_write(rdev, sq_thread_resource_mgmt_2); 498 - radeon_ring_write(rdev, sq_stack_resource_mgmt_1); 499 - radeon_ring_write(rdev, sq_stack_resource_mgmt_2); 500 - 
radeon_ring_write(rdev, sq_stack_resource_mgmt_3); 446 + if ((rdev->family == CHIP_CEDAR) || 447 + (rdev->family == CHIP_PALM) || 448 + (rdev->family == CHIP_CAICOS)) 449 + sq_config = 0; 450 + else 451 + sq_config = VC_ENABLE; 452 + 453 + sq_config |= (EXPORT_SRC_C | 454 + CS_PRIO(0) | 455 + LS_PRIO(0) | 456 + HS_PRIO(0) | 457 + PS_PRIO(0) | 458 + VS_PRIO(1) | 459 + GS_PRIO(2) | 460 + ES_PRIO(3)); 461 + 462 + sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) | 463 + NUM_VS_GPRS(num_vs_gprs) | 464 + NUM_CLAUSE_TEMP_GPRS(num_temp_gprs)); 465 + sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) | 466 + NUM_ES_GPRS(num_es_gprs)); 467 + sq_gpr_resource_mgmt_3 = (NUM_HS_GPRS(num_hs_gprs) | 468 + NUM_LS_GPRS(num_ls_gprs)); 469 + sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) | 470 + NUM_VS_THREADS(num_vs_threads) | 471 + NUM_GS_THREADS(num_gs_threads) | 472 + NUM_ES_THREADS(num_es_threads)); 473 + sq_thread_resource_mgmt_2 = (NUM_HS_THREADS(num_hs_threads) | 474 + NUM_LS_THREADS(num_ls_threads)); 475 + sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) | 476 + NUM_VS_STACK_ENTRIES(num_vs_stack_entries)); 477 + sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) | 478 + NUM_ES_STACK_ENTRIES(num_es_stack_entries)); 479 + sq_stack_resource_mgmt_3 = (NUM_HS_STACK_ENTRIES(num_hs_stack_entries) | 480 + NUM_LS_STACK_ENTRIES(num_ls_stack_entries)); 481 + 482 + /* disable dyn gprs */ 483 + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 484 + radeon_ring_write(rdev, (SQ_DYN_GPR_CNTL_PS_FLUSH_REQ - PACKET3_SET_CONFIG_REG_START) >> 2); 485 + radeon_ring_write(rdev, 0); 486 + 487 + /* SQ config */ 488 + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 11)); 489 + radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_START) >> 2); 490 + radeon_ring_write(rdev, sq_config); 491 + radeon_ring_write(rdev, sq_gpr_resource_mgmt_1); 492 + radeon_ring_write(rdev, sq_gpr_resource_mgmt_2); 493 + 
radeon_ring_write(rdev, sq_gpr_resource_mgmt_3); 494 + radeon_ring_write(rdev, 0); 495 + radeon_ring_write(rdev, 0); 496 + radeon_ring_write(rdev, sq_thread_resource_mgmt); 497 + radeon_ring_write(rdev, sq_thread_resource_mgmt_2); 498 + radeon_ring_write(rdev, sq_stack_resource_mgmt_1); 499 + radeon_ring_write(rdev, sq_stack_resource_mgmt_2); 500 + radeon_ring_write(rdev, sq_stack_resource_mgmt_3); 501 + } 501 502 502 503 /* CONTEXT_CONTROL */ 503 504 radeon_ring_write(rdev, 0xc0012800); ··· 573 570 mutex_init(&rdev->r600_blit.mutex); 574 571 rdev->r600_blit.state_offset = 0; 575 572 576 - rdev->r600_blit.state_len = evergreen_default_size; 573 + if (rdev->family < CHIP_CAYMAN) 574 + rdev->r600_blit.state_len = evergreen_default_size; 575 + else 576 + rdev->r600_blit.state_len = cayman_default_size; 577 577 578 578 dwords = rdev->r600_blit.state_len; 579 579 while (dwords & 0xf) { ··· 588 582 obj_size = ALIGN(obj_size, 256); 589 583 590 584 rdev->r600_blit.vs_offset = obj_size; 591 - obj_size += evergreen_vs_size * 4; 585 + if (rdev->family < CHIP_CAYMAN) 586 + obj_size += evergreen_vs_size * 4; 587 + else 588 + obj_size += cayman_vs_size * 4; 592 589 obj_size = ALIGN(obj_size, 256); 593 590 594 591 rdev->r600_blit.ps_offset = obj_size; 595 - obj_size += evergreen_ps_size * 4; 592 + if (rdev->family < CHIP_CAYMAN) 593 + obj_size += evergreen_ps_size * 4; 594 + else 595 + obj_size += cayman_ps_size * 4; 596 596 obj_size = ALIGN(obj_size, 256); 597 597 598 598 r = radeon_bo_create(rdev, obj_size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM, ··· 621 609 return r; 622 610 } 623 611 624 - memcpy_toio(ptr + rdev->r600_blit.state_offset, 625 - evergreen_default_state, rdev->r600_blit.state_len * 4); 612 + if (rdev->family < CHIP_CAYMAN) { 613 + memcpy_toio(ptr + rdev->r600_blit.state_offset, 614 + evergreen_default_state, rdev->r600_blit.state_len * 4); 626 615 627 - if (num_packet2s) 628 - memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4), 629 
- packet2s, num_packet2s * 4); 630 - for (i = 0; i < evergreen_vs_size; i++) 631 - *(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(evergreen_vs[i]); 632 - for (i = 0; i < evergreen_ps_size; i++) 633 - *(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(evergreen_ps[i]); 616 + if (num_packet2s) 617 + memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4), 618 + packet2s, num_packet2s * 4); 619 + for (i = 0; i < evergreen_vs_size; i++) 620 + *(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(evergreen_vs[i]); 621 + for (i = 0; i < evergreen_ps_size; i++) 622 + *(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(evergreen_ps[i]); 623 + } else { 624 + memcpy_toio(ptr + rdev->r600_blit.state_offset, 625 + cayman_default_state, rdev->r600_blit.state_len * 4); 626 + 627 + if (num_packet2s) 628 + memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4), 629 + packet2s, num_packet2s * 4); 630 + for (i = 0; i < cayman_vs_size; i++) 631 + *(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(cayman_vs[i]); 632 + for (i = 0; i < cayman_ps_size; i++) 633 + *(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(cayman_ps[i]); 634 + } 634 635 radeon_bo_kunmap(rdev->r600_blit.shader_obj); 635 636 radeon_bo_unreserve(rdev->r600_blit.shader_obj); 636 637
+5 -8
drivers/gpu/drm/radeon/ni.c
··· 1387 1387 return r; 1388 1388 cayman_gpu_init(rdev); 1389 1389 1390 - #if 0 1391 - r = cayman_blit_init(rdev); 1390 + r = evergreen_blit_init(rdev); 1392 1391 if (r) { 1393 - cayman_blit_fini(rdev); 1392 + evergreen_blit_fini(rdev); 1394 1393 rdev->asic->copy = NULL; 1395 1394 dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r); 1396 1395 } 1397 - #endif 1398 1396 1399 1397 /* allocate wb buffer */ 1400 1398 r = radeon_wb_init(rdev); ··· 1450 1452 1451 1453 int cayman_suspend(struct radeon_device *rdev) 1452 1454 { 1453 - /* int r; */ 1455 + int r; 1454 1456 1455 1457 /* FIXME: we should wait for ring to be empty */ 1456 1458 cayman_cp_enable(rdev, false); ··· 1459 1461 radeon_wb_disable(rdev); 1460 1462 cayman_pcie_gart_disable(rdev); 1461 1463 1462 - #if 0 1463 1464 /* unpin shaders bo */ 1464 1465 r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); 1465 1466 if (likely(r == 0)) { 1466 1467 radeon_bo_unpin(rdev->r600_blit.shader_obj); 1467 1468 radeon_bo_unreserve(rdev->r600_blit.shader_obj); 1468 1469 } 1469 - #endif 1470 + 1470 1471 return 0; 1471 1472 } 1472 1473 ··· 1577 1580 1578 1581 void cayman_fini(struct radeon_device *rdev) 1579 1582 { 1580 - /* cayman_blit_fini(rdev); */ 1583 + evergreen_blit_fini(rdev); 1581 1584 cayman_cp_fini(rdev); 1582 1585 r600_irq_fini(rdev); 1583 1586 radeon_wb_fini(rdev);
+3 -3
drivers/gpu/drm/radeon/radeon_asic.c
··· 906 906 .get_vblank_counter = &evergreen_get_vblank_counter, 907 907 .fence_ring_emit = &r600_fence_ring_emit, 908 908 .cs_parse = &evergreen_cs_parse, 909 - .copy_blit = NULL, 910 - .copy_dma = NULL, 911 - .copy = NULL, 909 + .copy_blit = &evergreen_copy_blit, 910 + .copy_dma = &evergreen_copy_blit, 911 + .copy = &evergreen_copy_blit, 912 912 .get_engine_clock = &radeon_atom_get_engine_clock, 913 913 .set_engine_clock = &radeon_atom_set_engine_clock, 914 914 .get_memory_clock = &radeon_atom_get_memory_clock,