Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: implement raster configuration for gfx v8

This patch is to implement the raster configuration and harvested
configuration of gfx v8.

Signed-off-by: Huang Rui <ray.huang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Edward O'Callaghan <funfunctor@folklore1984.net>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Authored by Huang Rui; committed by Alex Deucher.
167ac573 0b2138a4

+204 -1 (total across 2 changed files)
+167 -1
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
··· 3488 3488 return (~data) & mask; 3489 3489 } 3490 3490 3491 + static void 3492 + gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1) 3493 + { 3494 + switch (adev->asic_type) { 3495 + case CHIP_FIJI: 3496 + *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) | 3497 + RB_XSEL2(1) | PKR_MAP(2) | 3498 + PKR_XSEL(1) | PKR_YSEL(1) | 3499 + SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3); 3500 + *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) | 3501 + SE_PAIR_YSEL(2); 3502 + break; 3503 + case CHIP_TONGA: 3504 + case CHIP_POLARIS10: 3505 + *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) | 3506 + SE_XSEL(1) | SE_YSEL(1); 3507 + *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) | 3508 + SE_PAIR_YSEL(2); 3509 + break; 3510 + case CHIP_TOPAZ: 3511 + case CHIP_CARRIZO: 3512 + *rconf |= RB_MAP_PKR0(2); 3513 + *rconf1 |= 0x0; 3514 + break; 3515 + case CHIP_POLARIS11: 3516 + *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) | 3517 + SE_XSEL(1) | SE_YSEL(1); 3518 + *rconf1 |= 0x0; 3519 + break; 3520 + case CHIP_STONEY: 3521 + *rconf |= 0x0; 3522 + *rconf1 |= 0x0; 3523 + break; 3524 + default: 3525 + DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type); 3526 + break; 3527 + } 3528 + } 3529 + 3530 + static void 3531 + gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev, 3532 + u32 raster_config, u32 raster_config_1, 3533 + unsigned rb_mask, unsigned num_rb) 3534 + { 3535 + unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1); 3536 + unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1); 3537 + unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2); 3538 + unsigned rb_per_se = num_rb / num_se; 3539 + unsigned se_mask[4]; 3540 + unsigned se; 3541 + 3542 + se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask; 3543 + se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask; 3544 + se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask; 3545 + se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask; 3546 + 3547 + WARN_ON(!(num_se == 1 || num_se == 2 
|| num_se == 4)); 3548 + WARN_ON(!(sh_per_se == 1 || sh_per_se == 2)); 3549 + WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2)); 3550 + 3551 + if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) || 3552 + (!se_mask[2] && !se_mask[3]))) { 3553 + raster_config_1 &= ~SE_PAIR_MAP_MASK; 3554 + 3555 + if (!se_mask[0] && !se_mask[1]) { 3556 + raster_config_1 |= 3557 + SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3); 3558 + } else { 3559 + raster_config_1 |= 3560 + SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0); 3561 + } 3562 + } 3563 + 3564 + for (se = 0; se < num_se; se++) { 3565 + unsigned raster_config_se = raster_config; 3566 + unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se); 3567 + unsigned pkr1_mask = pkr0_mask << rb_per_pkr; 3568 + int idx = (se / 2) * 2; 3569 + 3570 + if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) { 3571 + raster_config_se &= ~SE_MAP_MASK; 3572 + 3573 + if (!se_mask[idx]) { 3574 + raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3); 3575 + } else { 3576 + raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0); 3577 + } 3578 + } 3579 + 3580 + pkr0_mask &= rb_mask; 3581 + pkr1_mask &= rb_mask; 3582 + if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) { 3583 + raster_config_se &= ~PKR_MAP_MASK; 3584 + 3585 + if (!pkr0_mask) { 3586 + raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3); 3587 + } else { 3588 + raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0); 3589 + } 3590 + } 3591 + 3592 + if (rb_per_se >= 2) { 3593 + unsigned rb0_mask = 1 << (se * rb_per_se); 3594 + unsigned rb1_mask = rb0_mask << 1; 3595 + 3596 + rb0_mask &= rb_mask; 3597 + rb1_mask &= rb_mask; 3598 + if (!rb0_mask || !rb1_mask) { 3599 + raster_config_se &= ~RB_MAP_PKR0_MASK; 3600 + 3601 + if (!rb0_mask) { 3602 + raster_config_se |= 3603 + RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3); 3604 + } else { 3605 + raster_config_se |= 3606 + RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0); 3607 + } 3608 + } 3609 + 3610 + if (rb_per_se > 2) { 3611 + rb0_mask = 1 << (se * rb_per_se + rb_per_pkr); 3612 + 
rb1_mask = rb0_mask << 1; 3613 + rb0_mask &= rb_mask; 3614 + rb1_mask &= rb_mask; 3615 + if (!rb0_mask || !rb1_mask) { 3616 + raster_config_se &= ~RB_MAP_PKR1_MASK; 3617 + 3618 + if (!rb0_mask) { 3619 + raster_config_se |= 3620 + RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3); 3621 + } else { 3622 + raster_config_se |= 3623 + RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0); 3624 + } 3625 + } 3626 + } 3627 + } 3628 + 3629 + /* GRBM_GFX_INDEX has a different offset on VI */ 3630 + gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff); 3631 + WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se); 3632 + WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1); 3633 + } 3634 + 3635 + /* GRBM_GFX_INDEX has a different offset on VI */ 3636 + gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 3637 + } 3638 + 3491 3639 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) 3492 3640 { 3493 3641 int i, j; 3494 3642 u32 data; 3643 + u32 raster_config = 0, raster_config_1 = 0; 3495 3644 u32 active_rbs = 0; 3496 3645 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / 3497 3646 adev->gfx.config.max_sh_per_se; 3647 + unsigned num_rb_pipes; 3498 3648 3499 3649 mutex_lock(&adev->grbm_idx_mutex); 3500 3650 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { ··· 3656 3506 } 3657 3507 } 3658 3508 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 3659 - mutex_unlock(&adev->grbm_idx_mutex); 3660 3509 3661 3510 adev->gfx.config.backend_enable_mask = active_rbs; 3662 3511 adev->gfx.config.num_rbs = hweight32(active_rbs); 3512 + 3513 + num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se * 3514 + adev->gfx.config.max_shader_engines, 16); 3515 + 3516 + gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1); 3517 + 3518 + if (!adev->gfx.config.backend_enable_mask || 3519 + adev->gfx.config.num_rbs >= num_rb_pipes) { 3520 + WREG32(mmPA_SC_RASTER_CONFIG, raster_config); 3521 + WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1); 3522 + } else { 3523 + 
gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1, 3524 + adev->gfx.config.backend_enable_mask, 3525 + num_rb_pipes); 3526 + } 3527 + 3528 + mutex_unlock(&adev->grbm_idx_mutex); 3663 3529 } 3664 3530 3665 3531 /**
+37
drivers/gpu/drm/amd/amdgpu/vid.h
··· 373 373 #define VCE_CMD_WAIT_GE 0x00000106 374 374 #define VCE_CMD_UPDATE_PTB 0x00000107 375 375 #define VCE_CMD_FLUSH_TLB 0x00000108 376 + 377 + /* mmPA_SC_RASTER_CONFIG mask */ 378 + #define RB_MAP_PKR0(x) ((x) << 0) 379 + #define RB_MAP_PKR0_MASK (0x3 << 0) 380 + #define RB_MAP_PKR1(x) ((x) << 2) 381 + #define RB_MAP_PKR1_MASK (0x3 << 2) 382 + #define RB_XSEL2(x) ((x) << 4) 383 + #define RB_XSEL2_MASK (0x3 << 4) 384 + #define RB_XSEL (1 << 6) 385 + #define RB_YSEL (1 << 7) 386 + #define PKR_MAP(x) ((x) << 8) 387 + #define PKR_MAP_MASK (0x3 << 8) 388 + #define PKR_XSEL(x) ((x) << 10) 389 + #define PKR_XSEL_MASK (0x3 << 10) 390 + #define PKR_YSEL(x) ((x) << 12) 391 + #define PKR_YSEL_MASK (0x3 << 12) 392 + #define SC_MAP(x) ((x) << 16) 393 + #define SC_MAP_MASK (0x3 << 16) 394 + #define SC_XSEL(x) ((x) << 18) 395 + #define SC_XSEL_MASK (0x3 << 18) 396 + #define SC_YSEL(x) ((x) << 20) 397 + #define SC_YSEL_MASK (0x3 << 20) 398 + #define SE_MAP(x) ((x) << 24) 399 + #define SE_MAP_MASK (0x3 << 24) 400 + #define SE_XSEL(x) ((x) << 26) 401 + #define SE_XSEL_MASK (0x3 << 26) 402 + #define SE_YSEL(x) ((x) << 28) 403 + #define SE_YSEL_MASK (0x3 << 28) 404 + 405 + /* mmPA_SC_RASTER_CONFIG_1 mask */ 406 + #define SE_PAIR_MAP(x) ((x) << 0) 407 + #define SE_PAIR_MAP_MASK (0x3 << 0) 408 + #define SE_PAIR_XSEL(x) ((x) << 2) 409 + #define SE_PAIR_XSEL_MASK (0x3 << 2) 410 + #define SE_PAIR_YSEL(x) ((x) << 4) 411 + #define SE_PAIR_YSEL_MASK (0x3 << 4) 412 + 376 413 #endif