Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge branch 'drm-next-3.15' of git://people.freedesktop.org/~deathsimple/linux into drm-next

This is the initial pull request for radeon drm-next 3.15. Highlights:
- VCE bringup including DPM support
- A few cleanups for the ring handling code (a condensed sketch of the new lockup check follows the shortlog below)

* 'drm-next-3.15' of git://people.freedesktop.org/~deathsimple/linux:
drm/radeon: cleanup false positive lockup handling
drm/radeon: drop radeon_ring_force_activity
drm/radeon: drop drivers copy of the rptr
drm/radeon/cik: enable/disable vce cg when encoding v2
drm/radeon: add support for vce 2.0 clock gating
drm/radeon/dpm: properly enable/disable vce when vce pg is enabled
drm/radeon/dpm: enable dynamic vce state switching v2
drm/radeon: add vce dpm support for KV/KB
drm/radeon: enable vce dpm on CI
drm/radeon: add vce dpm support for CI
drm/radeon: fill in set_vce_clocks for CIK asics
drm/radeon/dpm: fetch vce states from the vbios
drm/radeon/dpm: fill in some initial vce infrastructure
drm/radeon/dpm: move platform caps fetching to a separate function
drm/radeon: add callback for setting vce clocks
drm/radeon: add VCE version parsing and checking
drm/radeon: add VCE ring query
drm/radeon: initial VCE support v4
drm/radeon: fix CP semaphores on CIK
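
A note on the ring-handling cleanup mentioned in the highlights: the patches drop the driver-side copy of the rptr and the radeon_ring_force_activity() nop injection, so lockup detection now reads the hardware rptr through the per-ring get_rptr callback each time it is polled. Below is a condensed paraphrase of the resulting check from the radeon_ring.c hunk further down; the timeout constant is illustrative, not the driver's actual lockup-timeout module parameter.

/* Condensed paraphrase of the reworked lockup check (radeon_ring.c).
 * The rptr is read from the hardware (or its writeback copy) via the
 * asic get_rptr callback instead of a cached ring->rptr field.
 */
bool radeon_ring_test_lockup(struct radeon_device *rdev,
                             struct radeon_ring *ring)
{
    uint32_t rptr = radeon_ring_get_rptr(rdev, ring);
    unsigned long elapsed;

    if (rptr != ring->last_rptr) {
        /* ring made progress: refresh last_rptr and the timestamp */
        radeon_ring_lockup_update(rdev, ring);
        return false;
    }
    elapsed = jiffies_to_msecs(jiffies - ring->last_activity);
    return elapsed >= 10000; /* illustrative 10 second threshold */
}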

+1691 -175
+6
drivers/gpu/drm/radeon/Makefile
··· 99 99 uvd_v3_1.o \ 100 100 uvd_v4_2.o 101 101 102 + # add VCE block 103 + radeon-y += \ 104 + radeon_vce.o \ 105 + vce_v1_0.o \ 106 + vce_v2_0.o \ 107 + 102 108 radeon-$(CONFIG_COMPAT) += radeon_ioc32.o 103 109 radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o 104 110 radeon-$(CONFIG_ACPI) += radeon_acpi.o
+4
drivers/gpu/drm/radeon/btc_dpm.c
··· 2601 2601 pi->min_vddc_in_table = 0; 2602 2602 pi->max_vddc_in_table = 0; 2603 2603 2604 + ret = r600_get_platform_caps(rdev); 2605 + if (ret) 2606 + return ret; 2607 + 2604 2608 ret = rv7xx_parse_power_table(rdev); 2605 2609 if (ret) 2606 2610 return ret;
+54 -13
drivers/gpu/drm/radeon/ci_dpm.c
··· 172 172 extern void cik_enter_rlc_safe_mode(struct radeon_device *rdev); 173 173 extern void cik_exit_rlc_safe_mode(struct radeon_device *rdev); 174 174 extern int ci_mc_load_microcode(struct radeon_device *rdev); 175 + extern void cik_update_cg(struct radeon_device *rdev, 176 + u32 block, bool enable); 175 177 176 178 static int ci_get_std_voltage_value_sidd(struct radeon_device *rdev, 177 179 struct atom_voltage_table_entry *voltage_table, ··· 748 746 u32 max_sclk_vddc, max_mclk_vddci, max_mclk_vddc; 749 747 int i; 750 748 749 + if (rps->vce_active) { 750 + rps->evclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].evclk; 751 + rps->ecclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].ecclk; 752 + } else { 753 + rps->evclk = 0; 754 + rps->ecclk = 0; 755 + } 756 + 751 757 if ((rdev->pm.dpm.new_active_crtc_count > 1) || 752 758 ci_dpm_vblank_too_short(rdev)) 753 759 disable_mclk_switching = true; ··· 812 802 } else { 813 803 mclk = ps->performance_levels[0].mclk; 814 804 sclk = ps->performance_levels[0].sclk; 805 + } 806 + 807 + if (rps->vce_active) { 808 + if (sclk < rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].sclk) 809 + sclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].sclk; 810 + if (mclk < rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].mclk) 811 + mclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].mclk; 815 812 } 816 813 817 814 ps->performance_levels[0].sclk = sclk; ··· 3485 3468 0 : -EINVAL; 3486 3469 } 3487 3470 3488 - #if 0 3489 3471 static int ci_enable_vce_dpm(struct radeon_device *rdev, bool enable) 3490 3472 { 3491 3473 struct ci_power_info *pi = ci_get_pi(rdev); ··· 3517 3501 0 : -EINVAL; 3518 3502 } 3519 3503 3504 + #if 0 3520 3505 static int ci_enable_samu_dpm(struct radeon_device *rdev, bool enable) 3521 3506 { 3522 3507 struct ci_power_info *pi = ci_get_pi(rdev); ··· 3604 3587 return ci_enable_uvd_dpm(rdev, !gate); 3605 3588 } 3606 3589 3607 - #if 0 3608 3590 static u8 ci_get_vce_boot_level(struct radeon_device *rdev) 3609 3591 { 3610 3592 u8 i; ··· 3624 3608 struct radeon_ps *radeon_current_state) 3625 3609 { 3626 3610 struct ci_power_info *pi = ci_get_pi(rdev); 3627 - bool new_vce_clock_non_zero = (radeon_new_state->evclk != 0); 3628 - bool old_vce_clock_non_zero = (radeon_current_state->evclk != 0); 3629 3611 int ret = 0; 3630 3612 u32 tmp; 3631 3613 3632 - if (new_vce_clock_non_zero != old_vce_clock_non_zero) { 3633 - if (new_vce_clock_non_zero) { 3634 - pi->smc_state_table.VceBootLevel = ci_get_vce_boot_level(rdev); 3614 + if (radeon_current_state->evclk != radeon_new_state->evclk) { 3615 + if (radeon_new_state->evclk) { 3616 + /* turn the clocks on when encoding */ 3617 + cik_update_cg(rdev, RADEON_CG_BLOCK_VCE, false); 3635 3618 3619 + pi->smc_state_table.VceBootLevel = ci_get_vce_boot_level(rdev); 3636 3620 tmp = RREG32_SMC(DPM_TABLE_475); 3637 3621 tmp &= ~VceBootLevel_MASK; 3638 3622 tmp |= VceBootLevel(pi->smc_state_table.VceBootLevel); ··· 3640 3624 3641 3625 ret = ci_enable_vce_dpm(rdev, true); 3642 3626 } else { 3627 + /* turn the clocks off when not encoding */ 3628 + cik_update_cg(rdev, RADEON_CG_BLOCK_VCE, true); 3629 + 3643 3630 ret = ci_enable_vce_dpm(rdev, false); 3644 3631 } 3645 3632 } 3646 3633 return ret; 3647 3634 } 3648 3635 3636 + #if 0 3649 3637 static int ci_update_samu_dpm(struct radeon_device *rdev, bool gate) 3650 3638 { 3651 3639 return ci_enable_samu_dpm(rdev, gate); ··· 4772 4752 DRM_ERROR("ci_generate_dpm_level_enable_mask failed\n"); 4773 4753 return ret; 4774 4754 } 4775 - #if 0 4755 + 4776 4756 ret = 
ci_update_vce_dpm(rdev, new_ps, old_ps); 4777 4757 if (ret) { 4778 4758 DRM_ERROR("ci_update_vce_dpm failed\n"); 4779 4759 return ret; 4780 4760 } 4781 - #endif 4761 + 4782 4762 ret = ci_update_sclk_t(rdev); 4783 4763 if (ret) { 4784 4764 DRM_ERROR("ci_update_sclk_t failed\n"); ··· 4979 4959 if (!rdev->pm.dpm.ps) 4980 4960 return -ENOMEM; 4981 4961 power_state_offset = (u8 *)state_array->states; 4982 - rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); 4983 - rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); 4984 - rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); 4985 4962 for (i = 0; i < state_array->ucNumEntries; i++) { 4986 4963 u8 *idx; 4987 4964 power_state = (union pplib_power_state *)power_state_offset; ··· 5015 4998 power_state_offset += 2 + power_state->v2.ucNumDPMLevels; 5016 4999 } 5017 5000 rdev->pm.dpm.num_ps = state_array->ucNumEntries; 5001 + 5002 + /* fill in the vce power states */ 5003 + for (i = 0; i < RADEON_MAX_VCE_LEVELS; i++) { 5004 + u32 sclk, mclk; 5005 + clock_array_index = rdev->pm.dpm.vce_states[i].clk_idx; 5006 + clock_info = (union pplib_clock_info *) 5007 + &clock_info_array->clockInfo[clock_array_index * clock_info_array->ucEntrySize]; 5008 + sclk = le16_to_cpu(clock_info->ci.usEngineClockLow); 5009 + sclk |= clock_info->ci.ucEngineClockHigh << 16; 5010 + mclk = le16_to_cpu(clock_info->ci.usMemoryClockLow); 5011 + mclk |= clock_info->ci.ucMemoryClockHigh << 16; 5012 + rdev->pm.dpm.vce_states[i].sclk = sclk; 5013 + rdev->pm.dpm.vce_states[i].mclk = mclk; 5014 + } 5015 + 5018 5016 return 0; 5019 5017 } 5020 5018 ··· 5109 5077 ci_dpm_fini(rdev); 5110 5078 return ret; 5111 5079 } 5112 - ret = ci_parse_power_table(rdev); 5080 + 5081 + ret = r600_get_platform_caps(rdev); 5113 5082 if (ret) { 5114 5083 ci_dpm_fini(rdev); 5115 5084 return ret; 5116 5085 } 5086 + 5117 5087 ret = r600_parse_extended_power_table(rdev); 5088 + if (ret) { 5089 + ci_dpm_fini(rdev); 5090 + return ret; 5091 + } 5092 + 5093 + ret = ci_parse_power_table(rdev); 5118 5094 if (ret) { 5119 5095 ci_dpm_fini(rdev); 5120 5096 return ret; ··· 5160 5120 pi->caps_sclk_throttle_low_notification = false; 5161 5121 5162 5122 pi->caps_uvd_dpm = true; 5123 + pi->caps_vce_dpm = true; 5163 5124 5164 5125 ci_get_leakage_voltages(rdev); 5165 5126 ci_patch_dependency_tables_with_leakage(rdev);
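
Taken together, the ci_dpm.c hunks above make CI power states VCE-aware: a state with vce_active set pulls its evclk/ecclk and minimum sclk/mclk from the vce_states table parsed out of the vbios, and ci_update_vce_dpm() is now called on every state change instead of sitting behind #if 0. A rough paraphrase of the enable/disable decision follows; the helper name is made up for the sketch, and the SMC boot-level programming and error handling are omitted.

/* Rough paraphrase of the VCE DPM switch added to the CI dpm path. */
static int ci_update_vce_dpm_sketch(struct radeon_device *rdev,
                                    struct radeon_ps *new_state,
                                    struct radeon_ps *current_state)
{
    if (current_state->evclk == new_state->evclk)
        return 0; /* VCE clock request unchanged, nothing to do */

    if (new_state->evclk) {
        /* encoding starts: ungate the VCE clocks, then enable VCE DPM
         * (the real code also programs the SMC VCE boot level here) */
        cik_update_cg(rdev, RADEON_CG_BLOCK_VCE, false);
        return ci_enable_vce_dpm(rdev, true);
    }

    /* encoding stopped: gate the VCE clocks and disable VCE DPM */
    cik_update_cg(rdev, RADEON_CG_BLOCK_VCE, true);
    return ci_enable_vce_dpm(rdev, false);
}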
+102 -7
drivers/gpu/drm/radeon/cik.c
··· 75 75 extern int cik_sdma_resume(struct radeon_device *rdev); 76 76 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable); 77 77 extern void cik_sdma_fini(struct radeon_device *rdev); 78 + extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable); 78 79 static void cik_rlc_stop(struct radeon_device *rdev); 79 80 static void cik_pcie_gen3_enable(struct radeon_device *rdev); 80 81 static void cik_program_aspm(struct radeon_device *rdev); ··· 4031 4030 WREG32(CP_RB0_BASE, rb_addr); 4032 4031 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr)); 4033 4032 4034 - ring->rptr = RREG32(CP_RB0_RPTR); 4035 - 4036 4033 /* start the ring */ 4037 4034 cik_cp_gfx_start(rdev); 4038 4035 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true; ··· 4585 4586 rdev->ring[idx].wptr = 0; 4586 4587 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr; 4587 4588 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr); 4588 - rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR); 4589 - mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr; 4589 + mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR); 4590 4590 4591 4591 /* set the vmid for the queue */ 4592 4592 mqd->queue_state.cp_hqd_vmid = 0; ··· 5115 5117 if (!(reset_mask & (RADEON_RESET_GFX | 5116 5118 RADEON_RESET_COMPUTE | 5117 5119 RADEON_RESET_CP))) { 5118 - radeon_ring_lockup_update(ring); 5120 + radeon_ring_lockup_update(rdev, ring); 5119 5121 return false; 5120 5122 } 5121 - /* force CP activities */ 5122 - radeon_ring_force_activity(rdev, ring); 5123 5123 return radeon_ring_test_lockup(rdev, ring); 5124 5124 } 5125 5125 ··· 6136 6140 if (block & RADEON_CG_BLOCK_HDP) { 6137 6141 cik_enable_hdp_mgcg(rdev, enable); 6138 6142 cik_enable_hdp_ls(rdev, enable); 6143 + } 6144 + 6145 + if (block & RADEON_CG_BLOCK_VCE) { 6146 + vce_v2_0_enable_mgcg(rdev, enable); 6139 6147 } 6140 6148 } 6141 6149 ··· 7490 7490 /* reset addr and status */ 7491 7491 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1); 7492 7492 break; 7493 + case 167: /* VCE */ 7494 + DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data); 7495 + switch (src_data) { 7496 + case 0: 7497 + radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX); 7498 + break; 7499 + case 1: 7500 + radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX); 7501 + break; 7502 + default: 7503 + DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data); 7504 + break; 7505 + } 7506 + break; 7493 7507 case 176: /* GFX RB CP_INT */ 7494 7508 case 177: /* GFX IB CP_INT */ 7495 7509 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX); ··· 7803 7789 if (r) 7804 7790 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0; 7805 7791 7792 + r = radeon_vce_resume(rdev); 7793 + if (!r) { 7794 + r = vce_v2_0_resume(rdev); 7795 + if (!r) 7796 + r = radeon_fence_driver_start_ring(rdev, 7797 + TN_RING_TYPE_VCE1_INDEX); 7798 + if (!r) 7799 + r = radeon_fence_driver_start_ring(rdev, 7800 + TN_RING_TYPE_VCE2_INDEX); 7801 + } 7802 + if (r) { 7803 + dev_err(rdev->dev, "VCE init error (%d).\n", r); 7804 + rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0; 7805 + rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0; 7806 + } 7807 + 7806 7808 /* Enable IRQ */ 7807 7809 if (!rdev->irq.installed) { 7808 7810 r = radeon_irq_kms_init(rdev); ··· 7894 7864 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); 7895 7865 } 7896 7866 7867 + r = -ENOENT; 7868 + 7869 + ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; 7870 + if (ring->ring_size) 7871 + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 7872 + VCE_CMD_NO_OP); 7873 + 7874 + ring = 
&rdev->ring[TN_RING_TYPE_VCE2_INDEX]; 7875 + if (ring->ring_size) 7876 + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 7877 + VCE_CMD_NO_OP); 7878 + 7879 + if (!r) 7880 + r = vce_v1_0_init(rdev); 7881 + else if (r != -ENOENT) 7882 + DRM_ERROR("radeon: failed initializing VCE (%d).\n", r); 7883 + 7897 7884 r = radeon_ib_pool_init(rdev); 7898 7885 if (r) { 7899 7886 dev_err(rdev->dev, "IB initialization failed (%d).\n", r); ··· 7981 7934 cik_sdma_enable(rdev, false); 7982 7935 uvd_v1_0_fini(rdev); 7983 7936 radeon_uvd_suspend(rdev); 7937 + radeon_vce_suspend(rdev); 7984 7938 cik_fini_pg(rdev); 7985 7939 cik_fini_cg(rdev); 7986 7940 cik_irq_suspend(rdev); ··· 8114 8066 r600_ring_init(rdev, ring, 4096); 8115 8067 } 8116 8068 8069 + r = radeon_vce_init(rdev); 8070 + if (!r) { 8071 + ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; 8072 + ring->ring_obj = NULL; 8073 + r600_ring_init(rdev, ring, 4096); 8074 + 8075 + ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; 8076 + ring->ring_obj = NULL; 8077 + r600_ring_init(rdev, ring, 4096); 8078 + } 8079 + 8117 8080 rdev->ih.ring_obj = NULL; 8118 8081 r600_ih_ring_init(rdev, 64 * 1024); 8119 8082 ··· 8186 8127 radeon_irq_kms_fini(rdev); 8187 8128 uvd_v1_0_fini(rdev); 8188 8129 radeon_uvd_fini(rdev); 8130 + radeon_vce_fini(rdev); 8189 8131 cik_pcie_gart_fini(rdev); 8190 8132 r600_vram_scratch_fini(rdev); 8191 8133 radeon_gem_fini(rdev); ··· 8923 8863 8924 8864 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS); 8925 8865 return r; 8866 + } 8867 + 8868 + int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk) 8869 + { 8870 + int r, i; 8871 + struct atom_clock_dividers dividers; 8872 + u32 tmp; 8873 + 8874 + r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK, 8875 + ecclk, false, &dividers); 8876 + if (r) 8877 + return r; 8878 + 8879 + for (i = 0; i < 100; i++) { 8880 + if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS) 8881 + break; 8882 + mdelay(10); 8883 + } 8884 + if (i == 100) 8885 + return -ETIMEDOUT; 8886 + 8887 + tmp = RREG32_SMC(CG_ECLK_CNTL); 8888 + tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK); 8889 + tmp |= dividers.post_divider; 8890 + WREG32_SMC(CG_ECLK_CNTL, tmp); 8891 + 8892 + for (i = 0; i < 100; i++) { 8893 + if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS) 8894 + break; 8895 + mdelay(10); 8896 + } 8897 + if (i == 100) 8898 + return -ETIMEDOUT; 8899 + 8900 + return 0; 8926 8901 } 8927 8902 8928 8903 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
+1 -5
drivers/gpu/drm/radeon/cik_sdma.c
··· 362 362 ring->wptr = 0; 363 363 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2); 364 364 365 - ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2; 366 - 367 365 /* enable DMA RB */ 368 366 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE); 369 367 ··· 711 713 mask = RADEON_RESET_DMA1; 712 714 713 715 if (!(reset_mask & mask)) { 714 - radeon_ring_lockup_update(ring); 716 + radeon_ring_lockup_update(rdev, ring); 715 717 return false; 716 718 } 717 - /* force ring activities */ 718 - radeon_ring_force_activity(rdev, ring); 719 719 return radeon_ring_test_lockup(rdev, ring); 720 720 } 721 721
+49
drivers/gpu/drm/radeon/cikd.h
··· 203 203 #define CTF_TEMP_MASK 0x0003fe00 204 204 #define CTF_TEMP_SHIFT 9 205 205 206 + #define CG_ECLK_CNTL 0xC05000AC 207 + # define ECLK_DIVIDER_MASK 0x7f 208 + # define ECLK_DIR_CNTL_EN (1 << 8) 209 + #define CG_ECLK_STATUS 0xC05000B0 210 + # define ECLK_STATUS (1 << 0) 211 + 206 212 #define CG_SPLL_FUNC_CNTL 0xC0500140 207 213 #define SPLL_RESET (1 << 0) 208 214 #define SPLL_PWRON (1 << 1) ··· 2015 2009 2016 2010 /* UVD CTX indirect */ 2017 2011 #define UVD_CGC_MEM_CTRL 0xC0 2012 + 2013 + /* VCE */ 2014 + 2015 + #define VCE_VCPU_CACHE_OFFSET0 0x20024 2016 + #define VCE_VCPU_CACHE_SIZE0 0x20028 2017 + #define VCE_VCPU_CACHE_OFFSET1 0x2002c 2018 + #define VCE_VCPU_CACHE_SIZE1 0x20030 2019 + #define VCE_VCPU_CACHE_OFFSET2 0x20034 2020 + #define VCE_VCPU_CACHE_SIZE2 0x20038 2021 + #define VCE_RB_RPTR2 0x20178 2022 + #define VCE_RB_WPTR2 0x2017c 2023 + #define VCE_RB_RPTR 0x2018c 2024 + #define VCE_RB_WPTR 0x20190 2025 + #define VCE_CLOCK_GATING_A 0x202f8 2026 + # define CGC_CLK_GATE_DLY_TIMER_MASK (0xf << 0) 2027 + # define CGC_CLK_GATE_DLY_TIMER(x) ((x) << 0) 2028 + # define CGC_CLK_GATER_OFF_DLY_TIMER_MASK (0xff << 4) 2029 + # define CGC_CLK_GATER_OFF_DLY_TIMER(x) ((x) << 4) 2030 + # define CGC_UENC_WAIT_AWAKE (1 << 18) 2031 + #define VCE_CLOCK_GATING_B 0x202fc 2032 + #define VCE_CGTT_CLK_OVERRIDE 0x207a0 2033 + #define VCE_UENC_CLOCK_GATING 0x207bc 2034 + # define CLOCK_ON_DELAY_MASK (0xf << 0) 2035 + # define CLOCK_ON_DELAY(x) ((x) << 0) 2036 + # define CLOCK_OFF_DELAY_MASK (0xff << 4) 2037 + # define CLOCK_OFF_DELAY(x) ((x) << 4) 2038 + #define VCE_UENC_REG_CLOCK_GATING 0x207c0 2039 + #define VCE_SYS_INT_EN 0x21300 2040 + # define VCE_SYS_INT_TRAP_INTERRUPT_EN (1 << 3) 2041 + #define VCE_LMI_CTRL2 0x21474 2042 + #define VCE_LMI_CTRL 0x21498 2043 + #define VCE_LMI_VM_CTRL 0x214a0 2044 + #define VCE_LMI_SWAP_CNTL 0x214b4 2045 + #define VCE_LMI_SWAP_CNTL1 0x214b8 2046 + #define VCE_LMI_CACHE_CTRL 0x214f4 2047 + 2048 + #define VCE_CMD_NO_OP 0x00000000 2049 + #define VCE_CMD_END 0x00000001 2050 + #define VCE_CMD_IB 0x00000002 2051 + #define VCE_CMD_FENCE 0x00000003 2052 + #define VCE_CMD_TRAP 0x00000004 2053 + #define VCE_CMD_IB_AUTO 0x00000005 2054 + #define VCE_CMD_SEMAPHORE 0x00000006 2018 2055 2019 2056 #endif
+4
drivers/gpu/drm/radeon/cypress_dpm.c
··· 2036 2036 pi->min_vddc_in_table = 0; 2037 2037 pi->max_vddc_in_table = 0; 2038 2038 2039 + ret = r600_get_platform_caps(rdev); 2040 + if (ret) 2041 + return ret; 2042 + 2039 2043 ret = rv7xx_parse_power_table(rdev); 2040 2044 if (ret) 2041 2045 return ret;
+1 -5
drivers/gpu/drm/radeon/evergreen.c
··· 2990 2990 WREG32(CP_RB_BASE, ring->gpu_addr >> 8); 2991 2991 WREG32(CP_DEBUG, (1 << 27) | (1 << 28)); 2992 2992 2993 - ring->rptr = RREG32(CP_RB_RPTR); 2994 - 2995 2993 evergreen_cp_start(rdev); 2996 2994 ring->ready = true; 2997 2995 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, ring); ··· 3950 3952 if (!(reset_mask & (RADEON_RESET_GFX | 3951 3953 RADEON_RESET_COMPUTE | 3952 3954 RADEON_RESET_CP))) { 3953 - radeon_ring_lockup_update(ring); 3955 + radeon_ring_lockup_update(rdev, ring); 3954 3956 return false; 3955 3957 } 3956 - /* force CP activities */ 3957 - radeon_ring_force_activity(rdev, ring); 3958 3958 return radeon_ring_test_lockup(rdev, ring); 3959 3959 } 3960 3960
+1 -3
drivers/gpu/drm/radeon/evergreen_dma.c
··· 174 174 u32 reset_mask = evergreen_gpu_check_soft_reset(rdev); 175 175 176 176 if (!(reset_mask & RADEON_RESET_DMA)) { 177 - radeon_ring_lockup_update(ring); 177 + radeon_ring_lockup_update(rdev, ring); 178 178 return false; 179 179 } 180 - /* force ring activities */ 181 - radeon_ring_force_activity(rdev, ring); 182 180 return radeon_ring_test_lockup(rdev, ring); 183 181 } 184 182
+48 -16
drivers/gpu/drm/radeon/kv_dpm.c
··· 1338 1338 PPSMC_MSG_UVDDPM_Enable : PPSMC_MSG_UVDDPM_Disable); 1339 1339 } 1340 1340 1341 - #if 0 1342 1341 static int kv_enable_vce_dpm(struct radeon_device *rdev, bool enable) 1343 1342 { 1344 1343 return kv_notify_message_to_smu(rdev, enable ? 1345 1344 PPSMC_MSG_VCEDPM_Enable : PPSMC_MSG_VCEDPM_Disable); 1346 1345 } 1347 - #endif 1348 1346 1349 1347 static int kv_enable_samu_dpm(struct radeon_device *rdev, bool enable) 1350 1348 { ··· 1387 1389 return kv_enable_uvd_dpm(rdev, !gate); 1388 1390 } 1389 1391 1390 - #if 0 1391 1392 static u8 kv_get_vce_boot_level(struct radeon_device *rdev) 1392 1393 { 1393 1394 u8 i; ··· 1411 1414 int ret; 1412 1415 1413 1416 if (radeon_new_state->evclk > 0 && radeon_current_state->evclk == 0) { 1417 + kv_dpm_powergate_vce(rdev, false); 1418 + /* turn the clocks on when encoding */ 1419 + cik_update_cg(rdev, RADEON_CG_BLOCK_VCE, false); 1414 1420 if (pi->caps_stable_p_state) 1415 1421 pi->vce_boot_level = table->count - 1; 1416 1422 else ··· 1436 1436 kv_enable_vce_dpm(rdev, true); 1437 1437 } else if (radeon_new_state->evclk == 0 && radeon_current_state->evclk > 0) { 1438 1438 kv_enable_vce_dpm(rdev, false); 1439 + /* turn the clocks off when not encoding */ 1440 + cik_update_cg(rdev, RADEON_CG_BLOCK_VCE, true); 1441 + kv_dpm_powergate_vce(rdev, true); 1439 1442 } 1440 1443 1441 1444 return 0; 1442 1445 } 1443 - #endif 1444 1446 1445 1447 static int kv_update_samu_dpm(struct radeon_device *rdev, bool gate) 1446 1448 { ··· 1577 1575 pi->vce_power_gated = gate; 1578 1576 1579 1577 if (gate) { 1580 - if (pi->caps_vce_pg) 1578 + if (pi->caps_vce_pg) { 1579 + /* XXX do we need a vce_v1_0_stop() ? */ 1581 1580 kv_notify_message_to_smu(rdev, PPSMC_MSG_VCEPowerOFF); 1581 + } 1582 1582 } else { 1583 - if (pi->caps_vce_pg) 1583 + if (pi->caps_vce_pg) { 1584 1584 kv_notify_message_to_smu(rdev, PPSMC_MSG_VCEPowerON); 1585 + vce_v2_0_resume(rdev); 1586 + vce_v1_0_start(rdev); 1587 + } 1585 1588 } 1586 1589 } 1587 1590 ··· 1775 1768 { 1776 1769 struct kv_power_info *pi = kv_get_pi(rdev); 1777 1770 struct radeon_ps *new_ps = &pi->requested_rps; 1778 - /*struct radeon_ps *old_ps = &pi->current_rps;*/ 1771 + struct radeon_ps *old_ps = &pi->current_rps; 1779 1772 int ret; 1780 1773 1781 1774 if (pi->bapm_enable) { ··· 1805 1798 kv_set_enabled_levels(rdev); 1806 1799 kv_force_lowest_valid(rdev); 1807 1800 kv_unforce_levels(rdev); 1808 - #if 0 1801 + 1809 1802 ret = kv_update_vce_dpm(rdev, new_ps, old_ps); 1810 1803 if (ret) { 1811 1804 DRM_ERROR("kv_update_vce_dpm failed\n"); 1812 1805 return ret; 1813 1806 } 1814 - #endif 1815 1807 kv_update_sclk_t(rdev); 1816 1808 } 1817 1809 } else { ··· 1829 1823 kv_program_nbps_index_settings(rdev, new_ps); 1830 1824 kv_freeze_sclk_dpm(rdev, false); 1831 1825 kv_set_enabled_levels(rdev); 1832 - #if 0 1833 1826 ret = kv_update_vce_dpm(rdev, new_ps, old_ps); 1834 1827 if (ret) { 1835 1828 DRM_ERROR("kv_update_vce_dpm failed\n"); 1836 1829 return ret; 1837 1830 } 1838 - #endif 1839 1831 kv_update_acp_boot_level(rdev); 1840 1832 kv_update_sclk_t(rdev); 1841 1833 kv_enable_nb_dpm(rdev); ··· 2041 2037 struct radeon_clock_and_voltage_limits *max_limits = 2042 2038 &rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac; 2043 2039 2040 + if (new_rps->vce_active) { 2041 + new_rps->evclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].evclk; 2042 + new_rps->ecclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].ecclk; 2043 + } else { 2044 + new_rps->evclk = 0; 2045 + new_rps->ecclk = 0; 2046 + } 2047 + 2044 2048 mclk = max_limits->mclk; 2045 2049 
sclk = min_sclk; 2046 2050 ··· 2066 2054 stable_p_state_sclk = table->entries[0].clk; 2067 2055 2068 2056 sclk = stable_p_state_sclk; 2057 + } 2058 + 2059 + if (new_rps->vce_active) { 2060 + if (sclk < rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].sclk) 2061 + sclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].sclk; 2069 2062 } 2070 2063 2071 2064 ps->need_dfs_bypass = true; ··· 2109 2092 } 2110 2093 } 2111 2094 2112 - pi->video_start = new_rps->dclk || new_rps->vclk; 2095 + pi->video_start = new_rps->dclk || new_rps->vclk || 2096 + new_rps->evclk || new_rps->ecclk; 2113 2097 2114 2098 if ((new_rps->class & ATOM_PPLIB_CLASSIFICATION_UI_MASK) == 2115 2099 ATOM_PPLIB_CLASSIFICATION_UI_BATTERY) ··· 2556 2538 if (!rdev->pm.dpm.ps) 2557 2539 return -ENOMEM; 2558 2540 power_state_offset = (u8 *)state_array->states; 2559 - rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); 2560 - rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); 2561 - rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); 2562 2541 for (i = 0; i < state_array->ucNumEntries; i++) { 2563 2542 u8 *idx; 2564 2543 power_state = (union pplib_power_state *)power_state_offset; ··· 2592 2577 power_state_offset += 2 + power_state->v2.ucNumDPMLevels; 2593 2578 } 2594 2579 rdev->pm.dpm.num_ps = state_array->ucNumEntries; 2580 + 2581 + /* fill in the vce power states */ 2582 + for (i = 0; i < RADEON_MAX_VCE_LEVELS; i++) { 2583 + u32 sclk; 2584 + clock_array_index = rdev->pm.dpm.vce_states[i].clk_idx; 2585 + clock_info = (union pplib_clock_info *) 2586 + &clock_info_array->clockInfo[clock_array_index * clock_info_array->ucEntrySize]; 2587 + sclk = le16_to_cpu(clock_info->sumo.usEngineClockLow); 2588 + sclk |= clock_info->sumo.ucEngineClockHigh << 16; 2589 + rdev->pm.dpm.vce_states[i].sclk = sclk; 2590 + rdev->pm.dpm.vce_states[i].mclk = 0; 2591 + } 2592 + 2595 2593 return 0; 2596 2594 } 2597 2595 ··· 2617 2589 if (pi == NULL) 2618 2590 return -ENOMEM; 2619 2591 rdev->pm.dpm.priv = pi; 2592 + 2593 + ret = r600_get_platform_caps(rdev); 2594 + if (ret) 2595 + return ret; 2620 2596 2621 2597 ret = r600_parse_extended_power_table(rdev); 2622 2598 if (ret) ··· 2655 2623 pi->caps_fps = false; /* true? */ 2656 2624 pi->caps_uvd_pg = true; 2657 2625 pi->caps_uvd_dpm = true; 2658 - pi->caps_vce_pg = false; 2626 + pi->caps_vce_pg = false; /* XXX true */ 2659 2627 pi->caps_samu_pg = false; 2660 2628 pi->caps_acp_pg = false; 2661 2629 pi->caps_stable_p_state = false;
+3 -5
drivers/gpu/drm/radeon/ni.c
··· 1642 1642 ring = &rdev->ring[ridx[i]]; 1643 1643 WREG32_P(cp_rb_cntl[i], RB_RPTR_WR_ENA, ~RB_RPTR_WR_ENA); 1644 1644 1645 - ring->rptr = ring->wptr = 0; 1646 - WREG32(cp_rb_rptr[i], ring->rptr); 1645 + ring->wptr = 0; 1646 + WREG32(cp_rb_rptr[i], 0); 1647 1647 WREG32(cp_rb_wptr[i], ring->wptr); 1648 1648 1649 1649 mdelay(1); ··· 1917 1917 if (!(reset_mask & (RADEON_RESET_GFX | 1918 1918 RADEON_RESET_COMPUTE | 1919 1919 RADEON_RESET_CP))) { 1920 - radeon_ring_lockup_update(ring); 1920 + radeon_ring_lockup_update(rdev, ring); 1921 1921 return false; 1922 1922 } 1923 - /* force CP activities */ 1924 - radeon_ring_force_activity(rdev, ring); 1925 1923 return radeon_ring_test_lockup(rdev, ring); 1926 1924 } 1927 1925
+1 -5
drivers/gpu/drm/radeon/ni_dma.c
··· 248 248 ring->wptr = 0; 249 249 WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2); 250 250 251 - ring->rptr = RREG32(DMA_RB_RPTR + reg_offset) >> 2; 252 - 253 251 WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE); 254 252 255 253 ring->ready = true; ··· 300 302 mask = RADEON_RESET_DMA1; 301 303 302 304 if (!(reset_mask & mask)) { 303 - radeon_ring_lockup_update(ring); 305 + radeon_ring_lockup_update(rdev, ring); 304 306 return false; 305 307 } 306 - /* force ring activities */ 307 - radeon_ring_force_activity(rdev, ring); 308 308 return radeon_ring_test_lockup(rdev, ring); 309 309 } 310 310
+4 -3
drivers/gpu/drm/radeon/ni_dpm.c
··· 4025 4025 power_info->pplib.ucNumStates, GFP_KERNEL); 4026 4026 if (!rdev->pm.dpm.ps) 4027 4027 return -ENOMEM; 4028 - rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); 4029 - rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); 4030 - rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); 4031 4028 4032 4029 for (i = 0; i < power_info->pplib.ucNumStates; i++) { 4033 4030 power_state = (union pplib_power_state *) ··· 4085 4088 eg_pi->acpi_vddci = 0; 4086 4089 pi->min_vddc_in_table = 0; 4087 4090 pi->max_vddc_in_table = 0; 4091 + 4092 + ret = r600_get_platform_caps(rdev); 4093 + if (ret) 4094 + return ret; 4088 4095 4089 4096 ret = ni_parse_power_table(rdev); 4090 4097 if (ret)
+1 -4
drivers/gpu/drm/radeon/r100.c
··· 1193 1193 1194 1194 WREG32(RADEON_CP_RB_CNTL, tmp); 1195 1195 udelay(10); 1196 - ring->rptr = RREG32(RADEON_CP_RB_RPTR); 1197 1196 /* Set cp mode to bus mastering & enable cp*/ 1198 1197 WREG32(RADEON_CP_CSQ_MODE, 1199 1198 REG_SET(RADEON_INDIRECT2_START, indirect2_start) | ··· 2522 2523 2523 2524 rbbm_status = RREG32(R_000E40_RBBM_STATUS); 2524 2525 if (!G_000E40_GUI_ACTIVE(rbbm_status)) { 2525 - radeon_ring_lockup_update(ring); 2526 + radeon_ring_lockup_update(rdev, ring); 2526 2527 return false; 2527 2528 } 2528 - /* force CP activities */ 2529 - radeon_ring_force_activity(rdev, ring); 2530 2529 return radeon_ring_test_lockup(rdev, ring); 2531 2530 } 2532 2531
+1 -5
drivers/gpu/drm/radeon/r600.c
··· 1748 1748 if (!(reset_mask & (RADEON_RESET_GFX | 1749 1749 RADEON_RESET_COMPUTE | 1750 1750 RADEON_RESET_CP))) { 1751 - radeon_ring_lockup_update(ring); 1751 + radeon_ring_lockup_update(rdev, ring); 1752 1752 return false; 1753 1753 } 1754 - /* force CP activities */ 1755 - radeon_ring_force_activity(rdev, ring); 1756 1754 return radeon_ring_test_lockup(rdev, ring); 1757 1755 } 1758 1756 ··· 2601 2603 2602 2604 WREG32(CP_RB_BASE, ring->gpu_addr >> 8); 2603 2605 WREG32(CP_DEBUG, (1 << 27) | (1 << 28)); 2604 - 2605 - ring->rptr = RREG32(CP_RB_RPTR); 2606 2606 2607 2607 r600_cp_start(rdev); 2608 2608 ring->ready = true;
+1 -5
drivers/gpu/drm/radeon/r600_dma.c
··· 176 176 ring->wptr = 0; 177 177 WREG32(DMA_RB_WPTR, ring->wptr << 2); 178 178 179 - ring->rptr = RREG32(DMA_RB_RPTR) >> 2; 180 - 181 179 WREG32(DMA_RB_CNTL, rb_cntl | DMA_RB_ENABLE); 182 180 183 181 ring->ready = true; ··· 219 221 u32 reset_mask = r600_gpu_check_soft_reset(rdev); 220 222 221 223 if (!(reset_mask & RADEON_RESET_DMA)) { 222 - radeon_ring_lockup_update(ring); 224 + radeon_ring_lockup_update(rdev, ring); 223 225 return false; 224 226 } 225 - /* force ring activities */ 226 - radeon_ring_force_activity(rdev, ring); 227 227 return radeon_ring_test_lockup(rdev, ring); 228 228 } 229 229
+47 -1
drivers/gpu/drm/radeon/r600_dpm.c
··· 834 834 return 0; 835 835 } 836 836 837 + int r600_get_platform_caps(struct radeon_device *rdev) 838 + { 839 + struct radeon_mode_info *mode_info = &rdev->mode_info; 840 + union power_info *power_info; 841 + int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo); 842 + u16 data_offset; 843 + u8 frev, crev; 844 + 845 + if (!atom_parse_data_header(mode_info->atom_context, index, NULL, 846 + &frev, &crev, &data_offset)) 847 + return -EINVAL; 848 + power_info = (union power_info *)(mode_info->atom_context->bios + data_offset); 849 + 850 + rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); 851 + rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); 852 + rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); 853 + 854 + return 0; 855 + } 856 + 837 857 /* sizeof(ATOM_PPLIB_EXTENDEDHEADER) */ 838 858 #define SIZE_OF_ATOM_PPLIB_EXTENDEDHEADER_V2 12 839 859 #define SIZE_OF_ATOM_PPLIB_EXTENDEDHEADER_V3 14 ··· 1063 1043 (mode_info->atom_context->bios + data_offset + 1064 1044 le16_to_cpu(ext_hdr->usVCETableOffset) + 1 + 1065 1045 1 + array->ucNumEntries * sizeof(VCEClockInfo)); 1046 + ATOM_PPLIB_VCE_State_Table *states = 1047 + (ATOM_PPLIB_VCE_State_Table *) 1048 + (mode_info->atom_context->bios + data_offset + 1049 + le16_to_cpu(ext_hdr->usVCETableOffset) + 1 + 1050 + 1 + (array->ucNumEntries * sizeof (VCEClockInfo)) + 1051 + 1 + (limits->numEntries * sizeof(ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record))); 1066 1052 ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record *entry; 1053 + ATOM_PPLIB_VCE_State_Record *state_entry; 1054 + VCEClockInfo *vce_clk; 1067 1055 u32 size = limits->numEntries * 1068 1056 sizeof(struct radeon_vce_clock_voltage_dependency_entry); 1069 1057 rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries = ··· 1083 1055 rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.count = 1084 1056 limits->numEntries; 1085 1057 entry = &limits->entries[0]; 1058 + state_entry = &states->entries[0]; 1086 1059 for (i = 0; i < limits->numEntries; i++) { 1087 - VCEClockInfo *vce_clk = (VCEClockInfo *) 1060 + vce_clk = (VCEClockInfo *) 1088 1061 ((u8 *)&array->entries[0] + 1089 1062 (entry->ucVCEClockInfoIndex * sizeof(VCEClockInfo))); 1090 1063 rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries[i].evclk = ··· 1096 1067 le16_to_cpu(entry->usVoltage); 1097 1068 entry = (ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record *) 1098 1069 ((u8 *)entry + sizeof(ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record)); 1070 + } 1071 + for (i = 0; i < states->numEntries; i++) { 1072 + if (i >= RADEON_MAX_VCE_LEVELS) 1073 + break; 1074 + vce_clk = (VCEClockInfo *) 1075 + ((u8 *)&array->entries[0] + 1076 + (state_entry->ucVCEClockInfoIndex * sizeof(VCEClockInfo))); 1077 + rdev->pm.dpm.vce_states[i].evclk = 1078 + le16_to_cpu(vce_clk->usEVClkLow) | (vce_clk->ucEVClkHigh << 16); 1079 + rdev->pm.dpm.vce_states[i].ecclk = 1080 + le16_to_cpu(vce_clk->usECClkLow) | (vce_clk->ucECClkHigh << 16); 1081 + rdev->pm.dpm.vce_states[i].clk_idx = 1082 + state_entry->ucClockInfoIndex & 0x3f; 1083 + rdev->pm.dpm.vce_states[i].pstate = 1084 + (state_entry->ucClockInfoIndex & 0xc0) >> 6; 1085 + state_entry = (ATOM_PPLIB_VCE_State_Record *) 1086 + ((u8 *)state_entry + sizeof(ATOM_PPLIB_VCE_State_Record)); 1099 1087 } 1100 1088 } 1101 1089 if ((le16_to_cpu(ext_hdr->usSize) >= SIZE_OF_ATOM_PPLIB_EXTENDEDHEADER_V3) &&
+2
drivers/gpu/drm/radeon/r600_dpm.h
··· 215 215 216 216 bool r600_is_internal_thermal_sensor(enum radeon_int_thermal_type sensor); 217 217 218 + int r600_get_platform_caps(struct radeon_device *rdev); 219 + 218 220 int r600_parse_extended_power_table(struct radeon_device *rdev); 219 221 void r600_free_extended_power_table(struct radeon_device *rdev); 220 222
+89 -10
drivers/gpu/drm/radeon/radeon.h
··· 113 113 #define RADEONFB_CONN_LIMIT 4 114 114 #define RADEON_BIOS_NUM_SCRATCH 8 115 115 116 - /* max number of rings */ 117 - #define RADEON_NUM_RINGS 6 118 - 119 116 /* fence seq are set to this number when signaled */ 120 117 #define RADEON_FENCE_SIGNALED_SEQ 0LL 121 118 122 119 /* internal ring indices */ 123 120 /* r1xx+ has gfx CP ring */ 124 - #define RADEON_RING_TYPE_GFX_INDEX 0 121 + #define RADEON_RING_TYPE_GFX_INDEX 0 125 122 126 123 /* cayman has 2 compute CP rings */ 127 - #define CAYMAN_RING_TYPE_CP1_INDEX 1 128 - #define CAYMAN_RING_TYPE_CP2_INDEX 2 124 + #define CAYMAN_RING_TYPE_CP1_INDEX 1 125 + #define CAYMAN_RING_TYPE_CP2_INDEX 2 129 126 130 127 /* R600+ has an async dma ring */ 131 128 #define R600_RING_TYPE_DMA_INDEX 3 ··· 130 133 #define CAYMAN_RING_TYPE_DMA1_INDEX 4 131 134 132 135 /* R600+ */ 133 - #define R600_RING_TYPE_UVD_INDEX 5 136 + #define R600_RING_TYPE_UVD_INDEX 5 137 + 138 + /* TN+ */ 139 + #define TN_RING_TYPE_VCE1_INDEX 6 140 + #define TN_RING_TYPE_VCE2_INDEX 7 141 + 142 + /* max number of rings */ 143 + #define RADEON_NUM_RINGS 8 144 + 145 + /* number of hw syncs before falling back on blocking */ 146 + #define RADEON_NUM_SYNCS 4 134 147 135 148 /* number of hw syncs before falling back on blocking */ 136 149 #define RADEON_NUM_SYNCS 4 ··· 796 789 struct radeon_ring { 797 790 struct radeon_bo *ring_obj; 798 791 volatile uint32_t *ring; 799 - unsigned rptr; 800 792 unsigned rptr_offs; 801 793 unsigned rptr_save_reg; 802 794 u64 next_rptr_gpu_addr; ··· 959 953 void radeon_ring_undo(struct radeon_ring *ring); 960 954 void radeon_ring_unlock_undo(struct radeon_device *rdev, struct radeon_ring *cp); 961 955 int radeon_ring_test(struct radeon_device *rdev, struct radeon_ring *cp); 962 - void radeon_ring_force_activity(struct radeon_device *rdev, struct radeon_ring *ring); 963 - void radeon_ring_lockup_update(struct radeon_ring *ring); 956 + void radeon_ring_lockup_update(struct radeon_device *rdev, 957 + struct radeon_ring *ring); 964 958 bool radeon_ring_test_lockup(struct radeon_device *rdev, struct radeon_ring *ring); 965 959 unsigned radeon_ring_backup(struct radeon_device *rdev, struct radeon_ring *ring, 966 960 uint32_t **data); ··· 1261 1255 RADEON_DPM_EVENT_SRC_DIGIAL_OR_EXTERNAL = 4 1262 1256 }; 1263 1257 1258 + #define RADEON_MAX_VCE_LEVELS 6 1259 + 1260 + enum radeon_vce_level { 1261 + RADEON_VCE_LEVEL_AC_ALL = 0, /* AC, All cases */ 1262 + RADEON_VCE_LEVEL_DC_EE = 1, /* DC, entropy encoding */ 1263 + RADEON_VCE_LEVEL_DC_LL_LOW = 2, /* DC, low latency queue, res <= 720 */ 1264 + RADEON_VCE_LEVEL_DC_LL_HIGH = 3, /* DC, low latency queue, 1080 >= res > 720 */ 1265 + RADEON_VCE_LEVEL_DC_GP_LOW = 4, /* DC, general purpose queue, res <= 720 */ 1266 + RADEON_VCE_LEVEL_DC_GP_HIGH = 5, /* DC, general purpose queue, 1080 >= res > 720 */ 1267 + }; 1268 + 1264 1269 struct radeon_ps { 1265 1270 u32 caps; /* vbios flags */ 1266 1271 u32 class; /* vbios flags */ ··· 1282 1265 /* VCE clocks */ 1283 1266 u32 evclk; 1284 1267 u32 ecclk; 1268 + bool vce_active; 1269 + enum radeon_vce_level vce_level; 1285 1270 /* asic priv */ 1286 1271 void *ps_priv; 1287 1272 }; ··· 1458 1439 RADEON_DPM_FORCED_LEVEL_HIGH = 2, 1459 1440 }; 1460 1441 1442 + struct radeon_vce_state { 1443 + /* vce clocks */ 1444 + u32 evclk; 1445 + u32 ecclk; 1446 + /* gpu clocks */ 1447 + u32 sclk; 1448 + u32 mclk; 1449 + u8 clk_idx; 1450 + u8 pstate; 1451 + }; 1452 + 1461 1453 struct radeon_dpm { 1462 1454 struct radeon_ps *ps; 1463 1455 /* number of valid power states */ ··· 1481 1451 struct 
radeon_ps *boot_ps; 1482 1452 /* default uvd power state */ 1483 1453 struct radeon_ps *uvd_ps; 1454 + /* vce requirements */ 1455 + struct radeon_vce_state vce_states[RADEON_MAX_VCE_LEVELS]; 1456 + enum radeon_vce_level vce_level; 1484 1457 enum radeon_pm_state_type state; 1485 1458 enum radeon_pm_state_type user_state; 1486 1459 u32 platform_caps; ··· 1509 1476 /* special states active */ 1510 1477 bool thermal_active; 1511 1478 bool uvd_active; 1479 + bool vce_active; 1512 1480 /* thermal handling */ 1513 1481 struct radeon_dpm_thermal thermal; 1514 1482 /* forced levels */ ··· 1520 1486 }; 1521 1487 1522 1488 void radeon_dpm_enable_uvd(struct radeon_device *rdev, bool enable); 1489 + void radeon_dpm_enable_vce(struct radeon_device *rdev, bool enable); 1523 1490 1524 1491 struct radeon_pm { 1525 1492 struct mutex mutex; ··· 1625 1590 unsigned *optimal_dclk_div); 1626 1591 int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev, 1627 1592 unsigned cg_upll_func_cntl); 1593 + 1594 + /* 1595 + * VCE 1596 + */ 1597 + #define RADEON_MAX_VCE_HANDLES 16 1598 + #define RADEON_VCE_STACK_SIZE (1024*1024) 1599 + #define RADEON_VCE_HEAP_SIZE (4*1024*1024) 1600 + 1601 + struct radeon_vce { 1602 + struct radeon_bo *vcpu_bo; 1603 + void *cpu_addr; 1604 + uint64_t gpu_addr; 1605 + unsigned fw_version; 1606 + unsigned fb_version; 1607 + atomic_t handles[RADEON_MAX_VCE_HANDLES]; 1608 + struct drm_file *filp[RADEON_MAX_VCE_HANDLES]; 1609 + struct delayed_work idle_work; 1610 + }; 1611 + 1612 + int radeon_vce_init(struct radeon_device *rdev); 1613 + void radeon_vce_fini(struct radeon_device *rdev); 1614 + int radeon_vce_suspend(struct radeon_device *rdev); 1615 + int radeon_vce_resume(struct radeon_device *rdev); 1616 + int radeon_vce_get_create_msg(struct radeon_device *rdev, int ring, 1617 + uint32_t handle, struct radeon_fence **fence); 1618 + int radeon_vce_get_destroy_msg(struct radeon_device *rdev, int ring, 1619 + uint32_t handle, struct radeon_fence **fence); 1620 + void radeon_vce_free_handles(struct radeon_device *rdev, struct drm_file *filp); 1621 + void radeon_vce_note_usage(struct radeon_device *rdev); 1622 + int radeon_vce_cs_reloc(struct radeon_cs_parser *p, int lo, int hi); 1623 + int radeon_vce_cs_parse(struct radeon_cs_parser *p); 1624 + bool radeon_vce_semaphore_emit(struct radeon_device *rdev, 1625 + struct radeon_ring *ring, 1626 + struct radeon_semaphore *semaphore, 1627 + bool emit_wait); 1628 + void radeon_vce_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); 1629 + void radeon_vce_fence_emit(struct radeon_device *rdev, 1630 + struct radeon_fence *fence); 1631 + int radeon_vce_ring_test(struct radeon_device *rdev, struct radeon_ring *ring); 1632 + int radeon_vce_ib_test(struct radeon_device *rdev, struct radeon_ring *ring); 1628 1633 1629 1634 struct r600_audio_pin { 1630 1635 int channels; ··· 1855 1780 void (*set_pcie_lanes)(struct radeon_device *rdev, int lanes); 1856 1781 void (*set_clock_gating)(struct radeon_device *rdev, int enable); 1857 1782 int (*set_uvd_clocks)(struct radeon_device *rdev, u32 vclk, u32 dclk); 1783 + int (*set_vce_clocks)(struct radeon_device *rdev, u32 evclk, u32 ecclk); 1858 1784 int (*get_temperature)(struct radeon_device *rdev); 1859 1785 } pm; 1860 1786 /* dynamic power management */ ··· 2262 2186 struct radeon_gem gem; 2263 2187 struct radeon_pm pm; 2264 2188 struct radeon_uvd uvd; 2189 + struct radeon_vce vce; 2265 2190 uint32_t bios_scratch[RADEON_BIOS_NUM_SCRATCH]; 2266 2191 struct radeon_wb wb; 2267 2192 struct radeon_dummy_page 
dummy_page; ··· 2282 2205 const struct firmware *sdma_fw; /* CIK SDMA firmware */ 2283 2206 const struct firmware *smc_fw; /* SMC firmware */ 2284 2207 const struct firmware *uvd_fw; /* UVD firmware */ 2208 + const struct firmware *vce_fw; /* VCE firmware */ 2285 2209 struct r600_vram_scratch vram_scratch; 2286 2210 int msi_enabled; /* msi enabled */ 2287 2211 struct r600_ih ih; /* r6/700 interrupt ring */ ··· 2717 2639 #define radeon_set_pcie_lanes(rdev, l) (rdev)->asic->pm.set_pcie_lanes((rdev), (l)) 2718 2640 #define radeon_set_clock_gating(rdev, e) (rdev)->asic->pm.set_clock_gating((rdev), (e)) 2719 2641 #define radeon_set_uvd_clocks(rdev, v, d) (rdev)->asic->pm.set_uvd_clocks((rdev), (v), (d)) 2642 + #define radeon_set_vce_clocks(rdev, ev, ec) (rdev)->asic->pm.set_vce_clocks((rdev), (ev), (ec)) 2720 2643 #define radeon_get_temperature(rdev) (rdev)->asic->pm.get_temperature((rdev)) 2721 2644 #define radeon_set_surface_reg(rdev, r, f, p, o, s) ((rdev)->asic->surface.set_reg((rdev), (r), (f), (p), (o), (s))) 2722 2645 #define radeon_clear_surface_reg(rdev, r) ((rdev)->asic->surface.clear_reg((rdev), (r)))
+19
drivers/gpu/drm/radeon/radeon_asic.c
··· 1987 1987 .set_wptr = &cik_sdma_set_wptr, 1988 1988 }; 1989 1989 1990 + static struct radeon_asic_ring ci_vce_ring = { 1991 + .ib_execute = &radeon_vce_ib_execute, 1992 + .emit_fence = &radeon_vce_fence_emit, 1993 + .emit_semaphore = &radeon_vce_semaphore_emit, 1994 + .cs_parse = &radeon_vce_cs_parse, 1995 + .ring_test = &radeon_vce_ring_test, 1996 + .ib_test = &radeon_vce_ib_test, 1997 + .is_lockup = &radeon_ring_test_lockup, 1998 + .get_rptr = &vce_v1_0_get_rptr, 1999 + .get_wptr = &vce_v1_0_get_wptr, 2000 + .set_wptr = &vce_v1_0_set_wptr, 2001 + }; 2002 + 1990 2003 static struct radeon_asic ci_asic = { 1991 2004 .init = &cik_init, 1992 2005 .fini = &cik_fini, ··· 2028 2015 [R600_RING_TYPE_DMA_INDEX] = &ci_dma_ring, 2029 2016 [CAYMAN_RING_TYPE_DMA1_INDEX] = &ci_dma_ring, 2030 2017 [R600_RING_TYPE_UVD_INDEX] = &cayman_uvd_ring, 2018 + [TN_RING_TYPE_VCE1_INDEX] = &ci_vce_ring, 2019 + [TN_RING_TYPE_VCE2_INDEX] = &ci_vce_ring, 2031 2020 }, 2032 2021 .irq = { 2033 2022 .set = &cik_irq_set, ··· 2076 2061 .set_pcie_lanes = NULL, 2077 2062 .set_clock_gating = NULL, 2078 2063 .set_uvd_clocks = &cik_set_uvd_clocks, 2064 + .set_vce_clocks = &cik_set_vce_clocks, 2079 2065 .get_temperature = &ci_get_temp, 2080 2066 }, 2081 2067 .dpm = { ··· 2133 2117 [R600_RING_TYPE_DMA_INDEX] = &ci_dma_ring, 2134 2118 [CAYMAN_RING_TYPE_DMA1_INDEX] = &ci_dma_ring, 2135 2119 [R600_RING_TYPE_UVD_INDEX] = &cayman_uvd_ring, 2120 + [TN_RING_TYPE_VCE1_INDEX] = &ci_vce_ring, 2121 + [TN_RING_TYPE_VCE2_INDEX] = &ci_vce_ring, 2136 2122 }, 2137 2123 .irq = { 2138 2124 .set = &cik_irq_set, ··· 2181 2163 .set_pcie_lanes = NULL, 2182 2164 .set_clock_gating = NULL, 2183 2165 .set_uvd_clocks = &cik_set_uvd_clocks, 2166 + .set_vce_clocks = &cik_set_vce_clocks, 2184 2167 .get_temperature = &kv_get_temp, 2185 2168 }, 2186 2169 .dpm = {
+14
drivers/gpu/drm/radeon/radeon_asic.h
··· 717 717 uint32_t cik_pciep_rreg(struct radeon_device *rdev, uint32_t reg); 718 718 void cik_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v); 719 719 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk); 720 + int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk); 720 721 void cik_sdma_fence_ring_emit(struct radeon_device *rdev, 721 722 struct radeon_fence *fence); 722 723 bool cik_sdma_semaphore_ring_emit(struct radeon_device *rdev, ··· 863 862 864 863 /* uvd v4.2 */ 865 864 int uvd_v4_2_resume(struct radeon_device *rdev); 865 + 866 + /* vce v1.0 */ 867 + uint32_t vce_v1_0_get_rptr(struct radeon_device *rdev, 868 + struct radeon_ring *ring); 869 + uint32_t vce_v1_0_get_wptr(struct radeon_device *rdev, 870 + struct radeon_ring *ring); 871 + void vce_v1_0_set_wptr(struct radeon_device *rdev, 872 + struct radeon_ring *ring); 873 + int vce_v1_0_init(struct radeon_device *rdev); 874 + int vce_v1_0_start(struct radeon_device *rdev); 875 + 876 + /* vce v2.0 */ 877 + int vce_v2_0_resume(struct radeon_device *rdev); 866 878 867 879 #endif
+7
drivers/gpu/drm/radeon/radeon_cs.c
··· 147 147 case RADEON_CS_RING_UVD: 148 148 p->ring = R600_RING_TYPE_UVD_INDEX; 149 149 break; 150 + case RADEON_CS_RING_VCE: 151 + /* TODO: only use the low priority ring for now */ 152 + p->ring = TN_RING_TYPE_VCE1_INDEX; 153 + break; 150 154 } 151 155 return 0; 152 156 } ··· 347 343 348 344 if (parser->ring == R600_RING_TYPE_UVD_INDEX) 349 345 radeon_uvd_note_usage(rdev); 346 + else if ((parser->ring == TN_RING_TYPE_VCE1_INDEX) || 347 + (parser->ring == TN_RING_TYPE_VCE2_INDEX)) 348 + radeon_vce_note_usage(rdev); 350 349 351 350 radeon_cs_sync_rings(parser); 352 351 r = radeon_ib_schedule(rdev, &parser->ib, NULL);
+10
drivers/gpu/drm/radeon/radeon_kms.c
··· 433 433 case RADEON_CS_RING_UVD: 434 434 *value = rdev->ring[R600_RING_TYPE_UVD_INDEX].ready; 435 435 break; 436 + case RADEON_CS_RING_VCE: 437 + *value = rdev->ring[TN_RING_TYPE_VCE1_INDEX].ready; 438 + break; 436 439 default: 437 440 return -EINVAL; 438 441 } ··· 479 476 *value = rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk * 10; 480 477 else 481 478 *value = rdev->pm.default_sclk * 10; 479 + break; 480 + case RADEON_INFO_VCE_FW_VERSION: 481 + *value = rdev->vce.fw_version; 482 + break; 483 + case RADEON_INFO_VCE_FB_VERSION: 484 + *value = rdev->vce.fb_version; 482 485 break; 483 486 default: 484 487 DRM_DEBUG_KMS("Invalid request %d\n", info->request); ··· 619 610 if (rdev->cmask_filp == file_priv) 620 611 rdev->cmask_filp = NULL; 621 612 radeon_uvd_free_handles(rdev, file_priv); 613 + radeon_vce_free_handles(rdev, file_priv); 622 614 } 623 615 624 616 /*
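
With the two new info-ioctl queries above, userspace can probe for VCE before submitting work on the new rings. The following is a minimal sketch of such a probe, assuming libdrm's drmCommandWriteRead() and the struct drm_radeon_info layout (a request code plus a user pointer passed in the value field); header paths and error handling are simplified.

/* Hypothetical userspace probe for the VCE firmware version (sketch). */
#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <radeon_drm.h>

static int query_vce_fw_version(int fd, uint32_t *fw_version)
{
    struct drm_radeon_info info;

    memset(&info, 0, sizeof(info));
    info.request = RADEON_INFO_VCE_FW_VERSION; /* added by this series */
    info.value = (uintptr_t)fw_version;        /* kernel writes the result here */

    /* 0 on success; kernels without VCE support return -EINVAL */
    return drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info));
}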
+25
drivers/gpu/drm/radeon/radeon_pm.c
··· 826 826 827 827 /* no need to reprogram if nothing changed unless we are on BTC+ */ 828 828 if (rdev->pm.dpm.current_ps == rdev->pm.dpm.requested_ps) { 829 + /* vce just modifies an existing state so force a change */ 830 + if (ps->vce_active != rdev->pm.dpm.vce_active) 831 + goto force; 829 832 if ((rdev->family < CHIP_BARTS) || (rdev->flags & RADEON_IS_IGP)) { 830 833 /* for pre-BTC and APUs if the num crtcs changed but state is the same, 831 834 * all we need to do is update the display configuration. ··· 865 862 } 866 863 } 867 864 865 + force: 868 866 if (radeon_dpm == 1) { 869 867 printk("switching from power state:\n"); 870 868 radeon_dpm_print_power_state(rdev, rdev->pm.dpm.current_ps); 871 869 printk("switching to power state:\n"); 872 870 radeon_dpm_print_power_state(rdev, rdev->pm.dpm.requested_ps); 873 871 } 872 + 874 873 mutex_lock(&rdev->ddev->struct_mutex); 875 874 down_write(&rdev->pm.mclk_lock); 876 875 mutex_lock(&rdev->ring_lock); 876 + 877 + /* update whether vce is active */ 878 + ps->vce_active = rdev->pm.dpm.vce_active; 877 879 878 880 ret = radeon_dpm_pre_set_power_state(rdev); 879 881 if (ret) ··· 966 958 967 959 radeon_pm_compute_clocks(rdev); 968 960 } 961 + } 962 + 963 + void radeon_dpm_enable_vce(struct radeon_device *rdev, bool enable) 964 + { 965 + if (enable) { 966 + mutex_lock(&rdev->pm.mutex); 967 + rdev->pm.dpm.vce_active = true; 968 + /* XXX select vce level based on ring/task */ 969 + rdev->pm.dpm.vce_level = RADEON_VCE_LEVEL_AC_ALL; 970 + mutex_unlock(&rdev->pm.mutex); 971 + } else { 972 + mutex_lock(&rdev->pm.mutex); 973 + rdev->pm.dpm.vce_active = false; 974 + mutex_unlock(&rdev->pm.mutex); 975 + } 976 + 977 + radeon_pm_compute_clocks(rdev); 969 978 } 970 979 971 980 static void radeon_pm_suspend_old(struct radeon_device *rdev)
+19 -55
drivers/gpu/drm/radeon/radeon_ring.c
··· 342 342 */ 343 343 void radeon_ring_free_size(struct radeon_device *rdev, struct radeon_ring *ring) 344 344 { 345 - ring->rptr = radeon_ring_get_rptr(rdev, ring); 345 + uint32_t rptr = radeon_ring_get_rptr(rdev, ring); 346 + 346 347 /* This works because ring_size is a power of 2 */ 347 - ring->ring_free_dw = (ring->rptr + (ring->ring_size / 4)); 348 + ring->ring_free_dw = rptr + (ring->ring_size / 4); 348 349 ring->ring_free_dw -= ring->wptr; 349 350 ring->ring_free_dw &= ring->ptr_mask; 350 351 if (!ring->ring_free_dw) { 352 + /* this is an empty ring */ 351 353 ring->ring_free_dw = ring->ring_size / 4; 354 + /* update lockup info to avoid false positive */ 355 + radeon_ring_lockup_update(rdev, ring); 352 356 } 353 357 } 354 358 ··· 376 372 /* Align requested size with padding so unlock_commit can 377 373 * pad safely */ 378 374 radeon_ring_free_size(rdev, ring); 379 - if (ring->ring_free_dw == (ring->ring_size / 4)) { 380 - /* This is an empty ring update lockup info to avoid 381 - * false positive. 382 - */ 383 - radeon_ring_lockup_update(ring); 384 - } 385 375 ndw = (ndw + ring->align_mask) & ~ring->align_mask; 386 376 while (ndw > (ring->ring_free_dw - 1)) { 387 377 radeon_ring_free_size(rdev, ring); ··· 476 478 } 477 479 478 480 /** 479 - * radeon_ring_force_activity - add some nop packets to the ring 480 - * 481 - * @rdev: radeon_device pointer 482 - * @ring: radeon_ring structure holding ring information 483 - * 484 - * Add some nop packets to the ring to force activity (all asics). 485 - * Used for lockup detection to see if the rptr is advancing. 486 - */ 487 - void radeon_ring_force_activity(struct radeon_device *rdev, struct radeon_ring *ring) 488 - { 489 - int r; 490 - 491 - radeon_ring_free_size(rdev, ring); 492 - if (ring->rptr == ring->wptr) { 493 - r = radeon_ring_alloc(rdev, ring, 1); 494 - if (!r) { 495 - radeon_ring_write(ring, ring->nop); 496 - radeon_ring_commit(rdev, ring); 497 - } 498 - } 499 - } 500 - 501 - /** 502 481 * radeon_ring_lockup_update - update lockup variables 503 482 * 504 483 * @ring: radeon_ring structure holding ring information 505 484 * 506 485 * Update the last rptr value and timestamp (all asics). 507 486 */ 508 - void radeon_ring_lockup_update(struct radeon_ring *ring) 487 + void radeon_ring_lockup_update(struct radeon_device *rdev, 488 + struct radeon_ring *ring) 509 489 { 510 - ring->last_rptr = ring->rptr; 490 + ring->last_rptr = radeon_ring_get_rptr(rdev, ring); 511 491 ring->last_activity = jiffies; 512 492 } 513 493 ··· 494 518 * @rdev: radeon device structure 495 519 * @ring: radeon_ring structure holding ring information 496 520 * 497 - * We don't need to initialize the lockup tracking information as we will either 498 - * have CP rptr to a different value of jiffies wrap around which will force 499 - * initialization of the lockup tracking informations. 500 - * 501 - * A possible false positivie is if we get call after while and last_cp_rptr == 502 - * the current CP rptr, even if it's unlikely it might happen. To avoid this 503 - * if the elapsed time since last call is bigger than 2 second than we return 504 - * false and update the tracking information. Due to this the caller must call 505 - * radeon_ring_test_lockup several time in less than 2sec for lockup to be reported 506 - * the fencing code should be cautious about that. 507 - * 508 - * Caller should write to the ring to force CP to do something so we don't get 509 - * false positive when CP is just gived nothing to do. 
510 - * 511 - **/ 521 + */ 512 522 bool radeon_ring_test_lockup(struct radeon_device *rdev, struct radeon_ring *ring) 513 523 { 524 + uint32_t rptr = radeon_ring_get_rptr(rdev, ring); 514 525 unsigned long cjiffies, elapsed; 515 526 516 527 cjiffies = jiffies; 517 528 if (!time_after(cjiffies, ring->last_activity)) { 518 529 /* likely a wrap around */ 519 - radeon_ring_lockup_update(ring); 530 + radeon_ring_lockup_update(rdev, ring); 520 531 return false; 521 532 } 522 - ring->rptr = radeon_ring_get_rptr(rdev, ring); 523 - if (ring->rptr != ring->last_rptr) { 533 + if (rptr != ring->last_rptr) { 524 534 /* CP is still working no lockup */ 525 - radeon_ring_lockup_update(ring); 535 + radeon_ring_lockup_update(rdev, ring); 526 536 return false; 527 537 } 528 538 elapsed = jiffies_to_msecs(cjiffies - ring->last_activity); ··· 671 709 if (radeon_debugfs_ring_init(rdev, ring)) { 672 710 DRM_ERROR("Failed to register debugfs file for rings !\n"); 673 711 } 674 - radeon_ring_lockup_update(ring); 712 + radeon_ring_lockup_update(rdev, ring); 675 713 return 0; 676 714 } 677 715 ··· 742 780 743 781 seq_printf(m, "driver's copy of the wptr: 0x%08x [%5d]\n", 744 782 ring->wptr, ring->wptr); 745 - seq_printf(m, "driver's copy of the rptr: 0x%08x [%5d]\n", 746 - ring->rptr, ring->rptr); 747 783 seq_printf(m, "last semaphore signal addr : 0x%016llx\n", 748 784 ring->last_semaphore_signal_addr); 749 785 seq_printf(m, "last semaphore wait addr : 0x%016llx\n", ··· 774 814 static int radeon_dma1_index = R600_RING_TYPE_DMA_INDEX; 775 815 static int radeon_dma2_index = CAYMAN_RING_TYPE_DMA1_INDEX; 776 816 static int r600_uvd_index = R600_RING_TYPE_UVD_INDEX; 817 + static int si_vce1_index = TN_RING_TYPE_VCE1_INDEX; 818 + static int si_vce2_index = TN_RING_TYPE_VCE2_INDEX; 777 819 778 820 static struct drm_info_list radeon_debugfs_ring_info_list[] = { 779 821 {"radeon_ring_gfx", radeon_debugfs_ring_info, 0, &radeon_gfx_index}, ··· 784 822 {"radeon_ring_dma1", radeon_debugfs_ring_info, 0, &radeon_dma1_index}, 785 823 {"radeon_ring_dma2", radeon_debugfs_ring_info, 0, &radeon_dma2_index}, 786 824 {"radeon_ring_uvd", radeon_debugfs_ring_info, 0, &r600_uvd_index}, 825 + {"radeon_ring_vce1", radeon_debugfs_ring_info, 0, &si_vce1_index}, 826 + {"radeon_ring_vce2", radeon_debugfs_ring_info, 0, &si_vce2_index}, 787 827 }; 788 828 789 829 static int radeon_debugfs_sa_info(struct seq_file *m, void *data)
+37 -2
drivers/gpu/drm/radeon/radeon_test.c
··· 257 257 struct radeon_ring *ring, 258 258 struct radeon_fence **fence) 259 259 { 260 + uint32_t handle = ring->idx ^ 0xdeafbeef; 260 261 int r; 261 262 262 263 if (ring->idx == R600_RING_TYPE_UVD_INDEX) { 263 - r = radeon_uvd_get_create_msg(rdev, ring->idx, 1, NULL); 264 + r = radeon_uvd_get_create_msg(rdev, ring->idx, handle, NULL); 264 265 if (r) { 265 266 DRM_ERROR("Failed to get dummy create msg\n"); 266 267 return r; 267 268 } 268 269 269 - r = radeon_uvd_get_destroy_msg(rdev, ring->idx, 1, fence); 270 + r = radeon_uvd_get_destroy_msg(rdev, ring->idx, handle, fence); 270 271 if (r) { 271 272 DRM_ERROR("Failed to get dummy destroy msg\n"); 272 273 return r; 273 274 } 275 + 276 + } else if (ring->idx == TN_RING_TYPE_VCE1_INDEX || 277 + ring->idx == TN_RING_TYPE_VCE2_INDEX) { 278 + r = radeon_vce_get_create_msg(rdev, ring->idx, handle, NULL); 279 + if (r) { 280 + DRM_ERROR("Failed to get dummy create msg\n"); 281 + return r; 282 + } 283 + 284 + r = radeon_vce_get_destroy_msg(rdev, ring->idx, handle, fence); 285 + if (r) { 286 + DRM_ERROR("Failed to get dummy destroy msg\n"); 287 + return r; 288 + } 289 + 274 290 } else { 275 291 r = radeon_ring_lock(rdev, ring, 64); 276 292 if (r) { ··· 502 486 printk(KERN_WARNING "Error while testing ring sync (%d).\n", r); 503 487 } 504 488 489 + static bool radeon_test_sync_possible(struct radeon_ring *ringA, 490 + struct radeon_ring *ringB) 491 + { 492 + if (ringA->idx == TN_RING_TYPE_VCE2_INDEX && 493 + ringB->idx == TN_RING_TYPE_VCE1_INDEX) 494 + return false; 495 + 496 + return true; 497 + } 498 + 505 499 void radeon_test_syncing(struct radeon_device *rdev) 506 500 { 507 501 int i, j, k; ··· 526 500 if (!ringB->ready) 527 501 continue; 528 502 503 + if (!radeon_test_sync_possible(ringA, ringB)) 504 + continue; 505 + 529 506 DRM_INFO("Testing syncing between rings %d and %d...\n", i, j); 530 507 radeon_test_ring_sync(rdev, ringA, ringB); 531 508 ··· 538 509 for (k = 0; k < j; ++k) { 539 510 struct radeon_ring *ringC = &rdev->ring[k]; 540 511 if (!ringC->ready) 512 + continue; 513 + 514 + if (!radeon_test_sync_possible(ringA, ringC)) 515 + continue; 516 + 517 + if (!radeon_test_sync_possible(ringB, ringC)) 541 518 continue; 542 519 543 520 DRM_INFO("Testing syncing between rings %d, %d and %d...\n", i, j, k);
+694
drivers/gpu/drm/radeon/radeon_vce.c
··· 1 + /* 2 + * Copyright 2013 Advanced Micro Devices, Inc. 3 + * All Rights Reserved. 4 + * 5 + * Permission is hereby granted, free of charge, to any person obtaining a 6 + * copy of this software and associated documentation files (the 7 + * "Software"), to deal in the Software without restriction, including 8 + * without limitation the rights to use, copy, modify, merge, publish, 9 + * distribute, sub license, and/or sell copies of the Software, and to 10 + * permit persons to whom the Software is furnished to do so, subject to 11 + * the following conditions: 12 + * 13 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 16 + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 17 + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 18 + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 19 + * USE OR OTHER DEALINGS IN THE SOFTWARE. 20 + * 21 + * The above copyright notice and this permission notice (including the 22 + * next paragraph) shall be included in all copies or substantial portions 23 + * of the Software. 24 + * 25 + * Authors: Christian König <christian.koenig@amd.com> 26 + */ 27 + 28 + #include <linux/firmware.h> 29 + #include <linux/module.h> 30 + #include <drm/drmP.h> 31 + #include <drm/drm.h> 32 + 33 + #include "radeon.h" 34 + #include "radeon_asic.h" 35 + #include "sid.h" 36 + 37 + /* 1 second timeout */ 38 + #define VCE_IDLE_TIMEOUT_MS 1000 39 + 40 + /* Firmware Names */ 41 + #define FIRMWARE_BONAIRE "radeon/BONAIRE_vce.bin" 42 + 43 + MODULE_FIRMWARE(FIRMWARE_BONAIRE); 44 + 45 + static void radeon_vce_idle_work_handler(struct work_struct *work); 46 + 47 + /** 48 + * radeon_vce_init - allocate memory, load vce firmware 49 + * 50 + * @rdev: radeon_device pointer 51 + * 52 + * First step to get VCE online, allocate memory and load the firmware 53 + */ 54 + int radeon_vce_init(struct radeon_device *rdev) 55 + { 56 + static const char *fw_version = "[ATI LIB=VCEFW,"; 57 + static const char *fb_version = "[ATI LIB=VCEFWSTATS,"; 58 + unsigned long size; 59 + const char *fw_name, *c; 60 + uint8_t start, mid, end; 61 + int i, r; 62 + 63 + INIT_DELAYED_WORK(&rdev->vce.idle_work, radeon_vce_idle_work_handler); 64 + 65 + switch (rdev->family) { 66 + case CHIP_BONAIRE: 67 + case CHIP_KAVERI: 68 + case CHIP_KABINI: 69 + fw_name = FIRMWARE_BONAIRE; 70 + break; 71 + 72 + default: 73 + return -EINVAL; 74 + } 75 + 76 + r = request_firmware(&rdev->vce_fw, fw_name, rdev->dev); 77 + if (r) { 78 + dev_err(rdev->dev, "radeon_vce: Can't load firmware \"%s\"\n", 79 + fw_name); 80 + return r; 81 + } 82 + 83 + /* search for firmware version */ 84 + 85 + size = rdev->vce_fw->size - strlen(fw_version) - 9; 86 + c = rdev->vce_fw->data; 87 + for (;size > 0; --size, ++c) 88 + if (strncmp(c, fw_version, strlen(fw_version)) == 0) 89 + break; 90 + 91 + if (size == 0) 92 + return -EINVAL; 93 + 94 + c += strlen(fw_version); 95 + if (sscanf(c, "%2hhd.%2hhd.%2hhd]", &start, &mid, &end) != 3) 96 + return -EINVAL; 97 + 98 + /* search for feedback version */ 99 + 100 + size = rdev->vce_fw->size - strlen(fb_version) - 3; 101 + c = rdev->vce_fw->data; 102 + for (;size > 0; --size, ++c) 103 + if (strncmp(c, fb_version, strlen(fb_version)) == 0) 104 + break; 105 + 106 + if (size == 0) 107 + return -EINVAL; 108 + 109 + c += strlen(fb_version); 110 + if 
(sscanf(c, "%2u]", &rdev->vce.fb_version) != 1) 111 + return -EINVAL; 112 + 113 + DRM_INFO("Found VCE firmware/feedback version %hhd.%hhd.%hhd / %d!\n", 114 + start, mid, end, rdev->vce.fb_version); 115 + 116 + rdev->vce.fw_version = (start << 24) | (mid << 16) | (end << 8); 117 + 118 + /* we can only work with this fw version for now */ 119 + if (rdev->vce.fw_version != ((40 << 24) | (2 << 16) | (2 << 8))) 120 + return -EINVAL; 121 + 122 + /* load firmware into VRAM */ 123 + 124 + size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size) + 125 + RADEON_VCE_STACK_SIZE + RADEON_VCE_HEAP_SIZE; 126 + r = radeon_bo_create(rdev, size, PAGE_SIZE, true, 127 + RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->vce.vcpu_bo); 128 + if (r) { 129 + dev_err(rdev->dev, "(%d) failed to allocate VCE bo\n", r); 130 + return r; 131 + } 132 + 133 + r = radeon_vce_resume(rdev); 134 + if (r) 135 + return r; 136 + 137 + memset(rdev->vce.cpu_addr, 0, size); 138 + memcpy(rdev->vce.cpu_addr, rdev->vce_fw->data, rdev->vce_fw->size); 139 + 140 + r = radeon_vce_suspend(rdev); 141 + if (r) 142 + return r; 143 + 144 + for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) { 145 + atomic_set(&rdev->vce.handles[i], 0); 146 + rdev->vce.filp[i] = NULL; 147 + } 148 + 149 + return 0; 150 + } 151 + 152 + /** 153 + * radeon_vce_fini - free memory 154 + * 155 + * @rdev: radeon_device pointer 156 + * 157 + * Last step on VCE teardown, free firmware memory 158 + */ 159 + void radeon_vce_fini(struct radeon_device *rdev) 160 + { 161 + radeon_vce_suspend(rdev); 162 + radeon_bo_unref(&rdev->vce.vcpu_bo); 163 + } 164 + 165 + /** 166 + * radeon_vce_suspend - unpin VCE fw memory 167 + * 168 + * @rdev: radeon_device pointer 169 + * 170 + * TODO: Test VCE suspend/resume 171 + */ 172 + int radeon_vce_suspend(struct radeon_device *rdev) 173 + { 174 + int r; 175 + 176 + if (rdev->vce.vcpu_bo == NULL) 177 + return 0; 178 + 179 + r = radeon_bo_reserve(rdev->vce.vcpu_bo, false); 180 + if (!r) { 181 + radeon_bo_kunmap(rdev->vce.vcpu_bo); 182 + radeon_bo_unpin(rdev->vce.vcpu_bo); 183 + radeon_bo_unreserve(rdev->vce.vcpu_bo); 184 + } 185 + return r; 186 + } 187 + 188 + /** 189 + * radeon_vce_resume - pin VCE fw memory 190 + * 191 + * @rdev: radeon_device pointer 192 + * 193 + * TODO: Test VCE suspend/resume 194 + */ 195 + int radeon_vce_resume(struct radeon_device *rdev) 196 + { 197 + int r; 198 + 199 + if (rdev->vce.vcpu_bo == NULL) 200 + return -EINVAL; 201 + 202 + r = radeon_bo_reserve(rdev->vce.vcpu_bo, false); 203 + if (r) { 204 + radeon_bo_unref(&rdev->vce.vcpu_bo); 205 + dev_err(rdev->dev, "(%d) failed to reserve VCE bo\n", r); 206 + return r; 207 + } 208 + 209 + r = radeon_bo_pin(rdev->vce.vcpu_bo, RADEON_GEM_DOMAIN_VRAM, 210 + &rdev->vce.gpu_addr); 211 + if (r) { 212 + radeon_bo_unreserve(rdev->vce.vcpu_bo); 213 + radeon_bo_unref(&rdev->vce.vcpu_bo); 214 + dev_err(rdev->dev, "(%d) VCE bo pin failed\n", r); 215 + return r; 216 + } 217 + 218 + r = radeon_bo_kmap(rdev->vce.vcpu_bo, &rdev->vce.cpu_addr); 219 + if (r) { 220 + dev_err(rdev->dev, "(%d) VCE map failed\n", r); 221 + return r; 222 + } 223 + 224 + radeon_bo_unreserve(rdev->vce.vcpu_bo); 225 + 226 + return 0; 227 + } 228 + 229 + /** 230 + * radeon_vce_idle_work_handler - power off VCE 231 + * 232 + * @work: pointer to work structure 233 + * 234 + * power of VCE when it's not used any more 235 + */ 236 + static void radeon_vce_idle_work_handler(struct work_struct *work) 237 + { 238 + struct radeon_device *rdev = 239 + container_of(work, struct radeon_device, vce.idle_work.work); 240 + 241 + if 
((radeon_fence_count_emitted(rdev, TN_RING_TYPE_VCE1_INDEX) == 0) && 242 + (radeon_fence_count_emitted(rdev, TN_RING_TYPE_VCE2_INDEX) == 0)) { 243 + if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { 244 + radeon_dpm_enable_vce(rdev, false); 245 + } else { 246 + radeon_set_vce_clocks(rdev, 0, 0); 247 + } 248 + } else { 249 + schedule_delayed_work(&rdev->vce.idle_work, 250 + msecs_to_jiffies(VCE_IDLE_TIMEOUT_MS)); 251 + } 252 + } 253 + 254 + /** 255 + * radeon_vce_note_usage - power up VCE 256 + * 257 + * @rdev: radeon_device pointer 258 + * 259 + * Make sure VCE is powerd up when we want to use it 260 + */ 261 + void radeon_vce_note_usage(struct radeon_device *rdev) 262 + { 263 + bool streams_changed = false; 264 + bool set_clocks = !cancel_delayed_work_sync(&rdev->vce.idle_work); 265 + set_clocks &= schedule_delayed_work(&rdev->vce.idle_work, 266 + msecs_to_jiffies(VCE_IDLE_TIMEOUT_MS)); 267 + 268 + if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { 269 + /* XXX figure out if the streams changed */ 270 + streams_changed = false; 271 + } 272 + 273 + if (set_clocks || streams_changed) { 274 + if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { 275 + radeon_dpm_enable_vce(rdev, true); 276 + } else { 277 + radeon_set_vce_clocks(rdev, 53300, 40000); 278 + } 279 + } 280 + } 281 + 282 + /** 283 + * radeon_vce_free_handles - free still open VCE handles 284 + * 285 + * @rdev: radeon_device pointer 286 + * @filp: drm file pointer 287 + * 288 + * Close all VCE handles still open by this file pointer 289 + */ 290 + void radeon_vce_free_handles(struct radeon_device *rdev, struct drm_file *filp) 291 + { 292 + int i, r; 293 + for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) { 294 + uint32_t handle = atomic_read(&rdev->vce.handles[i]); 295 + if (!handle || rdev->vce.filp[i] != filp) 296 + continue; 297 + 298 + radeon_vce_note_usage(rdev); 299 + 300 + r = radeon_vce_get_destroy_msg(rdev, TN_RING_TYPE_VCE1_INDEX, 301 + handle, NULL); 302 + if (r) 303 + DRM_ERROR("Error destroying VCE handle (%d)!\n", r); 304 + 305 + rdev->vce.filp[i] = NULL; 306 + atomic_set(&rdev->vce.handles[i], 0); 307 + } 308 + } 309 + 310 + /** 311 + * radeon_vce_get_create_msg - generate a VCE create msg 312 + * 313 + * @rdev: radeon_device pointer 314 + * @ring: ring we should submit the msg to 315 + * @handle: VCE session handle to use 316 + * @fence: optional fence to return 317 + * 318 + * Open up a stream for HW test 319 + */ 320 + int radeon_vce_get_create_msg(struct radeon_device *rdev, int ring, 321 + uint32_t handle, struct radeon_fence **fence) 322 + { 323 + const unsigned ib_size_dw = 1024; 324 + struct radeon_ib ib; 325 + uint64_t dummy; 326 + int i, r; 327 + 328 + r = radeon_ib_get(rdev, ring, &ib, NULL, ib_size_dw * 4); 329 + if (r) { 330 + DRM_ERROR("radeon: failed to get ib (%d).\n", r); 331 + return r; 332 + } 333 + 334 + dummy = ib.gpu_addr + 1024; 335 + 336 + /* stitch together an VCE create msg */ 337 + ib.length_dw = 0; 338 + ib.ptr[ib.length_dw++] = 0x0000000c; /* len */ 339 + ib.ptr[ib.length_dw++] = 0x00000001; /* session cmd */ 340 + ib.ptr[ib.length_dw++] = handle; 341 + 342 + ib.ptr[ib.length_dw++] = 0x00000030; /* len */ 343 + ib.ptr[ib.length_dw++] = 0x01000001; /* create cmd */ 344 + ib.ptr[ib.length_dw++] = 0x00000000; 345 + ib.ptr[ib.length_dw++] = 0x00000042; 346 + ib.ptr[ib.length_dw++] = 0x0000000a; 347 + ib.ptr[ib.length_dw++] = 0x00000001; 348 + ib.ptr[ib.length_dw++] = 0x00000080; 349 + ib.ptr[ib.length_dw++] = 0x00000060; 350 + 
ib.ptr[ib.length_dw++] = 0x00000100; 351 + ib.ptr[ib.length_dw++] = 0x00000100; 352 + ib.ptr[ib.length_dw++] = 0x0000000c; 353 + ib.ptr[ib.length_dw++] = 0x00000000; 354 + 355 + ib.ptr[ib.length_dw++] = 0x00000014; /* len */ 356 + ib.ptr[ib.length_dw++] = 0x05000005; /* feedback buffer */ 357 + ib.ptr[ib.length_dw++] = upper_32_bits(dummy); 358 + ib.ptr[ib.length_dw++] = dummy; 359 + ib.ptr[ib.length_dw++] = 0x00000001; 360 + 361 + for (i = ib.length_dw; i < ib_size_dw; ++i) 362 + ib.ptr[i] = 0x0; 363 + 364 + r = radeon_ib_schedule(rdev, &ib, NULL); 365 + if (r) { 366 + DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); 367 + } 368 + 369 + if (fence) 370 + *fence = radeon_fence_ref(ib.fence); 371 + 372 + radeon_ib_free(rdev, &ib); 373 + 374 + return r; 375 + } 376 + 377 + /** 378 + * radeon_vce_get_destroy_msg - generate a VCE destroy msg 379 + * 380 + * @rdev: radeon_device pointer 381 + * @ring: ring we should submit the msg to 382 + * @handle: VCE session handle to use 383 + * @fence: optional fence to return 384 + * 385 + * Close up a stream for HW test or if userspace failed to do so 386 + */ 387 + int radeon_vce_get_destroy_msg(struct radeon_device *rdev, int ring, 388 + uint32_t handle, struct radeon_fence **fence) 389 + { 390 + const unsigned ib_size_dw = 1024; 391 + struct radeon_ib ib; 392 + uint64_t dummy; 393 + int i, r; 394 + 395 + r = radeon_ib_get(rdev, ring, &ib, NULL, ib_size_dw * 4); 396 + if (r) { 397 + DRM_ERROR("radeon: failed to get ib (%d).\n", r); 398 + return r; 399 + } 400 + 401 + dummy = ib.gpu_addr + 1024; 402 + 403 + /* stitch together an VCE destroy msg */ 404 + ib.length_dw = 0; 405 + ib.ptr[ib.length_dw++] = 0x0000000c; /* len */ 406 + ib.ptr[ib.length_dw++] = 0x00000001; /* session cmd */ 407 + ib.ptr[ib.length_dw++] = handle; 408 + 409 + ib.ptr[ib.length_dw++] = 0x00000014; /* len */ 410 + ib.ptr[ib.length_dw++] = 0x05000005; /* feedback buffer */ 411 + ib.ptr[ib.length_dw++] = upper_32_bits(dummy); 412 + ib.ptr[ib.length_dw++] = dummy; 413 + ib.ptr[ib.length_dw++] = 0x00000001; 414 + 415 + ib.ptr[ib.length_dw++] = 0x00000008; /* len */ 416 + ib.ptr[ib.length_dw++] = 0x02000001; /* destroy cmd */ 417 + 418 + for (i = ib.length_dw; i < ib_size_dw; ++i) 419 + ib.ptr[i] = 0x0; 420 + 421 + r = radeon_ib_schedule(rdev, &ib, NULL); 422 + if (r) { 423 + DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); 424 + } 425 + 426 + if (fence) 427 + *fence = radeon_fence_ref(ib.fence); 428 + 429 + radeon_ib_free(rdev, &ib); 430 + 431 + return r; 432 + } 433 + 434 + /** 435 + * radeon_vce_cs_reloc - command submission relocation 436 + * 437 + * @p: parser context 438 + * @lo: address of lower dword 439 + * @hi: address of higher dword 440 + * 441 + * Patch relocation inside command stream with real buffer address 442 + */ 443 + int radeon_vce_cs_reloc(struct radeon_cs_parser *p, int lo, int hi) 444 + { 445 + struct radeon_cs_chunk *relocs_chunk; 446 + uint64_t offset; 447 + unsigned idx; 448 + 449 + relocs_chunk = &p->chunks[p->chunk_relocs_idx]; 450 + offset = radeon_get_ib_value(p, lo); 451 + idx = radeon_get_ib_value(p, hi); 452 + 453 + if (idx >= relocs_chunk->length_dw) { 454 + DRM_ERROR("Relocs at %d after relocations chunk end %d !\n", 455 + idx, relocs_chunk->length_dw); 456 + return -EINVAL; 457 + } 458 + 459 + offset += p->relocs_ptr[(idx / 4)]->lobj.gpu_offset; 460 + 461 + p->ib.ptr[lo] = offset & 0xFFFFFFFF; 462 + p->ib.ptr[hi] = offset >> 32; 463 + 464 + return 0; 465 + } 466 + 467 + /** 468 + * radeon_vce_cs_parse - parse and validate the command 
stream 469 + * 470 + * @p: parser context 471 + * 472 + */ 473 + int radeon_vce_cs_parse(struct radeon_cs_parser *p) 474 + { 475 + uint32_t handle = 0; 476 + bool destroy = false; 477 + int i, r; 478 + 479 + while (p->idx < p->chunks[p->chunk_ib_idx].length_dw) { 480 + uint32_t len = radeon_get_ib_value(p, p->idx); 481 + uint32_t cmd = radeon_get_ib_value(p, p->idx + 1); 482 + 483 + if ((len < 8) || (len & 3)) { 484 + DRM_ERROR("invalid VCE command length (%d)!\n", len); 485 + return -EINVAL; 486 + } 487 + 488 + switch (cmd) { 489 + case 0x00000001: // session 490 + handle = radeon_get_ib_value(p, p->idx + 2); 491 + break; 492 + 493 + case 0x00000002: // task info 494 + case 0x01000001: // create 495 + case 0x04000001: // config extension 496 + case 0x04000002: // pic control 497 + case 0x04000005: // rate control 498 + case 0x04000007: // motion estimation 499 + case 0x04000008: // rdo 500 + break; 501 + 502 + case 0x03000001: // encode 503 + r = radeon_vce_cs_reloc(p, p->idx + 10, p->idx + 9); 504 + if (r) 505 + return r; 506 + 507 + r = radeon_vce_cs_reloc(p, p->idx + 12, p->idx + 11); 508 + if (r) 509 + return r; 510 + break; 511 + 512 + case 0x02000001: // destroy 513 + destroy = true; 514 + break; 515 + 516 + case 0x05000001: // context buffer 517 + case 0x05000004: // video bitstream buffer 518 + case 0x05000005: // feedback buffer 519 + r = radeon_vce_cs_reloc(p, p->idx + 3, p->idx + 2); 520 + if (r) 521 + return r; 522 + break; 523 + 524 + default: 525 + DRM_ERROR("invalid VCE command (0x%x)!\n", cmd); 526 + return -EINVAL; 527 + } 528 + 529 + p->idx += len / 4; 530 + } 531 + 532 + if (destroy) { 533 + /* IB contains a destroy msg, free the handle */ 534 + for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) 535 + atomic_cmpxchg(&p->rdev->vce.handles[i], handle, 0); 536 + 537 + return 0; 538 + } 539 + 540 + /* create or encode, validate the handle */ 541 + for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) { 542 + if (atomic_read(&p->rdev->vce.handles[i]) == handle) 543 + return 0; 544 + } 545 + 546 + /* handle not found try to alloc a new one */ 547 + for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) { 548 + if (!atomic_cmpxchg(&p->rdev->vce.handles[i], 0, handle)) { 549 + p->rdev->vce.filp[i] = p->filp; 550 + return 0; 551 + } 552 + } 553 + 554 + DRM_ERROR("No more free VCE handles!\n"); 555 + return -EINVAL; 556 + } 557 + 558 + /** 559 + * radeon_vce_semaphore_emit - emit a semaphore command 560 + * 561 + * @rdev: radeon_device pointer 562 + * @ring: engine to use 563 + * @semaphore: address of semaphore 564 + * @emit_wait: true=emit wait, false=emit signal 565 + * 566 + */ 567 + bool radeon_vce_semaphore_emit(struct radeon_device *rdev, 568 + struct radeon_ring *ring, 569 + struct radeon_semaphore *semaphore, 570 + bool emit_wait) 571 + { 572 + uint64_t addr = semaphore->gpu_addr; 573 + 574 + radeon_ring_write(ring, VCE_CMD_SEMAPHORE); 575 + radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF); 576 + radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF); 577 + radeon_ring_write(ring, 0x01003000 | (emit_wait ? 
1 : 0)); 578 + if (!emit_wait) 579 + radeon_ring_write(ring, VCE_CMD_END); 580 + 581 + return true; 582 + } 583 + 584 + /** 585 + * radeon_vce_ib_execute - execute indirect buffer 586 + * 587 + * @rdev: radeon_device pointer 588 + * @ib: the IB to execute 589 + * 590 + */ 591 + void radeon_vce_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) 592 + { 593 + struct radeon_ring *ring = &rdev->ring[ib->ring]; 594 + radeon_ring_write(ring, VCE_CMD_IB); 595 + radeon_ring_write(ring, ib->gpu_addr); 596 + radeon_ring_write(ring, upper_32_bits(ib->gpu_addr)); 597 + radeon_ring_write(ring, ib->length_dw); 598 + } 599 + 600 + /** 601 + * radeon_vce_fence_emit - add a fence command to the ring 602 + * 603 + * @rdev: radeon_device pointer 604 + * @fence: the fence 605 + * 606 + */ 607 + void radeon_vce_fence_emit(struct radeon_device *rdev, 608 + struct radeon_fence *fence) 609 + { 610 + struct radeon_ring *ring = &rdev->ring[fence->ring]; 611 + uint32_t addr = rdev->fence_drv[fence->ring].gpu_addr; 612 + 613 + radeon_ring_write(ring, VCE_CMD_FENCE); 614 + radeon_ring_write(ring, addr); 615 + radeon_ring_write(ring, upper_32_bits(addr)); 616 + radeon_ring_write(ring, fence->seq); 617 + radeon_ring_write(ring, VCE_CMD_TRAP); 618 + radeon_ring_write(ring, VCE_CMD_END); 619 + } 620 + 621 + /** 622 + * radeon_vce_ring_test - test if VCE ring is working 623 + * 624 + * @rdev: radeon_device pointer 625 + * @ring: the engine to test on 626 + * 627 + */ 628 + int radeon_vce_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) 629 + { 630 + uint32_t rptr = vce_v1_0_get_rptr(rdev, ring); 631 + unsigned i; 632 + int r; 633 + 634 + r = radeon_ring_lock(rdev, ring, 16); 635 + if (r) { 636 + DRM_ERROR("radeon: vce failed to lock ring %d (%d).\n", 637 + ring->idx, r); 638 + return r; 639 + } 640 + radeon_ring_write(ring, VCE_CMD_END); 641 + radeon_ring_unlock_commit(rdev, ring); 642 + 643 + for (i = 0; i < rdev->usec_timeout; i++) { 644 + if (vce_v1_0_get_rptr(rdev, ring) != rptr) 645 + break; 646 + DRM_UDELAY(1); 647 + } 648 + 649 + if (i < rdev->usec_timeout) { 650 + DRM_INFO("ring test on %d succeeded in %d usecs\n", 651 + ring->idx, i); 652 + } else { 653 + DRM_ERROR("radeon: ring %d test failed\n", 654 + ring->idx); 655 + r = -ETIMEDOUT; 656 + } 657 + 658 + return r; 659 + } 660 + 661 + /** 662 + * radeon_vce_ib_test - test if VCE IBs are working 663 + * 664 + * @rdev: radeon_device pointer 665 + * @ring: the engine to test on 666 + * 667 + */ 668 + int radeon_vce_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) 669 + { 670 + struct radeon_fence *fence = NULL; 671 + int r; 672 + 673 + r = radeon_vce_get_create_msg(rdev, ring->idx, 1, NULL); 674 + if (r) { 675 + DRM_ERROR("radeon: failed to get create msg (%d).\n", r); 676 + goto error; 677 + } 678 + 679 + r = radeon_vce_get_destroy_msg(rdev, ring->idx, 1, &fence); 680 + if (r) { 681 + DRM_ERROR("radeon: failed to get destroy ib (%d).\n", r); 682 + goto error; 683 + } 684 + 685 + r = radeon_fence_wait(fence, false); 686 + if (r) { 687 + DRM_ERROR("radeon: fence wait failed (%d).\n", r); 688 + } else { 689 + DRM_INFO("ib test on ring %d succeeded\n", ring->idx); 690 + } 691 + error: 692 + radeon_fence_unref(&fence); 693 + return r; 694 + }
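Aside on the version check in radeon_vce_init() above: the three components parsed from the "[ATI LIB=VCEFW," signature are packed into one 32-bit value before being compared against the single release the driver accepts (40.2.2). A minimal standalone sketch of that encoding; the helper name is illustrative and not part of the driver:

#include <stdint.h>

/* Pack a VCE firmware version the way radeon_vce_init() does:
 * major in bits 31:24, minor in bits 23:16, revision in bits 15:8. */
static uint32_t vce_pack_fw_version(uint8_t start, uint8_t mid, uint8_t end)
{
	return ((uint32_t)start << 24) | ((uint32_t)mid << 16) | ((uint32_t)end << 8);
}

/* The only firmware accepted for now is 40.2.2,
 * i.e. vce_pack_fw_version(40, 2, 2) == 0x28020200. */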
+4 -3
drivers/gpu/drm/radeon/rs780_dpm.c
··· 807 807 power_info->pplib.ucNumStates, GFP_KERNEL); 808 808 if (!rdev->pm.dpm.ps) 809 809 return -ENOMEM; 810 - rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); 811 - rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); 812 - rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); 813 810 814 811 for (i = 0; i < power_info->pplib.ucNumStates; i++) { 815 812 power_state = (union pplib_power_state *) ··· 855 858 if (pi == NULL) 856 859 return -ENOMEM; 857 860 rdev->pm.dpm.priv = pi; 861 + 862 + ret = r600_get_platform_caps(rdev); 863 + if (ret) 864 + return ret; 858 865 859 866 ret = rs780_parse_power_table(rdev); 860 867 if (ret)
+4 -3
drivers/gpu/drm/radeon/rv6xx_dpm.c
··· 1891 1891 power_info->pplib.ucNumStates, GFP_KERNEL); 1892 1892 if (!rdev->pm.dpm.ps) 1893 1893 return -ENOMEM; 1894 - rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); 1895 - rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); 1896 - rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); 1897 1894 1898 1895 for (i = 0; i < power_info->pplib.ucNumStates; i++) { 1899 1896 power_state = (union pplib_power_state *) ··· 1939 1942 if (pi == NULL) 1940 1943 return -ENOMEM; 1941 1944 rdev->pm.dpm.priv = pi; 1945 + 1946 + ret = r600_get_platform_caps(rdev); 1947 + if (ret) 1948 + return ret; 1942 1949 1943 1950 ret = rv6xx_parse_power_table(rdev); 1944 1951 if (ret)
+4 -3
drivers/gpu/drm/radeon/rv770_dpm.c
··· 2281 2281 power_info->pplib.ucNumStates, GFP_KERNEL); 2282 2282 if (!rdev->pm.dpm.ps) 2283 2283 return -ENOMEM; 2284 - rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); 2285 - rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); 2286 - rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); 2287 2284 2288 2285 for (i = 0; i < power_info->pplib.ucNumStates; i++) { 2289 2286 power_state = (union pplib_power_state *) ··· 2357 2360 pi->acpi_vddc = 0; 2358 2361 pi->min_vddc_in_table = 0; 2359 2362 pi->max_vddc_in_table = 0; 2363 + 2364 + ret = r600_get_platform_caps(rdev); 2365 + if (ret) 2366 + return ret; 2360 2367 2361 2368 ret = rv7xx_parse_power_table(rdev); 2362 2369 if (ret)
+1 -9
drivers/gpu/drm/radeon/si.c
··· 3434 3434 3435 3435 WREG32(CP_RB0_BASE, ring->gpu_addr >> 8); 3436 3436 3437 - ring->rptr = RREG32(CP_RB0_RPTR); 3438 - 3439 3437 /* ring1 - compute only */ 3440 3438 /* Set ring buffer size */ 3441 3439 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; ··· 3458 3460 3459 3461 WREG32(CP_RB1_BASE, ring->gpu_addr >> 8); 3460 3462 3461 - ring->rptr = RREG32(CP_RB1_RPTR); 3462 - 3463 3463 /* ring2 - compute only */ 3464 3464 /* Set ring buffer size */ 3465 3465 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; ··· 3481 3485 WREG32(CP_RB2_CNTL, tmp); 3482 3486 3483 3487 WREG32(CP_RB2_BASE, ring->gpu_addr >> 8); 3484 - 3485 - ring->rptr = RREG32(CP_RB2_RPTR); 3486 3488 3487 3489 /* start the rings */ 3488 3490 si_cp_start(rdev); ··· 3866 3872 if (!(reset_mask & (RADEON_RESET_GFX | 3867 3873 RADEON_RESET_COMPUTE | 3868 3874 RADEON_RESET_CP))) { 3869 - radeon_ring_lockup_update(ring); 3875 + radeon_ring_lockup_update(rdev, ring); 3870 3876 return false; 3871 3877 } 3872 - /* force CP activities */ 3873 - radeon_ring_force_activity(rdev, ring); 3874 3878 return radeon_ring_test_lockup(rdev, ring); 3875 3879 } 3876 3880
+1 -3
drivers/gpu/drm/radeon/si_dma.c
··· 49 49 mask = RADEON_RESET_DMA1; 50 50 51 51 if (!(reset_mask & mask)) { 52 - radeon_ring_lockup_update(ring); 52 + radeon_ring_lockup_update(rdev, ring); 53 53 return false; 54 54 } 55 - /* force ring activities */ 56 - radeon_ring_force_activity(rdev, ring); 57 55 return radeon_ring_test_lockup(rdev, ring); 58 56 } 59 57
+4 -3
drivers/gpu/drm/radeon/si_dpm.c
··· 6271 6271 if (!rdev->pm.dpm.ps) 6272 6272 return -ENOMEM; 6273 6273 power_state_offset = (u8 *)state_array->states; 6274 - rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); 6275 - rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); 6276 - rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); 6277 6274 for (i = 0; i < state_array->ucNumEntries; i++) { 6278 6275 u8 *idx; 6279 6276 power_state = (union pplib_power_state *)power_state_offset; ··· 6346 6349 eg_pi->acpi_vddci = 0; 6347 6350 pi->min_vddc_in_table = 0; 6348 6351 pi->max_vddc_in_table = 0; 6352 + 6353 + ret = r600_get_platform_caps(rdev); 6354 + if (ret) 6355 + return ret; 6349 6356 6350 6357 ret = si_parse_power_table(rdev); 6351 6358 if (ret)
+47
drivers/gpu/drm/radeon/sid.h
··· 1798 1798 #define DMA_PACKET_CONSTANT_FILL 0xd 1799 1799 #define DMA_PACKET_NOP 0xf 1800 1800 1801 + #define VCE_STATUS 0x20004 1802 + #define VCE_VCPU_CNTL 0x20014 1803 + #define VCE_CLK_EN (1 << 0) 1804 + #define VCE_VCPU_CACHE_OFFSET0 0x20024 1805 + #define VCE_VCPU_CACHE_SIZE0 0x20028 1806 + #define VCE_VCPU_CACHE_OFFSET1 0x2002c 1807 + #define VCE_VCPU_CACHE_SIZE1 0x20030 1808 + #define VCE_VCPU_CACHE_OFFSET2 0x20034 1809 + #define VCE_VCPU_CACHE_SIZE2 0x20038 1810 + #define VCE_SOFT_RESET 0x20120 1811 + #define VCE_ECPU_SOFT_RESET (1 << 0) 1812 + #define VCE_FME_SOFT_RESET (1 << 2) 1813 + #define VCE_RB_BASE_LO2 0x2016c 1814 + #define VCE_RB_BASE_HI2 0x20170 1815 + #define VCE_RB_SIZE2 0x20174 1816 + #define VCE_RB_RPTR2 0x20178 1817 + #define VCE_RB_WPTR2 0x2017c 1818 + #define VCE_RB_BASE_LO 0x20180 1819 + #define VCE_RB_BASE_HI 0x20184 1820 + #define VCE_RB_SIZE 0x20188 1821 + #define VCE_RB_RPTR 0x2018c 1822 + #define VCE_RB_WPTR 0x20190 1823 + #define VCE_CLOCK_GATING_A 0x202f8 1824 + #define VCE_CLOCK_GATING_B 0x202fc 1825 + #define VCE_UENC_CLOCK_GATING 0x205bc 1826 + #define VCE_UENC_REG_CLOCK_GATING 0x205c0 1827 + #define VCE_FW_REG_STATUS 0x20e10 1828 + # define VCE_FW_REG_STATUS_BUSY (1 << 0) 1829 + # define VCE_FW_REG_STATUS_PASS (1 << 3) 1830 + # define VCE_FW_REG_STATUS_DONE (1 << 11) 1831 + #define VCE_LMI_FW_START_KEYSEL 0x20e18 1832 + #define VCE_LMI_FW_PERIODIC_CTRL 0x20e20 1833 + #define VCE_LMI_CTRL2 0x20e74 1834 + #define VCE_LMI_CTRL 0x20e98 1835 + #define VCE_LMI_VM_CTRL 0x20ea0 1836 + #define VCE_LMI_SWAP_CNTL 0x20eb4 1837 + #define VCE_LMI_SWAP_CNTL1 0x20eb8 1838 + #define VCE_LMI_CACHE_CTRL 0x20ef4 1839 + 1840 + #define VCE_CMD_NO_OP 0x00000000 1841 + #define VCE_CMD_END 0x00000001 1842 + #define VCE_CMD_IB 0x00000002 1843 + #define VCE_CMD_FENCE 0x00000003 1844 + #define VCE_CMD_TRAP 0x00000004 1845 + #define VCE_CMD_IB_AUTO 0x00000005 1846 + #define VCE_CMD_SEMAPHORE 0x00000006 1847 + 1801 1848 #endif
+4 -3
drivers/gpu/drm/radeon/sumo_dpm.c
··· 1484 1484 if (!rdev->pm.dpm.ps) 1485 1485 return -ENOMEM; 1486 1486 power_state_offset = (u8 *)state_array->states; 1487 - rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); 1488 - rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); 1489 - rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); 1490 1487 for (i = 0; i < state_array->ucNumEntries; i++) { 1491 1488 u8 *idx; 1492 1489 power_state = (union pplib_power_state *)power_state_offset; ··· 1768 1771 return ret; 1769 1772 1770 1773 sumo_construct_boot_and_acpi_state(rdev); 1774 + 1775 + ret = r600_get_platform_caps(rdev); 1776 + if (ret) 1777 + return ret; 1771 1778 1772 1779 ret = sumo_parse_power_table(rdev); 1773 1780 if (ret)
+4 -3
drivers/gpu/drm/radeon/trinity_dpm.c
··· 1694 1694 if (!rdev->pm.dpm.ps) 1695 1695 return -ENOMEM; 1696 1696 power_state_offset = (u8 *)state_array->states; 1697 - rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); 1698 - rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); 1699 - rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); 1700 1697 for (i = 0; i < state_array->ucNumEntries; i++) { 1701 1698 u8 *idx; 1702 1699 power_state = (union pplib_power_state *)power_state_offset; ··· 1891 1894 return ret; 1892 1895 1893 1896 trinity_construct_boot_state(rdev); 1897 + 1898 + ret = r600_get_platform_caps(rdev); 1899 + if (ret) 1900 + return ret; 1894 1901 1895 1902 ret = trinity_parse_power_table(rdev); 1896 1903 if (ret)
+1 -1
drivers/gpu/drm/radeon/uvd_v1_0.c
··· 262 262 /* Initialize the ring buffer's read and write pointers */ 263 263 WREG32(UVD_RBC_RB_RPTR, 0x0); 264 264 265 - ring->wptr = ring->rptr = RREG32(UVD_RBC_RB_RPTR); 265 + ring->wptr = RREG32(UVD_RBC_RB_RPTR); 266 266 WREG32(UVD_RBC_RB_WPTR, ring->wptr); 267 267 268 268 /* set the ring address */
+187
drivers/gpu/drm/radeon/vce_v1_0.c
··· 1 + /* 2 + * Copyright 2013 Advanced Micro Devices, Inc. 3 + * All Rights Reserved. 4 + * 5 + * Permission is hereby granted, free of charge, to any person obtaining a 6 + * copy of this software and associated documentation files (the 7 + * "Software"), to deal in the Software without restriction, including 8 + * without limitation the rights to use, copy, modify, merge, publish, 9 + * distribute, sub license, and/or sell copies of the Software, and to 10 + * permit persons to whom the Software is furnished to do so, subject to 11 + * the following conditions: 12 + * 13 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 16 + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 17 + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 18 + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 19 + * USE OR OTHER DEALINGS IN THE SOFTWARE. 20 + * 21 + * The above copyright notice and this permission notice (including the 22 + * next paragraph) shall be included in all copies or substantial portions 23 + * of the Software. 24 + * 25 + * Authors: Christian König <christian.koenig@amd.com> 26 + */ 27 + 28 + #include <linux/firmware.h> 29 + #include <drm/drmP.h> 30 + #include "radeon.h" 31 + #include "radeon_asic.h" 32 + #include "sid.h" 33 + 34 + /** 35 + * vce_v1_0_get_rptr - get read pointer 36 + * 37 + * @rdev: radeon_device pointer 38 + * @ring: radeon_ring pointer 39 + * 40 + * Returns the current hardware read pointer 41 + */ 42 + uint32_t vce_v1_0_get_rptr(struct radeon_device *rdev, 43 + struct radeon_ring *ring) 44 + { 45 + if (ring->idx == TN_RING_TYPE_VCE1_INDEX) 46 + return RREG32(VCE_RB_RPTR); 47 + else 48 + return RREG32(VCE_RB_RPTR2); 49 + } 50 + 51 + /** 52 + * vce_v1_0_get_wptr - get write pointer 53 + * 54 + * @rdev: radeon_device pointer 55 + * @ring: radeon_ring pointer 56 + * 57 + * Returns the current hardware write pointer 58 + */ 59 + uint32_t vce_v1_0_get_wptr(struct radeon_device *rdev, 60 + struct radeon_ring *ring) 61 + { 62 + if (ring->idx == TN_RING_TYPE_VCE1_INDEX) 63 + return RREG32(VCE_RB_WPTR); 64 + else 65 + return RREG32(VCE_RB_WPTR2); 66 + } 67 + 68 + /** 69 + * vce_v1_0_set_wptr - set write pointer 70 + * 71 + * @rdev: radeon_device pointer 72 + * @ring: radeon_ring pointer 73 + * 74 + * Commits the write pointer to the hardware 75 + */ 76 + void vce_v1_0_set_wptr(struct radeon_device *rdev, 77 + struct radeon_ring *ring) 78 + { 79 + if (ring->idx == TN_RING_TYPE_VCE1_INDEX) 80 + WREG32(VCE_RB_WPTR, ring->wptr); 81 + else 82 + WREG32(VCE_RB_WPTR2, ring->wptr); 83 + } 84 + 85 + /** 86 + * vce_v1_0_start - start VCE block 87 + * 88 + * @rdev: radeon_device pointer 89 + * 90 + * Setup and start the VCE block 91 + */ 92 + int vce_v1_0_start(struct radeon_device *rdev) 93 + { 94 + struct radeon_ring *ring; 95 + int i, j, r; 96 + 97 + /* set BUSY flag */ 98 + WREG32_P(VCE_STATUS, 1, ~1); 99 + 100 + ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; 101 + WREG32(VCE_RB_RPTR, ring->wptr); 102 + WREG32(VCE_RB_WPTR, ring->wptr); 103 + WREG32(VCE_RB_BASE_LO, ring->gpu_addr); 104 + WREG32(VCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); 105 + WREG32(VCE_RB_SIZE, ring->ring_size / 4); 106 + 107 + ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; 108 + WREG32(VCE_RB_RPTR2, ring->wptr); 109 + WREG32(VCE_RB_WPTR2, ring->wptr); 110 
+ WREG32(VCE_RB_BASE_LO2, ring->gpu_addr); 111 + WREG32(VCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); 112 + WREG32(VCE_RB_SIZE2, ring->ring_size / 4); 113 + 114 + WREG32_P(VCE_VCPU_CNTL, VCE_CLK_EN, ~VCE_CLK_EN); 115 + 116 + WREG32_P(VCE_SOFT_RESET, 117 + VCE_ECPU_SOFT_RESET | 118 + VCE_FME_SOFT_RESET, ~( 119 + VCE_ECPU_SOFT_RESET | 120 + VCE_FME_SOFT_RESET)); 121 + 122 + mdelay(100); 123 + 124 + WREG32_P(VCE_SOFT_RESET, 0, ~( 125 + VCE_ECPU_SOFT_RESET | 126 + VCE_FME_SOFT_RESET)); 127 + 128 + for (i = 0; i < 10; ++i) { 129 + uint32_t status; 130 + for (j = 0; j < 100; ++j) { 131 + status = RREG32(VCE_STATUS); 132 + if (status & 2) 133 + break; 134 + mdelay(10); 135 + } 136 + r = 0; 137 + if (status & 2) 138 + break; 139 + 140 + DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n"); 141 + WREG32_P(VCE_SOFT_RESET, VCE_ECPU_SOFT_RESET, ~VCE_ECPU_SOFT_RESET); 142 + mdelay(10); 143 + WREG32_P(VCE_SOFT_RESET, 0, ~VCE_ECPU_SOFT_RESET); 144 + mdelay(10); 145 + r = -1; 146 + } 147 + 148 + /* clear BUSY flag */ 149 + WREG32_P(VCE_STATUS, 0, ~1); 150 + 151 + if (r) { 152 + DRM_ERROR("VCE not responding, giving up!!!\n"); 153 + return r; 154 + } 155 + 156 + return 0; 157 + } 158 + 159 + int vce_v1_0_init(struct radeon_device *rdev) 160 + { 161 + struct radeon_ring *ring; 162 + int r; 163 + 164 + r = vce_v1_0_start(rdev); 165 + if (r) 166 + return r; 167 + 168 + ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; 169 + ring->ready = true; 170 + r = radeon_ring_test(rdev, TN_RING_TYPE_VCE1_INDEX, ring); 171 + if (r) { 172 + ring->ready = false; 173 + return r; 174 + } 175 + 176 + ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; 177 + ring->ready = true; 178 + r = radeon_ring_test(rdev, TN_RING_TYPE_VCE2_INDEX, ring); 179 + if (r) { 180 + ring->ready = false; 181 + return r; 182 + } 183 + 184 + DRM_INFO("VCE initialized successfully.\n"); 185 + 186 + return 0; 187 + }
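vce_v1_0_start() above programs the block almost entirely through the driver's masked register writes (WREG32_P). As a reading aid, a sketch of that helper's semantics on plain values; the function below is purely illustrative and not part of the driver:

#include <stdint.h>

/* Semantics of WREG32_P(reg, val, mask): 'mask' selects the bits to preserve,
 * everything outside the mask is replaced by 'val'. */
static uint32_t masked_update(uint32_t old, uint32_t val, uint32_t mask)
{
	return (old & mask) | (val & ~mask);
}

/* e.g. the "set BUSY flag" write above, WREG32_P(VCE_STATUS, 1, ~1), computes
 * masked_update(RREG32(VCE_STATUS), 1, ~1) and so only touches bit 0. */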
+181
drivers/gpu/drm/radeon/vce_v2_0.c
··· 1 + /* 2 + * Copyright 2013 Advanced Micro Devices, Inc. 3 + * All Rights Reserved. 4 + * 5 + * Permission is hereby granted, free of charge, to any person obtaining a 6 + * copy of this software and associated documentation files (the 7 + * "Software"), to deal in the Software without restriction, including 8 + * without limitation the rights to use, copy, modify, merge, publish, 9 + * distribute, sub license, and/or sell copies of the Software, and to 10 + * permit persons to whom the Software is furnished to do so, subject to 11 + * the following conditions: 12 + * 13 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 16 + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 17 + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 18 + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 19 + * USE OR OTHER DEALINGS IN THE SOFTWARE. 20 + * 21 + * The above copyright notice and this permission notice (including the 22 + * next paragraph) shall be included in all copies or substantial portions 23 + * of the Software. 24 + * 25 + * Authors: Christian König <christian.koenig@amd.com> 26 + */ 27 + 28 + #include <linux/firmware.h> 29 + #include <drm/drmP.h> 30 + #include "radeon.h" 31 + #include "radeon_asic.h" 32 + #include "cikd.h" 33 + 34 + static void vce_v2_0_set_sw_cg(struct radeon_device *rdev, bool gated) 35 + { 36 + u32 tmp; 37 + 38 + if (gated) { 39 + tmp = RREG32(VCE_CLOCK_GATING_B); 40 + tmp |= 0xe70000; 41 + WREG32(VCE_CLOCK_GATING_B, tmp); 42 + 43 + tmp = RREG32(VCE_UENC_CLOCK_GATING); 44 + tmp |= 0xff000000; 45 + WREG32(VCE_UENC_CLOCK_GATING, tmp); 46 + 47 + tmp = RREG32(VCE_UENC_REG_CLOCK_GATING); 48 + tmp &= ~0x3fc; 49 + WREG32(VCE_UENC_REG_CLOCK_GATING, tmp); 50 + 51 + WREG32(VCE_CGTT_CLK_OVERRIDE, 0); 52 + } else { 53 + tmp = RREG32(VCE_CLOCK_GATING_B); 54 + tmp |= 0xe7; 55 + tmp &= ~0xe70000; 56 + WREG32(VCE_CLOCK_GATING_B, tmp); 57 + 58 + tmp = RREG32(VCE_UENC_CLOCK_GATING); 59 + tmp |= 0x1fe000; 60 + tmp &= ~0xff000000; 61 + WREG32(VCE_UENC_CLOCK_GATING, tmp); 62 + 63 + tmp = RREG32(VCE_UENC_REG_CLOCK_GATING); 64 + tmp |= 0x3fc; 65 + WREG32(VCE_UENC_REG_CLOCK_GATING, tmp); 66 + } 67 + } 68 + 69 + static void vce_v2_0_set_dyn_cg(struct radeon_device *rdev, bool gated) 70 + { 71 + u32 orig, tmp; 72 + 73 + tmp = RREG32(VCE_CLOCK_GATING_B); 74 + tmp &= ~0x00060006; 75 + if (gated) { 76 + tmp |= 0xe10000; 77 + } else { 78 + tmp |= 0xe1; 79 + tmp &= ~0xe10000; 80 + } 81 + WREG32(VCE_CLOCK_GATING_B, tmp); 82 + 83 + orig = tmp = RREG32(VCE_UENC_CLOCK_GATING); 84 + tmp &= ~0x1fe000; 85 + tmp &= ~0xff000000; 86 + if (tmp != orig) 87 + WREG32(VCE_UENC_CLOCK_GATING, tmp); 88 + 89 + orig = tmp = RREG32(VCE_UENC_REG_CLOCK_GATING); 90 + tmp &= ~0x3fc; 91 + if (tmp != orig) 92 + WREG32(VCE_UENC_REG_CLOCK_GATING, tmp); 93 + 94 + if (gated) 95 + WREG32(VCE_CGTT_CLK_OVERRIDE, 0); 96 + } 97 + 98 + static void vce_v2_0_disable_cg(struct radeon_device *rdev) 99 + { 100 + WREG32(VCE_CGTT_CLK_OVERRIDE, 7); 101 + } 102 + 103 + void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable) 104 + { 105 + bool sw_cg = false; 106 + 107 + if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_VCE_MGCG)) { 108 + if (sw_cg) 109 + vce_v2_0_set_sw_cg(rdev, true); 110 + else 111 + vce_v2_0_set_dyn_cg(rdev, true); 112 + } else { 113 + 
vce_v2_0_disable_cg(rdev); 114 + 115 + if (sw_cg) 116 + vce_v2_0_set_sw_cg(rdev, false); 117 + else 118 + vce_v2_0_set_dyn_cg(rdev, false); 119 + } 120 + } 121 + 122 + static void vce_v2_0_init_cg(struct radeon_device *rdev) 123 + { 124 + u32 tmp; 125 + 126 + tmp = RREG32(VCE_CLOCK_GATING_A); 127 + tmp &= ~(CGC_CLK_GATE_DLY_TIMER_MASK | CGC_CLK_GATER_OFF_DLY_TIMER_MASK); 128 + tmp |= (CGC_CLK_GATE_DLY_TIMER(0) | CGC_CLK_GATER_OFF_DLY_TIMER(4)); 129 + tmp |= CGC_UENC_WAIT_AWAKE; 130 + WREG32(VCE_CLOCK_GATING_A, tmp); 131 + 132 + tmp = RREG32(VCE_UENC_CLOCK_GATING); 133 + tmp &= ~(CLOCK_ON_DELAY_MASK | CLOCK_OFF_DELAY_MASK); 134 + tmp |= (CLOCK_ON_DELAY(0) | CLOCK_OFF_DELAY(4)); 135 + WREG32(VCE_UENC_CLOCK_GATING, tmp); 136 + 137 + tmp = RREG32(VCE_CLOCK_GATING_B); 138 + tmp |= 0x10; 139 + tmp &= ~0x100000; 140 + WREG32(VCE_CLOCK_GATING_B, tmp); 141 + } 142 + 143 + int vce_v2_0_resume(struct radeon_device *rdev) 144 + { 145 + uint64_t addr = rdev->vce.gpu_addr; 146 + uint32_t size; 147 + 148 + WREG32_P(VCE_CLOCK_GATING_A, 0, ~(1 << 16)); 149 + WREG32_P(VCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000); 150 + WREG32_P(VCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F); 151 + WREG32(VCE_CLOCK_GATING_B, 0xf7); 152 + 153 + WREG32(VCE_LMI_CTRL, 0x00398000); 154 + WREG32_P(VCE_LMI_CACHE_CTRL, 0x0, ~0x1); 155 + WREG32(VCE_LMI_SWAP_CNTL, 0); 156 + WREG32(VCE_LMI_SWAP_CNTL1, 0); 157 + WREG32(VCE_LMI_VM_CTRL, 0); 158 + 159 + size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size); 160 + WREG32(VCE_VCPU_CACHE_OFFSET0, addr & 0x7fffffff); 161 + WREG32(VCE_VCPU_CACHE_SIZE0, size); 162 + 163 + addr += size; 164 + size = RADEON_VCE_STACK_SIZE; 165 + WREG32(VCE_VCPU_CACHE_OFFSET1, addr & 0x7fffffff); 166 + WREG32(VCE_VCPU_CACHE_SIZE1, size); 167 + 168 + addr += size; 169 + size = RADEON_VCE_HEAP_SIZE; 170 + WREG32(VCE_VCPU_CACHE_OFFSET2, addr & 0x7fffffff); 171 + WREG32(VCE_VCPU_CACHE_SIZE2, size); 172 + 173 + WREG32_P(VCE_LMI_CTRL2, 0x0, ~0x100); 174 + 175 + WREG32_P(VCE_SYS_INT_EN, VCE_SYS_INT_TRAP_INTERRUPT_EN, 176 + ~VCE_SYS_INT_TRAP_INTERRUPT_EN); 177 + 178 + vce_v2_0_init_cg(rdev); 179 + 180 + return 0; 181 + }
+5
include/uapi/drm/radeon_drm.h
··· 919 919 #define RADEON_CS_RING_COMPUTE 1 920 920 #define RADEON_CS_RING_DMA 2 921 921 #define RADEON_CS_RING_UVD 3 922 + #define RADEON_CS_RING_VCE 4 922 923 /* The third dword of RADEON_CHUNK_ID_FLAGS is a sint32 that sets the priority */ 923 924 /* 0 = normal, + = higher priority, - = lower priority */ 924 925 ··· 988 987 #define RADEON_INFO_SI_BACKEND_ENABLED_MASK 0x19 989 988 /* max engine clock - needed for OpenCL */ 990 989 #define RADEON_INFO_MAX_SCLK 0x1a 990 + /* version of VCE firmware */ 991 + #define RADEON_INFO_VCE_FW_VERSION 0x1b 992 + /* version of VCE feedback */ 993 + #define RADEON_INFO_VCE_FB_VERSION 0x1c 991 994 992 995 993 996 struct drm_radeon_info {
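The two new RADEON_INFO requests expose the VCE firmware and feedback versions that radeon_vce_init() parses out of the firmware image. A hypothetical userspace sketch (not part of this series), assuming libdrm's drmCommandWriteRead(); the helper name is illustrative:

#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <radeon_drm.h>

/* Ask the kernel for the loaded VCE firmware version; returns 0 on success. */
static int radeon_query_vce_fw_version(int fd, uint32_t *version)
{
	struct drm_radeon_info info;
	uint32_t value = 0;

	memset(&info, 0, sizeof(info));
	info.request = RADEON_INFO_VCE_FW_VERSION;
	info.value = (uintptr_t)&value;	/* kernel writes the result through this pointer */

	if (drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)) != 0)
		return -1;

	*version = value;	/* e.g. 0x28020200 for the 40.2.2 firmware above */
	return 0;
}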