Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amd/display: fixup DPP programming sequence

[why]
DC does not correctly account for the fact that DPP DTO is double buffered while DPP ref is not.
This means that when DPP ref clock is lowered when it's "safe to lower", the DPP blocks that need
an increased divider will temporarily have actual DPP clock drop below minimum while DTO
double buffering takes effect. This results in temporary underflow.

[how]
To fix this, DPP clock cannot be programmed atomically, but rather be broken up into the DTO and the
ref. Each has a separate "safe to lower" logic. When doing "prepare" the ref and dividers may only increase.
When doing "optimize", both may decrease. It is guaranteed that we won't exceed max DPP clock because
we do not use dividers larger than 1.

Signed-off-by: Jun Lei <Jun.Lei@amd.com>
Reviewed-by: Eric Yang <eric.yang2@amd.com>
Acked-by: Leo Li <sunpeng.li@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Jun Lei and committed by
Alex Deucher
f7f38ffe 9adc8050

+141 -47
+97 -36
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
··· 104 104 { 105 105 int i; 106 106 107 - clk_mgr->dccg->ref_dppclk = clk_mgr->base.clks.dppclk_khz; 108 107 for (i = 0; i < clk_mgr->base.ctx->dc->res_pool->pipe_count; i++) { 109 108 int dpp_inst, dppclk_khz; 110 109 ··· 113 114 dpp_inst = context->res_ctx.pipe_ctx[i].plane_res.dpp->inst; 114 115 dppclk_khz = context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz; 115 116 clk_mgr->dccg->funcs->update_dpp_dto( 116 - clk_mgr->dccg, dpp_inst, dppclk_khz); 117 + clk_mgr->dccg, dpp_inst, dppclk_khz, false); 117 118 } 118 119 } 119 120 120 - void dcn20_update_clocks_update_dentist(struct clk_mgr_internal *clk_mgr) 121 + static void update_global_dpp_clk(struct clk_mgr_internal *clk_mgr, unsigned int khz) 121 122 { 122 123 int dpp_divider = DENTIST_DIVIDER_RANGE_SCALE_FACTOR 123 - * clk_mgr->dentist_vco_freq_khz / clk_mgr->base.clks.dppclk_khz; 124 - int disp_divider = DENTIST_DIVIDER_RANGE_SCALE_FACTOR 125 - * clk_mgr->dentist_vco_freq_khz / clk_mgr->base.clks.dispclk_khz; 124 + * clk_mgr->dentist_vco_freq_khz / khz; 126 125 127 126 uint32_t dppclk_wdivider = dentist_get_did_from_divider(dpp_divider); 128 - uint32_t dispclk_wdivider = dentist_get_did_from_divider(disp_divider); 129 127 130 - REG_UPDATE(DENTIST_DISPCLK_CNTL, 131 - DENTIST_DISPCLK_WDIVIDER, dispclk_wdivider); 132 - // REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, 1, 5, 100); 133 128 REG_UPDATE(DENTIST_DISPCLK_CNTL, 134 129 DENTIST_DPPCLK_WDIVIDER, dppclk_wdivider); 135 130 REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DPPCLK_CHG_DONE, 1, 5, 100); 136 131 } 137 132 133 + static void update_display_clk(struct clk_mgr_internal *clk_mgr, unsigned int khz) 134 + { 135 + int disp_divider = DENTIST_DIVIDER_RANGE_SCALE_FACTOR 136 + * clk_mgr->dentist_vco_freq_khz / khz; 137 + 138 + uint32_t dispclk_wdivider = dentist_get_did_from_divider(disp_divider); 139 + 140 + REG_UPDATE(DENTIST_DISPCLK_CNTL, 141 + DENTIST_DISPCLK_WDIVIDER, dispclk_wdivider); 142 + } 143 + 144 + static void 
request_voltage_and_program_disp_clk(struct clk_mgr *clk_mgr_base, unsigned int khz) 145 + { 146 + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); 147 + struct dc *dc = clk_mgr_base->ctx->dc; 148 + struct pp_smu_funcs_nv *pp_smu = NULL; 149 + bool going_up = clk_mgr->base.clks.dispclk_khz < khz; 150 + 151 + if (dc->res_pool->pp_smu) 152 + pp_smu = &dc->res_pool->pp_smu->nv_funcs; 153 + 154 + clk_mgr->base.clks.dispclk_khz = khz; 155 + 156 + if (going_up && pp_smu && pp_smu->set_voltage_by_freq) 157 + pp_smu->set_voltage_by_freq(&pp_smu->pp_smu, PP_SMU_NV_DISPCLK, clk_mgr_base->clks.dispclk_khz / 1000); 158 + 159 + update_display_clk(clk_mgr, khz); 160 + 161 + if (!going_up && pp_smu && pp_smu->set_voltage_by_freq) 162 + pp_smu->set_voltage_by_freq(&pp_smu->pp_smu, PP_SMU_NV_DISPCLK, clk_mgr_base->clks.dispclk_khz / 1000); 163 + } 164 + 165 + static void request_voltage_and_program_global_dpp_clk(struct clk_mgr *clk_mgr_base, unsigned int khz) 166 + { 167 + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); 168 + struct dc *dc = clk_mgr_base->ctx->dc; 169 + struct pp_smu_funcs_nv *pp_smu = NULL; 170 + bool going_up = clk_mgr->base.clks.dppclk_khz < khz; 171 + 172 + if (dc->res_pool->pp_smu) 173 + pp_smu = &dc->res_pool->pp_smu->nv_funcs; 174 + 175 + clk_mgr->base.clks.dppclk_khz = khz; 176 + clk_mgr->dccg->ref_dppclk = khz; 177 + 178 + if (going_up && pp_smu && pp_smu->set_voltage_by_freq) 179 + pp_smu->set_voltage_by_freq(&pp_smu->pp_smu, PP_SMU_NV_PIXELCLK, clk_mgr_base->clks.dppclk_khz / 1000); 180 + 181 + update_global_dpp_clk(clk_mgr, khz); 182 + 183 + if (!going_up && pp_smu && pp_smu->set_voltage_by_freq) 184 + pp_smu->set_voltage_by_freq(&pp_smu->pp_smu, PP_SMU_NV_PIXELCLK, clk_mgr_base->clks.dppclk_khz / 1000); 185 + } 138 186 139 187 void dcn2_update_clocks(struct clk_mgr *clk_mgr_base, 140 188 struct dc_state *context, ··· 192 146 struct dc *dc = clk_mgr_base->ctx->dc; 193 147 struct pp_smu_funcs_nv *pp_smu = NULL; 
194 148 int display_count; 195 - bool update_dppclk = false; 196 149 bool update_dispclk = false; 197 150 bool enter_display_off = false; 198 - bool dpp_clock_lowered = false; 199 151 struct dmcu *dmcu = clk_mgr_base->ctx->dc->res_pool->dmcu; 200 152 bool force_reset = false; 153 + int i; 201 154 202 155 if (clk_mgr_base->clks.dispclk_khz == 0 || 203 156 dc->debug.force_clock_mode & 0x1) { ··· 221 176 if (pp_smu && pp_smu->set_voltage_by_freq) 222 177 pp_smu->set_voltage_by_freq(&pp_smu->pp_smu, PP_SMU_NV_PHYCLK, clk_mgr_base->clks.phyclk_khz / 1000); 223 178 } 179 + 224 180 225 181 if (dc->debug.force_min_dcfclk_mhz > 0) 226 182 new_clocks->dcfclk_khz = (new_clocks->dcfclk_khz > (dc->debug.force_min_dcfclk_mhz * 1000)) ? ··· 248 202 249 203 if (should_update_pstate_support(safe_to_lower, new_clocks->p_state_change_support, clk_mgr_base->clks.p_state_change_support)) { 250 204 clk_mgr_base->clks.prev_p_state_change_support = clk_mgr_base->clks.p_state_change_support; 205 + 251 206 clk_mgr_base->clks.p_state_change_support = new_clocks->p_state_change_support; 252 207 if (pp_smu && pp_smu->set_pstate_handshake_support) 253 208 pp_smu->set_pstate_handshake_support(&pp_smu->pp_smu, clk_mgr_base->clks.p_state_change_support); 254 209 } 210 + clk_mgr_base->clks.prev_p_state_change_support = clk_mgr_base->clks.p_state_change_support; 255 211 256 212 if (should_set_clock(safe_to_lower, new_clocks->dramclk_khz, clk_mgr_base->clks.dramclk_khz)) { 257 213 clk_mgr_base->clks.dramclk_khz = new_clocks->dramclk_khz; ··· 261 213 pp_smu->set_hard_min_uclk_by_freq(&pp_smu->pp_smu, clk_mgr_base->clks.dramclk_khz / 1000); 262 214 } 263 215 264 - if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr->base.clks.dppclk_khz)) { 265 - if (clk_mgr->base.clks.dppclk_khz > new_clocks->dppclk_khz) 266 - dpp_clock_lowered = true; 267 - clk_mgr->base.clks.dppclk_khz = new_clocks->dppclk_khz; 216 + if (dc->config.forced_clocks == false) { 217 + // First update display clock 218 + 
if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) 219 + request_voltage_and_program_disp_clk(clk_mgr_base, new_clocks->dispclk_khz); 268 220 269 - if (pp_smu && pp_smu->set_voltage_by_freq) 270 - pp_smu->set_voltage_by_freq(&pp_smu->pp_smu, PP_SMU_NV_PIXELCLK, clk_mgr_base->clks.dppclk_khz / 1000); 221 + // Updating DPP clock requires some more logic 222 + if (!safe_to_lower) { 223 + // For pre-programming, we need to make sure any DPP clock that will go up has to go up 271 224 272 - update_dppclk = true; 273 - } 225 + // First raise the global reference if needed 226 + if (new_clocks->dppclk_khz > clk_mgr_base->clks.dppclk_khz) 227 + request_voltage_and_program_global_dpp_clk(clk_mgr_base, new_clocks->dppclk_khz); 274 228 275 - if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) { 276 - clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; 277 - if (pp_smu && pp_smu->set_voltage_by_freq) 278 - pp_smu->set_voltage_by_freq(&pp_smu->pp_smu, PP_SMU_NV_DISPCLK, clk_mgr_base->clks.dispclk_khz / 1000); 229 + // Then raise any dividers that need raising 230 + for (i = 0; i < clk_mgr->base.ctx->dc->res_pool->pipe_count; i++) { 231 + int dpp_inst, dppclk_khz; 279 232 280 - update_dispclk = true; 281 - } 282 - if (dc->config.forced_clocks == false || (force_reset && safe_to_lower)) { 283 - if (dpp_clock_lowered) { 284 - // if clock is being lowered, increase DTO before lowering refclk 285 - dcn20_update_clocks_update_dpp_dto(clk_mgr, context); 286 - dcn20_update_clocks_update_dentist(clk_mgr); 233 + if (!context->res_ctx.pipe_ctx[i].plane_state) 234 + continue; 235 + 236 + dpp_inst = context->res_ctx.pipe_ctx[i].plane_res.dpp->inst; 237 + dppclk_khz = context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz; 238 + 239 + clk_mgr->dccg->funcs->update_dpp_dto(clk_mgr->dccg, dpp_inst, dppclk_khz, true); 240 + } 287 241 } else { 288 - // if clock is being raised, increase refclk before lowering 
DTO 289 - if (update_dppclk || update_dispclk) 290 - dcn20_update_clocks_update_dentist(clk_mgr); 291 - if (update_dppclk) 292 - dcn20_update_clocks_update_dpp_dto(clk_mgr, context); 242 + // For post-programming, we can lower ref clk if needed, and unconditionally set all the DTOs 243 + 244 + if (new_clocks->dppclk_khz < clk_mgr_base->clks.dppclk_khz) 245 + request_voltage_and_program_global_dpp_clk(clk_mgr_base, new_clocks->dppclk_khz); 246 + 247 + for (i = 0; i < clk_mgr->base.ctx->dc->res_pool->pipe_count; i++) { 248 + int dpp_inst, dppclk_khz; 249 + 250 + if (!context->res_ctx.pipe_ctx[i].plane_state) 251 + continue; 252 + 253 + dpp_inst = context->res_ctx.pipe_ctx[i].plane_res.dpp->inst; 254 + dppclk_khz = context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz; 255 + 256 + clk_mgr->dccg->funcs->update_dpp_dto(clk_mgr->dccg, dpp_inst, dppclk_khz, false); 257 + } 293 258 } 294 259 } 295 260 if (update_dispclk &&
+3
drivers/gpu/drm/amd/display/dc/core/dc.c
··· 1603 1603 for (i = 0; i < surface_count; i++) 1604 1604 updates[i].surface->update_flags.raw = 0xFFFFFFFF; 1605 1605 1606 + if (type == UPDATE_TYPE_FAST && memcmp(&dc->current_state->bw_ctx.bw.dcn.clk, &dc->clk_mgr->clks, offsetof(struct dc_clocks, prev_p_state_change_support)) != 0) 1607 + dc->optimized_required = true; 1608 + 1606 1609 return type; 1607 1610 } 1608 1611
+2 -1
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
··· 2155 2155 dc->res_pool->dccg->funcs->update_dpp_dto( 2156 2156 dc->res_pool->dccg, 2157 2157 dpp->inst, 2158 - pipe_ctx->plane_res.bw.dppclk_khz); 2158 + pipe_ctx->plane_res.bw.dppclk_khz, 2159 + false); 2159 2160 else 2160 2161 dc->clk_mgr->clks.dppclk_khz = should_divided_by_2 ? 2161 2162 dc->clk_mgr->clks.dispclk_khz / 2 :
+27 -4
drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.c
··· 44 44 #define DC_LOGGER \ 45 45 dccg->ctx->logger 46 46 47 - void dccg2_update_dpp_dto(struct dccg *dccg, int dpp_inst, int req_dppclk) 47 + void dccg2_update_dpp_dto(struct dccg *dccg, 48 + int dpp_inst, 49 + int req_dppclk, 50 + bool reduce_divider_only) 48 51 { 49 52 struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); 50 53 51 54 if (dccg->ref_dppclk && req_dppclk) { 52 55 int ref_dppclk = dccg->ref_dppclk; 56 + int current_phase, current_modulo; 53 57 54 58 ASSERT(req_dppclk <= ref_dppclk); 55 59 /* need to clamp to 8 bits */ ··· 65 61 if (req_dppclk > ref_dppclk) 66 62 req_dppclk = ref_dppclk; 67 63 } 68 - REG_SET_2(DPPCLK_DTO_PARAM[dpp_inst], 0, 69 - DPPCLK0_DTO_PHASE, req_dppclk, 70 - DPPCLK0_DTO_MODULO, ref_dppclk); 64 + 65 + REG_GET_2(DPPCLK_DTO_PARAM[dpp_inst], 66 + DPPCLK0_DTO_PHASE, &current_phase, 67 + DPPCLK0_DTO_MODULO, &current_modulo); 68 + 69 + if (reduce_divider_only) { 70 + // requested phase/modulo greater than current 71 + if (req_dppclk * current_modulo >= current_phase * ref_dppclk) { 72 + REG_SET_2(DPPCLK_DTO_PARAM[dpp_inst], 0, 73 + DPPCLK0_DTO_PHASE, req_dppclk, 74 + DPPCLK0_DTO_MODULO, ref_dppclk); 75 + } else { 76 + REG_SET_2(DPPCLK_DTO_PARAM[dpp_inst], 0, 77 + DPPCLK0_DTO_PHASE, current_phase, 78 + DPPCLK0_DTO_MODULO, current_modulo); 79 + } 80 + } else { 81 + REG_SET_2(DPPCLK_DTO_PARAM[dpp_inst], 0, 82 + DPPCLK0_DTO_PHASE, req_dppclk, 83 + DPPCLK0_DTO_MODULO, ref_dppclk); 84 + } 85 + 71 86 REG_UPDATE(DPPCLK_DTO_CTRL, 72 87 DPPCLK_DTO_ENABLE[dpp_inst], 1); 73 88 } else {
+1 -1
drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h
··· 97 97 const struct dccg_mask *dccg_mask; 98 98 }; 99 99 100 - void dccg2_update_dpp_dto(struct dccg *dccg, int dpp_inst, int req_dppclk); 100 + void dccg2_update_dpp_dto(struct dccg *dccg, int dpp_inst, int req_dppclk, bool raise_divider_only); 101 101 102 102 void dccg2_get_dccg_ref_freq(struct dccg *dccg, 103 103 unsigned int xtalin_freq_inKhz,
+1 -1
drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
··· 2482 2482 context->bw_ctx.bw.dcn.clk.socclk_khz = context->bw_ctx.dml.vba.SOCCLK * 1000; 2483 2483 context->bw_ctx.bw.dcn.clk.dramclk_khz = context->bw_ctx.dml.vba.DRAMSpeed * 1000 / 16; 2484 2484 context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = context->bw_ctx.dml.vba.DCFCLKDeepSleep * 1000; 2485 - context->bw_ctx.bw.dcn.clk.fclk_khz = context->bw_ctx.dml.vba.FabricClock * 1000; 2485 + context->bw_ctx.bw.dcn.clk.fclk_khz = 0; 2486 2486 context->bw_ctx.bw.dcn.clk.p_state_change_support = 2487 2487 context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] 2488 2488 != dm_dram_clock_change_unsupported;
-1
drivers/gpu/drm/amd/display/dc/inc/core_types.h
··· 228 228 229 229 struct dcn_fe_bandwidth { 230 230 int dppclk_khz; 231 - 232 231 }; 233 232 234 233 struct stream_resource {
+8 -2
drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h
··· 281 281 282 282 static inline bool should_update_pstate_support(bool safe_to_lower, bool calc_support, bool cur_support) 283 283 { 284 - // Whenever we are transitioning pstate support, we always want to notify prior to committing state 285 - return (calc_support != cur_support) ? !safe_to_lower : false; 284 + if (cur_support != calc_support) { 285 + if (calc_support == true && safe_to_lower) 286 + return true; 287 + else if (calc_support == false && !safe_to_lower) 288 + return true; 289 + } 290 + 291 + return false; 286 292 } 287 293 288 294 int clk_mgr_helper_get_active_display_cnt(
+2 -1
drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h
··· 38 38 struct dccg_funcs { 39 39 void (*update_dpp_dto)(struct dccg *dccg, 40 40 int dpp_inst, 41 - int req_dppclk); 41 + int req_dppclk, 42 + bool reduce_divider_only); 42 43 void (*get_dccg_ref_freq)(struct dccg *dccg, 43 44 unsigned int xtalin_freq_inKhz, 44 45 unsigned int *dccg_ref_freq_inKhz);