Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amd/display: Add support for zstate during extended vblank

[why]
When we enter FREESYNC_STATE_VIDEO, the vblank period is extended. We
want to use that extra vblank time to enter a Z-state (Z9/Z10) if
possible.

[how]
When we enter FreeSync, a full update is triggered and the new vtotal,
including the extra lines, is passed to DML in a stream update. The time
gained from the extra vblank lines is calculated in microseconds. Under
the current policy, we allow Z-state entry if the time gained is greater
than 5 ms (5000 us). Furthermore, an optimized value for
min_dst_y_next_start is calculated and written to its register. When
exiting FreeSync, another full update is triggered and the default
values are restored.

Reviewed-by: Nicholas Kazlauskas <Nicholas.Kazlauskas@amd.com>
Acked-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Gabe Teeger <gabe.teeger@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Gabe Teeger and committed by
Alex Deucher
e5fc7825 02fc996d

+80 -4
+19
drivers/gpu/drm/amd/display/dc/core/dc.c
··· 2393 2393 2394 2394 if (stream_update->mst_bw_update) 2395 2395 su_flags->bits.mst_bw = 1; 2396 + if (stream_update->crtc_timing_adjust && dc_extended_blank_supported(dc)) 2397 + su_flags->bits.crtc_timing_adjust = 1; 2396 2398 2397 2399 if (su_flags->raw != 0) 2398 2400 overall_type = UPDATE_TYPE_FULL; ··· 2655 2653 2656 2654 if (update->vrr_infopacket) 2657 2655 stream->vrr_infopacket = *update->vrr_infopacket; 2656 + 2657 + if (update->crtc_timing_adjust) 2658 + stream->adjust = *update->crtc_timing_adjust; 2658 2659 2659 2660 if (update->dpms_off) 2660 2661 stream->dpms_off = *update->dpms_off; ··· 4059 4054 4060 4055 if (pipe->stream_res.abm && pipe->stream_res.abm->funcs->set_abm_pause) 4061 4056 pipe->stream_res.abm->funcs->set_abm_pause(pipe->stream_res.abm, !enable, i, pipe->stream_res.tg->inst); 4057 + } 4058 + /* 4059 + * dc_extended_blank_supported: Decide whether extended blank is supported 4060 + * 4061 + * Extended blank is a freesync optimization feature to be enabled in the future. 4062 + * During the extra vblank period gained from freesync, we have the ability to enter z9/z10. 4063 + * 4064 + * @param [in] dc: Current DC state 4065 + * @return: Indicate whether extended blank is supported (true or false) 4066 + */ 4067 + bool dc_extended_blank_supported(struct dc *dc) 4068 + { 4069 + return dc->debug.extended_blank_optimization && !dc->debug.disable_z10 4070 + && dc->caps.zstate_support && dc->caps.is_apu; 4062 4071 }
+5 -1
drivers/gpu/drm/amd/display/dc/dc.h
··· 188 188 bool psp_setup_panel_mode; 189 189 bool extended_aux_timeout_support; 190 190 bool dmcub_support; 191 + bool zstate_support; 191 192 uint32_t num_of_internal_disp; 192 193 enum dp_protocol_version max_dp_protocol_version; 193 194 unsigned int mall_size_per_mem_channel; ··· 704 703 bool enable_driver_sequence_debug; 705 704 enum det_size crb_alloc_policy; 706 705 int crb_alloc_policy_min_disp_count; 707 - #if defined(CONFIG_DRM_AMD_DC_DCN) 708 706 bool disable_z10; 707 + #if defined(CONFIG_DRM_AMD_DC_DCN) 709 708 bool enable_z9_disable_interface; 710 709 bool enable_sw_cntl_psr; 711 710 union dpia_debug_options dpia_debug; 712 711 #endif 713 712 bool apply_vendor_specific_lttpr_wa; 713 + bool extended_blank_optimization; 714 714 bool ignore_dpref_ss; 715 715 uint8_t psr_power_use_phy_fsm; 716 716 }; ··· 1370 1368 uint32_t dongle_max_pix_clk; 1371 1369 bool converter_disable_audio; 1372 1370 }; 1371 + 1372 + bool dc_extended_blank_supported(struct dc *dc); 1373 1373 1374 1374 struct dc_sink *dc_sink_create(const struct dc_sink_init_data *init_params); 1375 1375
+2
drivers/gpu/drm/amd/display/dc/dc_stream.h
··· 131 131 uint32_t wb_update:1; 132 132 uint32_t dsc_changed : 1; 133 133 uint32_t mst_bw : 1; 134 + uint32_t crtc_timing_adjust : 1; 134 135 } bits; 135 136 136 137 uint32_t raw; ··· 290 289 struct dc_3dlut *lut3d_func; 291 290 292 291 struct test_pattern *pending_test_pattern; 292 + struct dc_crtc_timing_adjust *crtc_timing_adjust; 293 293 }; 294 294 295 295 bool dc_is_stream_unchanged(
+12
drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
··· 1857 1857 struct dc_state *context) 1858 1858 { 1859 1859 struct hubbub *hubbub = dc->res_pool->hubbub; 1860 + int i; 1860 1861 1861 1862 /* program dchubbub watermarks */ 1862 1863 hubbub->funcs->program_watermarks(hubbub, ··· 1874 1873 dc->clk_mgr, 1875 1874 context, 1876 1875 true); 1876 + if (dc_extended_blank_supported(dc) && context->bw_ctx.bw.dcn.clk.zstate_support == DCN_ZSTATE_SUPPORT_ALLOW) { 1877 + for (i = 0; i < dc->res_pool->pipe_count; ++i) { 1878 + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; 1879 + 1880 + if (pipe_ctx->stream && pipe_ctx->plane_res.hubp->funcs->program_extended_blank 1881 + && pipe_ctx->stream->adjust.v_total_min == pipe_ctx->stream->adjust.v_total_max 1882 + && pipe_ctx->stream->adjust.v_total_max > pipe_ctx->stream->timing.v_total) 1883 + pipe_ctx->plane_res.hubp->funcs->program_extended_blank(pipe_ctx->plane_res.hubp, 1884 + pipe_ctx->dlg_regs.optimized_min_dst_y_next_start); 1885 + } 1886 + } 1877 1887 /* increase compbuf size */ 1878 1888 if (hubbub->funcs->program_compbuf_size) 1879 1889 hubbub->funcs->program_compbuf_size(hubbub, context->bw_ctx.bw.dcn.compbuf_size_kb, true);
+8
drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c
··· 54 54 REG_UPDATE(DCHUBP_CNTL, HUBP_SOFT_RESET, reset); 55 55 } 56 56 57 + void hubp31_program_extended_blank(struct hubp *hubp, unsigned int min_dst_y_next_start_optimized) 58 + { 59 + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); 60 + 61 + REG_SET(BLANK_OFFSET_1, 0, MIN_DST_Y_NEXT_START, min_dst_y_next_start_optimized); 62 + } 63 + 57 64 static struct hubp_funcs dcn31_hubp_funcs = { 58 65 .hubp_enable_tripleBuffer = hubp2_enable_triplebuffer, 59 66 .hubp_is_triplebuffer_enabled = hubp2_is_triplebuffer_enabled, ··· 87 80 .set_unbounded_requesting = hubp31_set_unbounded_requesting, 88 81 .hubp_soft_reset = hubp31_soft_reset, 89 82 .hubp_in_blank = hubp1_in_blank, 83 + .program_extended_blank = hubp31_program_extended_blank, 90 84 }; 91 85 92 86 bool hubp31_construct(
+1
drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
··· 2232 2232 dc->caps.extended_aux_timeout_support = true; 2233 2233 dc->caps.dmcub_support = true; 2234 2234 dc->caps.is_apu = true; 2235 + dc->caps.zstate_support = true; 2235 2236 2236 2237 /* Color pipeline capabilities */ 2237 2238 dc->caps.color.dpp.dcn_arch = 1;
+15 -3
drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
··· 722 722 { 723 723 int plane_count; 724 724 int i; 725 + unsigned int optimized_min_dst_y_next_start_us; 725 726 726 727 plane_count = 0; 728 + optimized_min_dst_y_next_start_us = 0; 727 729 for (i = 0; i < dc->res_pool->pipe_count; i++) { 728 730 if (context->res_ctx.pipe_ctx[i].plane_state) 729 731 plane_count++; ··· 746 744 struct dc_link *link = context->streams[0]->sink->link; 747 745 struct dc_stream_status *stream_status = &context->stream_status[0]; 748 746 747 + if (dc_extended_blank_supported(dc)) { 748 + for (i = 0; i < dc->res_pool->pipe_count; i++) { 749 + if (context->res_ctx.pipe_ctx[i].stream == context->streams[0] 750 + && context->res_ctx.pipe_ctx[i].stream->adjust.v_total_min == context->res_ctx.pipe_ctx[i].stream->adjust.v_total_max 751 + && context->res_ctx.pipe_ctx[i].stream->adjust.v_total_min > context->res_ctx.pipe_ctx[i].stream->timing.v_total) { 752 + optimized_min_dst_y_next_start_us = 753 + context->res_ctx.pipe_ctx[i].dlg_regs.optimized_min_dst_y_next_start_us; 754 + break; 755 + } 756 + } 757 + } 749 758 /* zstate only supported on PWRSEQ0 and when there's <2 planes*/ 750 759 if (link->link_index != 0 || stream_status->plane_count > 1) 751 760 return DCN_ZSTATE_SUPPORT_DISALLOW; 752 761 753 - if (context->bw_ctx.dml.vba.StutterPeriod > 5000.0) 762 + if (context->bw_ctx.dml.vba.StutterPeriod > 5000.0 || optimized_min_dst_y_next_start_us > 5000) 754 763 return DCN_ZSTATE_SUPPORT_ALLOW; 755 764 else if (link->psr_settings.psr_version == DC_PSR_VERSION_1 && !dc->debug.disable_psr) 756 765 return DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY; ··· 798 785 context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] 799 786 != dm_dram_clock_change_unsupported; 800 787 context->bw_ctx.bw.dcn.clk.dppclk_khz = 0; 801 - 802 - context->bw_ctx.bw.dcn.clk.zstate_support = decide_zstate_support(dc, context); 803 788 804 789 context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(dc, context); 805 790 ··· 854 843 
&pipes[pipe_idx].pipe); 855 844 pipe_idx++; 856 845 } 846 + context->bw_ctx.bw.dcn.clk.zstate_support = decide_zstate_support(dc, context); 857 847 } 858 848 859 849 static void swizzle_to_dml_params(
+13
drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c
··· 1055 1055 1056 1056 float vba__refcyc_per_req_delivery_pre_l = get_refcyc_per_req_delivery_pre_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA 1057 1057 float vba__refcyc_per_req_delivery_l = get_refcyc_per_req_delivery_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA 1058 + int blank_lines; 1058 1059 1059 1060 memset(disp_dlg_regs, 0, sizeof(*disp_dlg_regs)); 1060 1061 memset(disp_ttu_regs, 0, sizeof(*disp_ttu_regs)); ··· 1081 1080 dlg_vblank_start = interlaced ? (vblank_start / 2) : vblank_start; 1082 1081 1083 1082 disp_dlg_regs->min_dst_y_next_start = (unsigned int) (((double) dlg_vblank_start) * dml_pow(2, 2)); 1083 + blank_lines = (dst->vblank_end + dst->vtotal_min - dst->vblank_start - dst->vstartup_start - 1); 1084 + if (blank_lines < 0) 1085 + blank_lines = 0; 1086 + if (blank_lines != 0) { 1087 + disp_dlg_regs->optimized_min_dst_y_next_start_us = 1088 + ((unsigned int) blank_lines * dst->hactive) / (unsigned int) dst->pixel_rate_mhz; 1089 + disp_dlg_regs->optimized_min_dst_y_next_start = 1090 + (unsigned int)(((double) (dlg_vblank_start + blank_lines)) * dml_pow(2, 2)); 1091 + } else { 1092 + // use unoptimized value 1093 + disp_dlg_regs->optimized_min_dst_y_next_start = disp_dlg_regs->min_dst_y_next_start; 1094 + } 1084 1095 ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int)dml_pow(2, 18)); 1085 1096 1086 1097 dml_print("DML_DLG: %s: min_ttu_vblank (us) = %3.2f\n", __func__, min_ttu_vblank);
+2
drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
··· 446 446 unsigned int refcyc_h_blank_end; 447 447 unsigned int dlg_vblank_end; 448 448 unsigned int min_dst_y_next_start; 449 + unsigned int optimized_min_dst_y_next_start; 450 + unsigned int optimized_min_dst_y_next_start_us; 449 451 unsigned int refcyc_per_htotal; 450 452 unsigned int refcyc_x_after_scaler; 451 453 unsigned int dst_y_after_scaler;
+3
drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
··· 195 195 196 196 void (*hubp_set_flip_int)(struct hubp *hubp); 197 197 198 + void (*program_extended_blank)(struct hubp *hubp, 199 + unsigned int min_dst_y_next_start_optimized); 200 + 198 201 void (*hubp_wait_pipe_read_start)(struct hubp *hubp); 199 202 }; 200 203