Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amd/display: Add 3DLUT FL HW bug workaround

[Why]
There is a known HW bug that causes the internal 3DLUT fetch signal to
be lost at VREADY, regardless of whether the OTG lock is being held or
not. A workaround is necessary to make sure that this internal signal
stays up after OTG unlock.

[How]
Set the 3DLUT_ENABLE bit immediately before and after the unlock. Also
use VUPDATE_KEEPOUT to prevent lock transitions in the region between
VSTARTUP and VREADY, which could cause issues with this workaround (WA)
sequence.

Also include miscellaneous 3DLUT DMA-related sequence fixes that address
a few regressions causing corruption.

Reviewed-by: Dillon Varone <dillon.varone@amd.com>
Signed-off-by: Ilya Bakoulin <Ilya.Bakoulin@amd.com>
Signed-off-by: Roman Li <roman.li@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Authored by Ilya Bakoulin and committed by Alex Deucher
df60dcf5 f79f4dd6

+122 -13
+22 -8
drivers/gpu/drm/amd/display/dc/core/dc.c
··· 2650 2650 elevate_update_type(&overall_type, type); 2651 2651 } 2652 2652 2653 - if (update_flags->bits.lut_3d) { 2653 + if (update_flags->bits.lut_3d && 2654 + u->surface->mcm_luts.lut3d_data.lut3d_src != DC_CM2_TRANSFER_FUNC_SOURCE_VIDMEM) { 2654 2655 type = UPDATE_TYPE_FULL; 2655 2656 elevate_update_type(&overall_type, type); 2656 2657 } ··· 2927 2926 sizeof(struct dc_transfer_func_distributed_points)); 2928 2927 } 2929 2928 2930 - if (srf_update->func_shaper) 2929 + if (srf_update->cm2_params) { 2930 + surface->mcm_shaper_3dlut_setting = srf_update->cm2_params->component_settings.shaper_3dlut_setting; 2931 + surface->mcm_lut1d_enable = srf_update->cm2_params->component_settings.lut1d_enable; 2932 + surface->mcm_luts = srf_update->cm2_params->cm2_luts; 2933 + } 2934 + 2935 + if (srf_update->func_shaper) { 2931 2936 memcpy(&surface->in_shaper_func, srf_update->func_shaper, 2932 2937 sizeof(surface->in_shaper_func)); 2938 + 2939 + if (surface->mcm_shaper_3dlut_setting >= DC_CM2_SHAPER_3DLUT_SETTING_ENABLE_SHAPER) 2940 + surface->mcm_luts.shaper = &surface->in_shaper_func; 2941 + } 2933 2942 2934 2943 if (srf_update->lut3d_func) 2935 2944 memcpy(&surface->lut3d_func, srf_update->lut3d_func, ··· 2953 2942 surface->sdr_white_level_nits = 2954 2943 srf_update->sdr_white_level_nits; 2955 2944 2956 - if (srf_update->blend_tf) 2945 + if (srf_update->blend_tf) { 2957 2946 memcpy(&surface->blend_tf, srf_update->blend_tf, 2958 2947 sizeof(surface->blend_tf)); 2948 + 2949 + if (surface->mcm_lut1d_enable) 2950 + surface->mcm_luts.lut1d_func = &surface->blend_tf; 2951 + } 2952 + 2953 + if (srf_update->cm2_params || srf_update->blend_tf) 2954 + surface->lut_bank_a = !surface->lut_bank_a; 2959 2955 2960 2956 if (srf_update->input_csc_color_matrix) 2961 2957 surface->input_csc_color_matrix = ··· 2975 2957 if (srf_update->gamut_remap_matrix) 2976 2958 surface->gamut_remap_matrix = 2977 2959 *srf_update->gamut_remap_matrix; 2978 - if (srf_update->cm2_params) { 2979 - 
surface->mcm_shaper_3dlut_setting = srf_update->cm2_params->component_settings.shaper_3dlut_setting; 2980 - surface->mcm_lut1d_enable = srf_update->cm2_params->component_settings.lut1d_enable; 2981 - surface->mcm_luts = srf_update->cm2_params->cm2_luts; 2982 - } 2960 + 2983 2961 if (srf_update->cursor_csc_color_matrix) 2984 2962 surface->cursor_csc_color_matrix = 2985 2963 *srf_update->cursor_csc_color_matrix;
+6 -2
drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
··· 1458 1458 } else { 1459 1459 if (lock) 1460 1460 pipe->stream_res.tg->funcs->lock(pipe->stream_res.tg); 1461 - else 1462 - pipe->stream_res.tg->funcs->unlock(pipe->stream_res.tg); 1461 + else { 1462 + if (dc->hwseq->funcs.perform_3dlut_wa_unlock) 1463 + dc->hwseq->funcs.perform_3dlut_wa_unlock(pipe); 1464 + else 1465 + pipe->stream_res.tg->funcs->unlock(pipe->stream_res.tg); 1466 + } 1463 1467 } 1464 1468 } 1465 1469
+57 -3
drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c
··· 506 506 dcn401_get_mcm_lut_xable_from_pipe_ctx(dc, pipe_ctx, &shaper_xable, &lut3d_xable, &lut1d_xable); 507 507 508 508 /* 1D LUT */ 509 - if (mcm_luts.lut1d_func && lut3d_xable != MCM_LUT_DISABLE) { 509 + if (mcm_luts.lut1d_func) { 510 510 memset(&m_lut_params, 0, sizeof(m_lut_params)); 511 511 if (mcm_luts.lut1d_func->type == TF_TYPE_HWPWL) 512 512 m_lut_params.pwl = &mcm_luts.lut1d_func->pwl; ··· 521 521 mpc->funcs->populate_lut(mpc, MCM_LUT_1DLUT, m_lut_params, lut_bank_a, mpcc_id); 522 522 } 523 523 if (mpc->funcs->program_lut_mode) 524 - mpc->funcs->program_lut_mode(mpc, MCM_LUT_1DLUT, lut1d_xable, lut_bank_a, mpcc_id); 524 + mpc->funcs->program_lut_mode(mpc, MCM_LUT_1DLUT, lut1d_xable && m_lut_params.pwl, lut_bank_a, mpcc_id); 525 525 } 526 526 527 527 /* Shaper */ ··· 669 669 { 670 670 struct dpp *dpp_base = pipe_ctx->plane_res.dpp; 671 671 int mpcc_id = pipe_ctx->plane_res.hubp->inst; 672 - struct mpc *mpc = pipe_ctx->stream_res.opp->ctx->dc->res_pool->mpc; 672 + struct dc *dc = pipe_ctx->stream_res.opp->ctx->dc; 673 + struct mpc *mpc = dc->res_pool->mpc; 673 674 bool result; 674 675 const struct pwl_params *lut_params = NULL; 675 676 bool rval; 677 + 678 + if (plane_state->mcm_luts.lut3d_data.lut3d_src == DC_CM2_TRANSFER_FUNC_SOURCE_VIDMEM) { 679 + dcn401_populate_mcm_luts(dc, pipe_ctx, plane_state->mcm_luts, plane_state->lut_bank_a); 680 + return true; 681 + } 676 682 677 683 mpc->funcs->set_movable_cm_location(mpc, MPCC_MOVABLE_CM_LOCATION_BEFORE, mpcc_id); 678 684 pipe_ctx->plane_state->mcm_location = MPCC_MOVABLE_CM_LOCATION_BEFORE; ··· 1817 1811 1818 1812 dc->hwss.pipe_control_lock(dc, pipe, false); 1819 1813 } 1814 + } 1815 + } 1816 + 1817 + void dcn401_perform_3dlut_wa_unlock(struct pipe_ctx *pipe_ctx) 1818 + { 1819 + /* If 3DLUT FL is enabled and 3DLUT is in use, follow the workaround sequence for pipe unlock to make sure that 1820 + * HUBP will properly fetch 3DLUT contents after unlock. 
1821 + * 1822 + * This is meant to work around a known HW issue where VREADY will cancel the pending 3DLUT_ENABLE signal regardless 1823 + * of whether OTG lock is currently being held or not. 1824 + */ 1825 + struct pipe_ctx *wa_pipes[MAX_PIPES] = { NULL }; 1826 + struct pipe_ctx *odm_pipe, *mpc_pipe; 1827 + int i, wa_pipe_ct = 0; 1828 + 1829 + for (odm_pipe = pipe_ctx; odm_pipe != NULL; odm_pipe = odm_pipe->next_odm_pipe) { 1830 + for (mpc_pipe = odm_pipe; mpc_pipe != NULL; mpc_pipe = mpc_pipe->bottom_pipe) { 1831 + if (mpc_pipe->plane_state && mpc_pipe->plane_state->mcm_luts.lut3d_data.lut3d_src 1832 + == DC_CM2_TRANSFER_FUNC_SOURCE_VIDMEM 1833 + && mpc_pipe->plane_state->mcm_shaper_3dlut_setting 1834 + == DC_CM2_SHAPER_3DLUT_SETTING_ENABLE_SHAPER_3DLUT) { 1835 + wa_pipes[wa_pipe_ct++] = mpc_pipe; 1836 + } 1837 + } 1838 + } 1839 + 1840 + if (wa_pipe_ct > 0) { 1841 + if (pipe_ctx->stream_res.tg->funcs->set_vupdate_keepout) 1842 + pipe_ctx->stream_res.tg->funcs->set_vupdate_keepout(pipe_ctx->stream_res.tg, true); 1843 + 1844 + for (i = 0; i < wa_pipe_ct; ++i) { 1845 + if (wa_pipes[i]->plane_res.hubp->funcs->hubp_enable_3dlut_fl) 1846 + wa_pipes[i]->plane_res.hubp->funcs->hubp_enable_3dlut_fl(wa_pipes[i]->plane_res.hubp, true); 1847 + } 1848 + 1849 + pipe_ctx->stream_res.tg->funcs->unlock(pipe_ctx->stream_res.tg); 1850 + if (pipe_ctx->stream_res.tg->funcs->wait_update_lock_status) 1851 + pipe_ctx->stream_res.tg->funcs->wait_update_lock_status(pipe_ctx->stream_res.tg, false); 1852 + 1853 + for (i = 0; i < wa_pipe_ct; ++i) { 1854 + if (wa_pipes[i]->plane_res.hubp->funcs->hubp_enable_3dlut_fl) 1855 + wa_pipes[i]->plane_res.hubp->funcs->hubp_enable_3dlut_fl(wa_pipes[i]->plane_res.hubp, true); 1856 + } 1857 + 1858 + if (pipe_ctx->stream_res.tg->funcs->set_vupdate_keepout) 1859 + pipe_ctx->stream_res.tg->funcs->set_vupdate_keepout(pipe_ctx->stream_res.tg, false); 1860 + } else { 1861 + pipe_ctx->stream_res.tg->funcs->unlock(pipe_ctx->stream_res.tg); 1820 1862 } 1821 1863 
} 1822 1864
+2
drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h
··· 95 95 void dcn401_reset_hw_ctx_wrap( 96 96 struct dc *dc, 97 97 struct dc_state *context); 98 + void dcn401_perform_3dlut_wa_unlock(struct pipe_ctx *pipe_ctx); 99 + 98 100 #endif /* __DC_HWSS_DCN401_H__ */
+1
drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c
··· 138 138 .apply_single_controller_ctx_to_hw = dce110_apply_single_controller_ctx_to_hw, 139 139 .reset_back_end_for_pipe = dcn401_reset_back_end_for_pipe, 140 140 .populate_mcm_luts = NULL, 141 + .perform_3dlut_wa_unlock = dcn401_perform_3dlut_wa_unlock, 141 142 }; 142 143 143 144 void dcn401_hw_sequencer_init_functions(struct dc *dc)
+1
drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h
··· 182 182 struct pipe_ctx *pipe_ctx, 183 183 struct dc_cm2_func_luts mcm_luts, 184 184 bool lut_bank_a); 185 + void (*perform_3dlut_wa_unlock)(struct pipe_ctx *pipe_ctx); 185 186 }; 186 187 187 188 struct dce_hwseq {
+2
drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
··· 345 345 bool (*get_optc_double_buffer_pending)(struct timing_generator *tg); 346 346 bool (*get_otg_double_buffer_pending)(struct timing_generator *tg); 347 347 bool (*get_pipe_update_pending)(struct timing_generator *tg); 348 + void (*set_vupdate_keepout)(struct timing_generator *tg, bool enable); 349 + bool (*wait_update_lock_status)(struct timing_generator *tg, bool locked); 348 350 }; 349 351 350 352 #endif
+31
drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c
··· 430 430 REG_UPDATE(OTG_PSTATE_REGISTER, OTG_PSTATE_KEEPOUT_START, pstate_keepout); 431 431 } 432 432 433 + static void optc401_set_vupdate_keepout(struct timing_generator *tg, bool enable) 434 + { 435 + struct optc *optc1 = DCN10TG_FROM_TG(tg); 436 + 437 + REG_SET_3(OTG_VUPDATE_KEEPOUT, 0, 438 + MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_START_OFFSET, 0, 439 + MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_END_OFFSET, optc1->vready_offset + 10, 440 + OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN, enable); 441 + 442 + return; 443 + } 444 + 445 + static bool optc401_wait_update_lock_status(struct timing_generator *tg, bool locked) 446 + { 447 + struct optc *optc1 = DCN10TG_FROM_TG(tg); 448 + uint32_t lock_status = 0; 449 + 450 + REG_WAIT(OTG_MASTER_UPDATE_LOCK, 451 + UPDATE_LOCK_STATUS, locked, 452 + 1, 150000); 453 + 454 + REG_GET(OTG_MASTER_UPDATE_LOCK, UPDATE_LOCK_STATUS, &lock_status); 455 + 456 + if (lock_status != locked) 457 + return false; 458 + 459 + return true; 460 + } 461 + 433 462 static struct timing_generator_funcs dcn401_tg_funcs = { 434 463 .validate_timing = optc1_validate_timing, 435 464 .program_timing = optc1_program_timing, ··· 525 496 .get_optc_double_buffer_pending = optc3_get_optc_double_buffer_pending, 526 497 .get_otg_double_buffer_pending = optc3_get_otg_update_pending, 527 498 .get_pipe_update_pending = optc3_get_pipe_update_pending, 499 + .set_vupdate_keepout = optc401_set_vupdate_keepout, 500 + .wait_update_lock_status = optc401_wait_update_lock_status, 528 501 }; 529 502 530 503 void dcn401_timing_generator_init(struct optc *optc1)