Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amd/display: Move FPU associated DCN30 code to DML folder

[why & how]
As part of the FPU isolation work documented in
https://patchwork.freedesktop.org/series/93042/, isolate
the code that uses the FPU in DCN30 into DML, where all FPU code
should be located.

Reviewed-by: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
Acked-by: Qingqing Zhuo <qingqing.zhuo@amd.com>
Signed-off-by: Jasdeep Dhillon <jdhillon@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Jasdeep Dhillon and committed by
Alex Deucher
e4b0eac3 66a19720

+757 -478
+10
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c
··· 28 28 #include "dc.h" 29 29 #include "dcn_calc_math.h" 30 30 31 + #include "dml/dcn30/dcn30_fpu.h" 32 + 31 33 #define REG(reg)\ 32 34 optc1->tg_regs->reg 33 35 ··· 184 182 185 183 REG_UPDATE(OTG_V_SYNC_A_CNTL, OTG_V_SYNC_MODE, 0); 186 184 185 + } 186 + 187 + void optc3_set_vrr_m_const(struct timing_generator *optc, 188 + double vtotal_avg) 189 + { 190 + DC_FP_START(); 191 + optc3_fpu_set_vrr_m_const(optc, vtotal_avg); 192 + DC_FP_END(); 187 193 } 188 194 189 195 void optc3_set_odm_bypass(struct timing_generator *optc,
+41 -477
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
··· 84 84 #include "dce/dce_aux.h" 85 85 #include "dce/dce_i2c.h" 86 86 87 + #include "dml/dcn30/dcn30_fpu.h" 87 88 #include "dml/dcn30/display_mode_vba_30.h" 88 89 #include "vm_helper.h" 89 90 #include "dcn20/dcn20_vmid.h" 90 91 #include "amdgpu_socbb.h" 91 92 92 93 #define DC_LOGGER_INIT(logger) 93 - 94 - struct _vcs_dpi_ip_params_st dcn3_0_ip = { 95 - .use_min_dcfclk = 0, 96 - .clamp_min_dcfclk = 0, 97 - .odm_capable = 1, 98 - .gpuvm_enable = 0, 99 - .hostvm_enable = 0, 100 - .gpuvm_max_page_table_levels = 4, 101 - .hostvm_max_page_table_levels = 4, 102 - .hostvm_cached_page_table_levels = 0, 103 - .pte_group_size_bytes = 2048, 104 - .num_dsc = 6, 105 - .rob_buffer_size_kbytes = 184, 106 - .det_buffer_size_kbytes = 184, 107 - .dpte_buffer_size_in_pte_reqs_luma = 84, 108 - .pde_proc_buffer_size_64k_reqs = 48, 109 - .dpp_output_buffer_pixels = 2560, 110 - .opp_output_buffer_lines = 1, 111 - .pixel_chunk_size_kbytes = 8, 112 - .pte_enable = 1, 113 - .max_page_table_levels = 2, 114 - .pte_chunk_size_kbytes = 2, // ? 115 - .meta_chunk_size_kbytes = 2, 116 - .writeback_chunk_size_kbytes = 8, 117 - .line_buffer_size_bits = 789504, 118 - .is_line_buffer_bpp_fixed = 0, // ? 119 - .line_buffer_fixed_bpp = 0, // ? 
120 - .dcc_supported = true, 121 - .writeback_interface_buffer_size_kbytes = 90, 122 - .writeback_line_buffer_buffer_size = 0, 123 - .max_line_buffer_lines = 12, 124 - .writeback_luma_buffer_size_kbytes = 12, // writeback_line_buffer_buffer_size = 656640 125 - .writeback_chroma_buffer_size_kbytes = 8, 126 - .writeback_chroma_line_buffer_width_pixels = 4, 127 - .writeback_max_hscl_ratio = 1, 128 - .writeback_max_vscl_ratio = 1, 129 - .writeback_min_hscl_ratio = 1, 130 - .writeback_min_vscl_ratio = 1, 131 - .writeback_max_hscl_taps = 1, 132 - .writeback_max_vscl_taps = 1, 133 - .writeback_line_buffer_luma_buffer_size = 0, 134 - .writeback_line_buffer_chroma_buffer_size = 14643, 135 - .cursor_buffer_size = 8, 136 - .cursor_chunk_size = 2, 137 - .max_num_otg = 6, 138 - .max_num_dpp = 6, 139 - .max_num_wb = 1, 140 - .max_dchub_pscl_bw_pix_per_clk = 4, 141 - .max_pscl_lb_bw_pix_per_clk = 2, 142 - .max_lb_vscl_bw_pix_per_clk = 4, 143 - .max_vscl_hscl_bw_pix_per_clk = 4, 144 - .max_hscl_ratio = 6, 145 - .max_vscl_ratio = 6, 146 - .hscl_mults = 4, 147 - .vscl_mults = 4, 148 - .max_hscl_taps = 8, 149 - .max_vscl_taps = 8, 150 - .dispclk_ramp_margin_percent = 1, 151 - .underscan_factor = 1.11, 152 - .min_vblank_lines = 32, 153 - .dppclk_delay_subtotal = 46, 154 - .dynamic_metadata_vm_enabled = true, 155 - .dppclk_delay_scl_lb_only = 16, 156 - .dppclk_delay_scl = 50, 157 - .dppclk_delay_cnvc_formatter = 27, 158 - .dppclk_delay_cnvc_cursor = 6, 159 - .dispclk_delay_subtotal = 119, 160 - .dcfclk_cstate_latency = 5.2, // SRExitTime 161 - .max_inter_dcn_tile_repeaters = 8, 162 - .odm_combine_4to1_supported = true, 163 - 164 - .xfc_supported = false, 165 - .xfc_fill_bw_overhead_percent = 10.0, 166 - .xfc_fill_constant_bytes = 0, 167 - .gfx7_compat_tiling_supported = 0, 168 - .number_of_cursors = 1, 169 - }; 170 - 171 - struct _vcs_dpi_soc_bounding_box_st dcn3_0_soc = { 172 - .clock_limits = { 173 - { 174 - .state = 0, 175 - .dispclk_mhz = 562.0, 176 - .dppclk_mhz = 300.0, 177 - 
.phyclk_mhz = 300.0, 178 - .phyclk_d18_mhz = 667.0, 179 - .dscclk_mhz = 405.6, 180 - }, 181 - }, 182 - .min_dcfclk = 500.0, /* TODO: set this to actual min DCFCLK */ 183 - .num_states = 1, 184 - .sr_exit_time_us = 15.5, 185 - .sr_enter_plus_exit_time_us = 20, 186 - .urgent_latency_us = 4.0, 187 - .urgent_latency_pixel_data_only_us = 4.0, 188 - .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, 189 - .urgent_latency_vm_data_only_us = 4.0, 190 - .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, 191 - .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, 192 - .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, 193 - .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 80.0, 194 - .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, 195 - .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0, 196 - .max_avg_sdp_bw_use_normal_percent = 60.0, 197 - .max_avg_dram_bw_use_normal_percent = 40.0, 198 - .writeback_latency_us = 12.0, 199 - .max_request_size_bytes = 256, 200 - .fabric_datapath_to_dcn_data_return_bytes = 64, 201 - .dcn_downspread_percent = 0.5, 202 - .downspread_percent = 0.38, 203 - .dram_page_open_time_ns = 50.0, 204 - .dram_rw_turnaround_time_ns = 17.5, 205 - .dram_return_buffer_per_channel_bytes = 8192, 206 - .round_trip_ping_latency_dcfclk_cycles = 191, 207 - .urgent_out_of_order_return_per_channel_bytes = 4096, 208 - .channel_interleave_bytes = 256, 209 - .num_banks = 8, 210 - .gpuvm_min_page_size_bytes = 4096, 211 - .hostvm_min_page_size_bytes = 4096, 212 - .dram_clock_change_latency_us = 404, 213 - .dummy_pstate_latency_us = 5, 214 - .writeback_dram_clock_change_latency_us = 23.0, 215 - .return_bus_width_bytes = 64, 216 - .dispclk_dppclk_vco_speed_mhz = 3650, 217 - .xfc_bus_transport_time_us = 20, // ? 218 - .xfc_xbuf_latency_tolerance_us = 4, // ? 219 - .use_urgent_burst_bw = 1, // ? 
220 - .do_urgent_latency_adjustment = true, 221 - .urgent_latency_adjustment_fabric_clock_component_us = 1.0, 222 - .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000, 223 - }; 224 94 225 95 enum dcn30_clk_src_array_id { 226 96 DCN30_CLK_SRC_PLL0, ··· 1350 1480 void dcn30_populate_dml_writeback_from_context( 1351 1481 struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes) 1352 1482 { 1353 - int pipe_cnt, i, j; 1354 - double max_calc_writeback_dispclk; 1355 - double writeback_dispclk; 1356 - struct writeback_st dout_wb; 1357 - 1358 - for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { 1359 - struct dc_stream_state *stream = res_ctx->pipe_ctx[i].stream; 1360 - 1361 - if (!stream) 1362 - continue; 1363 - max_calc_writeback_dispclk = 0; 1364 - 1365 - /* Set writeback information */ 1366 - pipes[pipe_cnt].dout.wb_enable = 0; 1367 - pipes[pipe_cnt].dout.num_active_wb = 0; 1368 - for (j = 0; j < stream->num_wb_info; j++) { 1369 - struct dc_writeback_info *wb_info = &stream->writeback_info[j]; 1370 - 1371 - if (wb_info->wb_enabled && wb_info->writeback_source_plane && 1372 - (wb_info->writeback_source_plane == res_ctx->pipe_ctx[i].plane_state)) { 1373 - pipes[pipe_cnt].dout.wb_enable = 1; 1374 - pipes[pipe_cnt].dout.num_active_wb++; 1375 - dout_wb.wb_src_height = wb_info->dwb_params.cnv_params.crop_en ? 1376 - wb_info->dwb_params.cnv_params.crop_height : 1377 - wb_info->dwb_params.cnv_params.src_height; 1378 - dout_wb.wb_src_width = wb_info->dwb_params.cnv_params.crop_en ? 
1379 - wb_info->dwb_params.cnv_params.crop_width : 1380 - wb_info->dwb_params.cnv_params.src_width; 1381 - dout_wb.wb_dst_width = wb_info->dwb_params.dest_width; 1382 - dout_wb.wb_dst_height = wb_info->dwb_params.dest_height; 1383 - 1384 - /* For IP that doesn't support WB scaling, set h/v taps to 1 to avoid DML validation failure */ 1385 - if (dc->dml.ip.writeback_max_hscl_taps > 1) { 1386 - dout_wb.wb_htaps_luma = wb_info->dwb_params.scaler_taps.h_taps; 1387 - dout_wb.wb_vtaps_luma = wb_info->dwb_params.scaler_taps.v_taps; 1388 - } else { 1389 - dout_wb.wb_htaps_luma = 1; 1390 - dout_wb.wb_vtaps_luma = 1; 1391 - } 1392 - dout_wb.wb_htaps_chroma = 0; 1393 - dout_wb.wb_vtaps_chroma = 0; 1394 - dout_wb.wb_hratio = wb_info->dwb_params.cnv_params.crop_en ? 1395 - (double)wb_info->dwb_params.cnv_params.crop_width / 1396 - (double)wb_info->dwb_params.dest_width : 1397 - (double)wb_info->dwb_params.cnv_params.src_width / 1398 - (double)wb_info->dwb_params.dest_width; 1399 - dout_wb.wb_vratio = wb_info->dwb_params.cnv_params.crop_en ? 
1400 - (double)wb_info->dwb_params.cnv_params.crop_height / 1401 - (double)wb_info->dwb_params.dest_height : 1402 - (double)wb_info->dwb_params.cnv_params.src_height / 1403 - (double)wb_info->dwb_params.dest_height; 1404 - if (wb_info->dwb_params.cnv_params.fc_out_format == DWB_OUT_FORMAT_64BPP_ARGB || 1405 - wb_info->dwb_params.cnv_params.fc_out_format == DWB_OUT_FORMAT_64BPP_RGBA) 1406 - dout_wb.wb_pixel_format = dm_444_64; 1407 - else 1408 - dout_wb.wb_pixel_format = dm_444_32; 1409 - 1410 - /* Workaround for cases where multiple writebacks are connected to same plane 1411 - * In which case, need to compute worst case and set the associated writeback parameters 1412 - * This workaround is necessary due to DML computation assuming only 1 set of writeback 1413 - * parameters per pipe 1414 - */ 1415 - writeback_dispclk = dml30_CalculateWriteBackDISPCLK( 1416 - dout_wb.wb_pixel_format, 1417 - pipes[pipe_cnt].pipe.dest.pixel_rate_mhz, 1418 - dout_wb.wb_hratio, 1419 - dout_wb.wb_vratio, 1420 - dout_wb.wb_htaps_luma, 1421 - dout_wb.wb_vtaps_luma, 1422 - dout_wb.wb_src_width, 1423 - dout_wb.wb_dst_width, 1424 - pipes[pipe_cnt].pipe.dest.htotal, 1425 - dc->current_state->bw_ctx.dml.ip.writeback_line_buffer_buffer_size); 1426 - 1427 - if (writeback_dispclk > max_calc_writeback_dispclk) { 1428 - max_calc_writeback_dispclk = writeback_dispclk; 1429 - pipes[pipe_cnt].dout.wb = dout_wb; 1430 - } 1431 - } 1432 - } 1433 - 1434 - pipe_cnt++; 1435 - } 1436 - 1483 + DC_FP_START(); 1484 + dcn30_fpu_populate_dml_writeback_from_context(dc, res_ctx, pipes); 1485 + DC_FP_END(); 1437 1486 } 1438 1487 1439 1488 unsigned int dcn30_calc_max_scaled_time( ··· 1387 1598 enum mmhubbub_wbif_mode wbif_mode; 1388 1599 struct display_mode_lib *dml = &context->bw_ctx.dml; 1389 1600 struct mcif_arb_params *wb_arb_params; 1390 - int i, j, k, dwb_pipe; 1601 + int i, j, dwb_pipe; 1391 1602 1392 1603 /* Writeback MCIF_WB arbitration parameters */ 1393 1604 dwb_pipe = 0; ··· 1411 1622 else 1412 1623 
wbif_mode = PACKED_444; 1413 1624 1414 - for (k = 0; k < sizeof(wb_arb_params->cli_watermark)/sizeof(wb_arb_params->cli_watermark[0]); k++) { 1415 - wb_arb_params->cli_watermark[k] = get_wm_writeback_urgent(dml, pipes, pipe_cnt) * 1000; 1416 - wb_arb_params->pstate_watermark[k] = get_wm_writeback_dram_clock_change(dml, pipes, pipe_cnt) * 1000; 1417 - } 1625 + DC_FP_START(); 1626 + dcn30_fpu_set_mcif_arb_params(wb_arb_params, dml, pipes, pipe_cnt, j); 1627 + DC_FP_END(); 1418 1628 wb_arb_params->time_per_pixel = (1000000 << 6) / context->res_ctx.pipe_ctx[i].stream->phy_pix_clk; /* time_per_pixel should be in u6.6 format */ 1419 1629 wb_arb_params->slice_lines = 32; 1420 1630 wb_arb_params->arbitration_slice = 2; /* irrelevant since there is no YUV output */ 1421 1631 wb_arb_params->max_scaled_time = dcn30_calc_max_scaled_time(wb_arb_params->time_per_pixel, 1422 1632 wbif_mode, 1423 1633 wb_arb_params->cli_watermark[0]); /* assume 4 watermark sets have the same value */ 1424 - wb_arb_params->dram_speed_change_duration = dml->vba.WritebackAllowDRAMClockChangeEndPosition[j] * pipes[0].clks_cfg.refclk_mhz; /* num_clock_cycles = us * MHz */ 1425 1634 1426 1635 dwb_pipe++; 1427 1636 ··· 1898 2111 return out; 1899 2112 } 1900 2113 1901 - /* 1902 - * This must be noinline to ensure anything that deals with FP registers 1903 - * is contained within this call; previously our compiling with hard-float 1904 - * would result in fp instructions being emitted outside of the boundaries 1905 - * of the DC_FP_START/END macros, which makes sense as the compiler has no 1906 - * idea about what is wrapped and what is not 1907 - * 1908 - * This is largely just a workaround to avoid breakage introduced with 5.6, 1909 - * ideally all fp-using code should be moved into its own file, only that 1910 - * should be compiled with hard-float, and all code exported from there 1911 - * should be strictly wrapped with DC_FP_START/END 1912 - */ 1913 - static noinline void 
dcn30_calculate_wm_and_dlg_fp( 1914 - struct dc *dc, struct dc_state *context, 1915 - display_e2e_pipe_params_st *pipes, 1916 - int pipe_cnt, 1917 - int vlevel) 1918 - { 1919 - int maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb; 1920 - int i, pipe_idx; 1921 - double dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][maxMpcComb]; 1922 - bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] != dm_dram_clock_change_unsupported; 1923 - 1924 - if (context->bw_ctx.dml.soc.min_dcfclk > dcfclk) 1925 - dcfclk = context->bw_ctx.dml.soc.min_dcfclk; 1926 - 1927 - pipes[0].clks_cfg.voltage = vlevel; 1928 - pipes[0].clks_cfg.dcfclk_mhz = dcfclk; 1929 - pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz; 1930 - 1931 - /* Set B: 1932 - * DCFCLK: 1GHz or min required above 1GHz 1933 - * FCLK/UCLK: Max 1934 - */ 1935 - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid) { 1936 - if (vlevel == 0) { 1937 - pipes[0].clks_cfg.voltage = 1; 1938 - pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dcfclk_mhz; 1939 - } 1940 - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us; 1941 - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us; 1942 - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us; 1943 - } 1944 - context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 1945 - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 1946 - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 1947 - 
context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 1948 - context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 1949 - context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 1950 - context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 1951 - context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 1952 - 1953 - pipes[0].clks_cfg.voltage = vlevel; 1954 - pipes[0].clks_cfg.dcfclk_mhz = dcfclk; 1955 - 1956 - /* Set D: 1957 - * DCFCLK: Min Required 1958 - * FCLK(proportional to UCLK): 1GHz or Max 1959 - * MALL stutter, sr_enter_exit = 4, sr_exit = 2us 1960 - */ 1961 - /* 1962 - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) { 1963 - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us; 1964 - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us; 1965 - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us; 1966 - } 1967 - context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 1968 - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 1969 - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 1970 - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, 
pipes, pipe_cnt) * 1000; 1971 - context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 1972 - context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 1973 - context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 1974 - context->bw_ctx.bw.dcn.watermarks.d.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 1975 - */ 1976 - 1977 - /* Set C: 1978 - * DCFCLK: Min Required 1979 - * FCLK(proportional to UCLK): 1GHz or Max 1980 - * pstate latency overridden to 5us 1981 - */ 1982 - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) { 1983 - unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed; 1984 - unsigned int min_dram_speed_mts_margin = 160; 1985 - 1986 - if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] == dm_dram_clock_change_unsupported) 1987 - min_dram_speed_mts = dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz * 16; 1988 - 1989 - /* find largest table entry that is lower than dram speed, but lower than DPM0 still uses DPM0 */ 1990 - for (i = 3; i > 0; i--) 1991 - if (min_dram_speed_mts + min_dram_speed_mts_margin > dc->clk_mgr->bw_params->dummy_pstate_table[i].dram_speed_mts) 1992 - break; 1993 - 1994 - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[i].dummy_pstate_latency_us; 1995 - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us; 1996 - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us; 1997 - } 1998 - 1999 - context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = 
get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 2000 - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 2001 - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 2002 - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 2003 - context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 2004 - context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 2005 - context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 2006 - context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 2007 - 2008 - if (!pstate_en) { 2009 - /* The only difference between A and C is p-state latency, if p-state is not supported we want to 2010 - * calculate DLG based on dummy p-state latency, and max out the set A p-state watermark 2011 - */ 2012 - context->bw_ctx.bw.dcn.watermarks.a = context->bw_ctx.bw.dcn.watermarks.c; 2013 - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 0; 2014 - } else { 2015 - /* Set A: 2016 - * DCFCLK: Min Required 2017 - * FCLK(proportional to UCLK): 1GHz or Max 2018 - * 2019 - * Set A calculated last so that following calculations are based on Set A 2020 - */ 2021 - dc->res_pool->funcs->update_soc_for_wm_a(dc, context); 2022 - context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 2023 - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, 
pipe_cnt) * 1000; 2024 - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 2025 - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 2026 - context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 2027 - context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 2028 - context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 2029 - context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 2030 - } 2031 - 2032 - context->perf_params.stutter_period_us = context->bw_ctx.dml.vba.StutterPeriod; 2033 - 2034 - /* Make set D = set A until set D is enabled */ 2035 - context->bw_ctx.bw.dcn.watermarks.d = context->bw_ctx.bw.dcn.watermarks.a; 2036 - 2037 - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { 2038 - if (!context->res_ctx.pipe_ctx[i].stream) 2039 - continue; 2040 - 2041 - pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt); 2042 - pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); 2043 - 2044 - if (dc->config.forced_clocks) { 2045 - pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz; 2046 - pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz; 2047 - } 2048 - if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000) 2049 - pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0; 2050 - if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) 2051 - 
pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0; 2052 - 2053 - pipe_idx++; 2054 - } 2055 - 2056 - DC_FP_START(); 2057 - dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel); 2058 - DC_FP_END(); 2059 - 2060 - if (!pstate_en) 2061 - /* Restore full p-state latency */ 2062 - context->bw_ctx.dml.soc.dram_clock_change_latency_us = 2063 - dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; 2064 - } 2065 - 2066 2114 void dcn30_update_soc_for_wm_a(struct dc *dc, struct dc_state *context) 2067 2115 { 2068 - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].valid) { 2069 - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; 2070 - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us; 2071 - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us; 2072 - } 2116 + DC_FP_START(); 2117 + dcn30_fpu_update_soc_for_wm_a(dc, context); 2118 + DC_FP_END(); 2073 2119 } 2074 2120 2075 2121 void dcn30_calculate_wm_and_dlg( ··· 1912 2292 int vlevel) 1913 2293 { 1914 2294 DC_FP_START(); 1915 - dcn30_calculate_wm_and_dlg_fp(dc, context, pipes, pipe_cnt, vlevel); 2295 + dcn30_fpu_calculate_wm_and_dlg(dc, context, pipes, pipe_cnt, vlevel); 1916 2296 DC_FP_END(); 1917 2297 } 1918 2298 ··· 1971 2351 return out; 1972 2352 } 1973 2353 1974 - /* 1975 - * This must be noinline to ensure anything that deals with FP registers 1976 - * is contained within this call; previously our compiling with hard-float 1977 - * would result in fp instructions being emitted outside of the boundaries 1978 - * of the DC_FP_START/END macros, which makes sense as the compiler has no 1979 - * idea about what is wrapped and what is not 1980 - * 1981 - * This is largely just a workaround to avoid breakage introduced with 5.6, 
1982 - * ideally all fp-using code should be moved into its own file, only that 1983 - * should be compiled with hard-float, and all code exported from there 1984 - * should be strictly wrapped with DC_FP_START/END 1985 - */ 1986 - static noinline void dcn30_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts, 1987 - unsigned int *optimal_dcfclk, 1988 - unsigned int *optimal_fclk) 1989 - { 1990 - double bw_from_dram, bw_from_dram1, bw_from_dram2; 1991 - 1992 - bw_from_dram1 = uclk_mts * dcn3_0_soc.num_chans * 1993 - dcn3_0_soc.dram_channel_width_bytes * (dcn3_0_soc.max_avg_dram_bw_use_normal_percent / 100); 1994 - bw_from_dram2 = uclk_mts * dcn3_0_soc.num_chans * 1995 - dcn3_0_soc.dram_channel_width_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100); 1996 - 1997 - bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? bw_from_dram1 : bw_from_dram2; 1998 - 1999 - if (optimal_fclk) 2000 - *optimal_fclk = bw_from_dram / 2001 - (dcn3_0_soc.fabric_datapath_to_dcn_data_return_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100)); 2002 - 2003 - if (optimal_dcfclk) 2004 - *optimal_dcfclk = bw_from_dram / 2005 - (dcn3_0_soc.return_bus_width_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100)); 2006 - } 2007 - 2008 2354 void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) 2009 2355 { 2010 2356 unsigned int i, j; ··· 1985 2399 unsigned int num_dcfclk_sta_targets = 4; 1986 2400 unsigned int num_uclk_states; 1987 2401 2402 + struct dc_bounding_box_max_clk dcn30_bb_max_clk; 2403 + 2404 + memset(&dcn30_bb_max_clk, 0, sizeof(dcn30_bb_max_clk)); 2405 + 1988 2406 if (dc->ctx->dc_bios->vram_info.num_chans) 1989 2407 dcn3_0_soc.num_chans = dc->ctx->dc_bios->vram_info.num_chans; 1990 2408 1991 - if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes) 1992 - dcn3_0_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes; 1993 - 1994 - dcn3_0_soc.dispclk_dppclk_vco_speed_mhz = 
dc->clk_mgr->dentist_vco_freq_khz / 1000.0; 1995 - dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; 2409 + DC_FP_START(); 2410 + dcn30_fpu_update_dram_channel_width_bytes(dc); 2411 + DC_FP_END(); 1996 2412 1997 2413 if (bw_params->clk_table.entries[0].memclk_mhz) { 1998 - int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0; 1999 2414 2000 2415 for (i = 0; i < MAX_NUM_DPM_LVL; i++) { 2001 - if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz) 2002 - max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; 2003 - if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz) 2004 - max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; 2005 - if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz) 2006 - max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; 2007 - if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz) 2008 - max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; 2416 + if (bw_params->clk_table.entries[i].dcfclk_mhz > dcn30_bb_max_clk.max_dcfclk_mhz) 2417 + dcn30_bb_max_clk.max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; 2418 + if (bw_params->clk_table.entries[i].dispclk_mhz > dcn30_bb_max_clk.max_dispclk_mhz) 2419 + dcn30_bb_max_clk.max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; 2420 + if (bw_params->clk_table.entries[i].dppclk_mhz > dcn30_bb_max_clk.max_dppclk_mhz) 2421 + dcn30_bb_max_clk.max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; 2422 + if (bw_params->clk_table.entries[i].phyclk_mhz > dcn30_bb_max_clk.max_phyclk_mhz) 2423 + dcn30_bb_max_clk.max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; 2009 2424 } 2010 2425 2011 - if (!max_dcfclk_mhz) 2012 - max_dcfclk_mhz = dcn3_0_soc.clock_limits[0].dcfclk_mhz; 2013 - if (!max_dispclk_mhz) 2014 - max_dispclk_mhz = dcn3_0_soc.clock_limits[0].dispclk_mhz; 2015 - if (!max_dppclk_mhz) 2016 - max_dppclk_mhz = 
dcn3_0_soc.clock_limits[0].dppclk_mhz; 2017 - if (!max_phyclk_mhz) 2018 - max_phyclk_mhz = dcn3_0_soc.clock_limits[0].phyclk_mhz; 2426 + DC_FP_START(); 2427 + dcn30_fpu_update_max_clk(&dcn30_bb_max_clk); 2428 + DC_FP_END(); 2019 2429 2020 - if (max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { 2430 + if (dcn30_bb_max_clk.max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { 2021 2431 // If max DCFCLK is greater than the max DCFCLK STA target, insert into the DCFCLK STA target array 2022 - dcfclk_sta_targets[num_dcfclk_sta_targets] = max_dcfclk_mhz; 2432 + dcfclk_sta_targets[num_dcfclk_sta_targets] = dcn30_bb_max_clk.max_dcfclk_mhz; 2023 2433 num_dcfclk_sta_targets++; 2024 - } else if (max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { 2434 + } else if (dcn30_bb_max_clk.max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { 2025 2435 // If max DCFCLK is less than the max DCFCLK STA target, cap values and remove duplicates 2026 2436 for (i = 0; i < num_dcfclk_sta_targets; i++) { 2027 - if (dcfclk_sta_targets[i] > max_dcfclk_mhz) { 2028 - dcfclk_sta_targets[i] = max_dcfclk_mhz; 2437 + if (dcfclk_sta_targets[i] > dcn30_bb_max_clk.max_dcfclk_mhz) { 2438 + dcfclk_sta_targets[i] = dcn30_bb_max_clk.max_dcfclk_mhz; 2029 2439 break; 2030 2440 } 2031 2441 } ··· 2034 2452 // Calculate optimal dcfclk for each uclk 2035 2453 for (i = 0; i < num_uclk_states; i++) { 2036 2454 DC_FP_START(); 2037 - dcn30_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16, 2455 + dcn30_fpu_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16, 2038 2456 &optimal_dcfclk_for_uclk[i], NULL); 2039 2457 DC_FP_END(); 2040 2458 if (optimal_dcfclk_for_uclk[i] < bw_params->clk_table.entries[0].dcfclk_mhz) { ··· 2061 2479 dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; 2062 2480 dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; 2063 2481 } else { 2064 - if (j < num_uclk_states && 
optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) { 2482 + if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= dcn30_bb_max_clk.max_dcfclk_mhz) { 2065 2483 dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; 2066 2484 dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; 2067 2485 } else { ··· 2076 2494 } 2077 2495 2078 2496 while (j < num_uclk_states && num_states < DC__VOLTAGE_STATES && 2079 - optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) { 2497 + optimal_dcfclk_for_uclk[j] <= dcn30_bb_max_clk.max_dcfclk_mhz) { 2080 2498 dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; 2081 2499 dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; 2082 2500 } 2083 2501 2084 2502 dcn3_0_soc.num_states = num_states; 2085 - for (i = 0; i < dcn3_0_soc.num_states; i++) { 2086 - dcn3_0_soc.clock_limits[i].state = i; 2087 - dcn3_0_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i]; 2088 - dcn3_0_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i]; 2089 - dcn3_0_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i]; 2090 - 2091 - /* Fill all states with max values of all other clocks */ 2092 - dcn3_0_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz; 2093 - dcn3_0_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz; 2094 - dcn3_0_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz; 2095 - dcn3_0_soc.clock_limits[i].dtbclk_mhz = dcn3_0_soc.clock_limits[0].dtbclk_mhz; 2096 - /* These clocks cannot come from bw_params, always fill from dcn3_0_soc[1] */ 2097 - /* FCLK, PHYCLK_D18, SOCCLK, DSCCLK */ 2098 - dcn3_0_soc.clock_limits[i].phyclk_d18_mhz = dcn3_0_soc.clock_limits[0].phyclk_d18_mhz; 2099 - dcn3_0_soc.clock_limits[i].socclk_mhz = dcn3_0_soc.clock_limits[0].socclk_mhz; 2100 - dcn3_0_soc.clock_limits[i].dscclk_mhz = dcn3_0_soc.clock_limits[0].dscclk_mhz; 2101 - } 2102 - /* re-init DML with updated bb */ 2103 - dml_init_instance(&dc->dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30); 2104 - if (dc->current_state) 2105 - 
dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30); 2503 + DC_FP_START(); 2504 + dcn30_fpu_update_bw_bounding_box(dc, bw_params, &dcn30_bb_max_clk, dcfclk_mhz, dram_speed_mts); 2505 + DC_FP_END(); 2106 2506 } 2107 2507 } 2108 2508
+5
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h
··· 35 35 struct resource_pool; 36 36 struct _vcs_dpi_display_pipe_params_st; 37 37 38 + extern struct _vcs_dpi_ip_params_st dcn3_0_ip; 39 + extern struct _vcs_dpi_soc_bounding_box_st dcn3_0_soc; 40 + 38 41 struct dcn30_resource_pool { 39 42 struct resource_pool base; 40 43 }; ··· 98 95 struct dc_stream_state *dc_stream); 99 96 100 97 void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params); 98 + 99 + void dcn30_setup_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct dc_state *context); 101 100 102 101 #endif /* _DCN30_RESOURCE_H_ */
+2
drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
··· 81 81 #include "dce/dce_aux.h" 82 82 #include "dce/dce_i2c.h" 83 83 84 + #include "dml/dcn30/dcn30_fpu.h" 85 + 84 86 #include "dml/dcn30/display_mode_vba_30.h" 85 87 #include "dml/dcn301/dcn301_fpu.h" 86 88 #include "vm_helper.h"
+2
drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
··· 43 43 #include "dcn20/dcn20_dsc.h" 44 44 #include "dcn20/dcn20_resource.h" 45 45 46 + #include "dml/dcn30/dcn30_fpu.h" 47 + 46 48 #include "dcn10/dcn10_resource.h" 47 49 48 50 #include "dce/dce_abm.h"
+2
drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
··· 25 25 #include "dcn20/dcn20_dsc.h" 26 26 #include "dcn20/dcn20_resource.h" 27 27 28 + #include "dml/dcn30/dcn30_fpu.h" 29 + 28 30 #include "dcn10/dcn10_resource.h" 29 31 30 32 #include "dc_link_ddc.h"
+2
drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
··· 36 36 #include "dcn20/dcn20_resource.h" 37 37 #include "dcn30/dcn30_resource.h" 38 38 39 + #include "dml/dcn30/dcn30_fpu.h" 40 + 39 41 #include "dcn10/dcn10_ipp.h" 40 42 #include "dcn30/dcn30_hubbub.h" 41 43 #include "dcn31/dcn31_hubbub.h"
+2 -1
drivers/gpu/drm/amd/display/dc/dml/Makefile
··· 71 71 CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags) 72 72 CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_ccflags) $(frame_warn_flag) 73 73 CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_ccflags) 74 + CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/dcn30_fpu.o := $(dml_ccflags) 74 75 CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/dcn31_fpu.o := $(dml_ccflags) 75 76 CFLAGS_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_ccflags) 76 77 CFLAGS_$(AMDDALPATH)/dc/dml/dcn302/dcn302_fpu.o := $(dml_ccflags) ··· 114 113 DML += display_mode_vba.o dcn20/display_rq_dlg_calc_20.o dcn20/display_mode_vba_20.o 115 114 DML += dcn20/display_rq_dlg_calc_20v2.o dcn20/display_mode_vba_20v2.o 116 115 DML += dcn21/display_rq_dlg_calc_21.o dcn21/display_mode_vba_21.o 117 - DML += dcn30/display_mode_vba_30.o dcn30/display_rq_dlg_calc_30.o 116 + DML += dcn30/dcn30_fpu.o dcn30/display_mode_vba_30.o dcn30/display_rq_dlg_calc_30.o 118 117 DML += dcn31/display_mode_vba_31.o dcn31/display_rq_dlg_calc_31.o 119 118 DML += dcn31/dcn31_fpu.o 120 119 DML += dcn301/dcn301_fpu.o
+617
drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c
··· 1 + /* 2 + * Copyright 2020-2021 Advanced Micro Devices, Inc. 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice shall be included in 12 + * all copies or substantial portions of the Software. 13 + * 14 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 + * OTHER DEALINGS IN THE SOFTWARE. 
21 + * 22 + * Authors: AMD 23 + * 24 + */ 25 + #include "resource.h" 26 + #include "clk_mgr.h" 27 + #include "reg_helper.h" 28 + #include "dcn_calc_math.h" 29 + #include "dcn20/dcn20_resource.h" 30 + #include "dcn30/dcn30_resource.h" 31 + 32 + 33 + #include "display_mode_vba_30.h" 34 + #include "dcn30_fpu.h" 35 + 36 + #define REG(reg)\ 37 + optc1->tg_regs->reg 38 + 39 + #define CTX \ 40 + optc1->base.ctx 41 + 42 + #undef FN 43 + #define FN(reg_name, field_name) \ 44 + optc1->tg_shift->field_name, optc1->tg_mask->field_name 45 + 46 + 47 + struct _vcs_dpi_ip_params_st dcn3_0_ip = { 48 + .use_min_dcfclk = 0, 49 + .clamp_min_dcfclk = 0, 50 + .odm_capable = 1, 51 + .gpuvm_enable = 0, 52 + .hostvm_enable = 0, 53 + .gpuvm_max_page_table_levels = 4, 54 + .hostvm_max_page_table_levels = 4, 55 + .hostvm_cached_page_table_levels = 0, 56 + .pte_group_size_bytes = 2048, 57 + .num_dsc = 6, 58 + .rob_buffer_size_kbytes = 184, 59 + .det_buffer_size_kbytes = 184, 60 + .dpte_buffer_size_in_pte_reqs_luma = 84, 61 + .pde_proc_buffer_size_64k_reqs = 48, 62 + .dpp_output_buffer_pixels = 2560, 63 + .opp_output_buffer_lines = 1, 64 + .pixel_chunk_size_kbytes = 8, 65 + .pte_enable = 1, 66 + .max_page_table_levels = 2, 67 + .pte_chunk_size_kbytes = 2, // ? 68 + .meta_chunk_size_kbytes = 2, 69 + .writeback_chunk_size_kbytes = 8, 70 + .line_buffer_size_bits = 789504, 71 + .is_line_buffer_bpp_fixed = 0, // ? 72 + .line_buffer_fixed_bpp = 0, // ? 
73 + .dcc_supported = true, 74 + .writeback_interface_buffer_size_kbytes = 90, 75 + .writeback_line_buffer_buffer_size = 0, 76 + .max_line_buffer_lines = 12, 77 + .writeback_luma_buffer_size_kbytes = 12, // writeback_line_buffer_buffer_size = 656640 78 + .writeback_chroma_buffer_size_kbytes = 8, 79 + .writeback_chroma_line_buffer_width_pixels = 4, 80 + .writeback_max_hscl_ratio = 1, 81 + .writeback_max_vscl_ratio = 1, 82 + .writeback_min_hscl_ratio = 1, 83 + .writeback_min_vscl_ratio = 1, 84 + .writeback_max_hscl_taps = 1, 85 + .writeback_max_vscl_taps = 1, 86 + .writeback_line_buffer_luma_buffer_size = 0, 87 + .writeback_line_buffer_chroma_buffer_size = 14643, 88 + .cursor_buffer_size = 8, 89 + .cursor_chunk_size = 2, 90 + .max_num_otg = 6, 91 + .max_num_dpp = 6, 92 + .max_num_wb = 1, 93 + .max_dchub_pscl_bw_pix_per_clk = 4, 94 + .max_pscl_lb_bw_pix_per_clk = 2, 95 + .max_lb_vscl_bw_pix_per_clk = 4, 96 + .max_vscl_hscl_bw_pix_per_clk = 4, 97 + .max_hscl_ratio = 6, 98 + .max_vscl_ratio = 6, 99 + .hscl_mults = 4, 100 + .vscl_mults = 4, 101 + .max_hscl_taps = 8, 102 + .max_vscl_taps = 8, 103 + .dispclk_ramp_margin_percent = 1, 104 + .underscan_factor = 1.11, 105 + .min_vblank_lines = 32, 106 + .dppclk_delay_subtotal = 46, 107 + .dynamic_metadata_vm_enabled = true, 108 + .dppclk_delay_scl_lb_only = 16, 109 + .dppclk_delay_scl = 50, 110 + .dppclk_delay_cnvc_formatter = 27, 111 + .dppclk_delay_cnvc_cursor = 6, 112 + .dispclk_delay_subtotal = 119, 113 + .dcfclk_cstate_latency = 5.2, // SRExitTime 114 + .max_inter_dcn_tile_repeaters = 8, 115 + .max_num_hdmi_frl_outputs = 1, 116 + .odm_combine_4to1_supported = true, 117 + 118 + .xfc_supported = false, 119 + .xfc_fill_bw_overhead_percent = 10.0, 120 + .xfc_fill_constant_bytes = 0, 121 + .gfx7_compat_tiling_supported = 0, 122 + .number_of_cursors = 1, 123 + }; 124 + 125 + struct _vcs_dpi_soc_bounding_box_st dcn3_0_soc = { 126 + .clock_limits = { 127 + { 128 + .state = 0, 129 + .dispclk_mhz = 562.0, 130 + .dppclk_mhz = 300.0, 
131 + .phyclk_mhz = 300.0, 132 + .phyclk_d18_mhz = 667.0, 133 + .dscclk_mhz = 405.6, 134 + }, 135 + }, 136 + 137 + .min_dcfclk = 500.0, /* TODO: set this to actual min DCFCLK */ 138 + .num_states = 1, 139 + .sr_exit_time_us = 15.5, 140 + .sr_enter_plus_exit_time_us = 20, 141 + .urgent_latency_us = 4.0, 142 + .urgent_latency_pixel_data_only_us = 4.0, 143 + .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, 144 + .urgent_latency_vm_data_only_us = 4.0, 145 + .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, 146 + .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, 147 + .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, 148 + .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 80.0, 149 + .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, 150 + .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0, 151 + .max_avg_sdp_bw_use_normal_percent = 60.0, 152 + .max_avg_dram_bw_use_normal_percent = 40.0, 153 + .writeback_latency_us = 12.0, 154 + .max_request_size_bytes = 256, 155 + .fabric_datapath_to_dcn_data_return_bytes = 64, 156 + .dcn_downspread_percent = 0.5, 157 + .downspread_percent = 0.38, 158 + .dram_page_open_time_ns = 50.0, 159 + .dram_rw_turnaround_time_ns = 17.5, 160 + .dram_return_buffer_per_channel_bytes = 8192, 161 + .round_trip_ping_latency_dcfclk_cycles = 191, 162 + .urgent_out_of_order_return_per_channel_bytes = 4096, 163 + .channel_interleave_bytes = 256, 164 + .num_banks = 8, 165 + .gpuvm_min_page_size_bytes = 4096, 166 + .hostvm_min_page_size_bytes = 4096, 167 + .dram_clock_change_latency_us = 404, 168 + .dummy_pstate_latency_us = 5, 169 + .writeback_dram_clock_change_latency_us = 23.0, 170 + .return_bus_width_bytes = 64, 171 + .dispclk_dppclk_vco_speed_mhz = 3650, 172 + .xfc_bus_transport_time_us = 20, // ? 173 + .xfc_xbuf_latency_tolerance_us = 4, // ? 174 + .use_urgent_burst_bw = 1, // ? 
175 + .do_urgent_latency_adjustment = true, 176 + .urgent_latency_adjustment_fabric_clock_component_us = 1.0, 177 + .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000, 178 + }; 179 + 180 + 181 + void optc3_fpu_set_vrr_m_const(struct timing_generator *optc, 182 + double vtotal_avg) 183 + { 184 + struct optc *optc1 = DCN10TG_FROM_TG(optc); 185 + double vtotal_min, vtotal_max; 186 + double ratio, modulo, phase; 187 + uint32_t vblank_start; 188 + uint32_t v_total_mask_value = 0; 189 + 190 + dc_assert_fp_enabled(); 191 + 192 + /* Compute VTOTAL_MIN and VTOTAL_MAX, so that 193 + * VOTAL_MAX - VTOTAL_MIN = 1 194 + */ 195 + v_total_mask_value = 16; 196 + vtotal_min = dcn_bw_floor(vtotal_avg); 197 + vtotal_max = dcn_bw_ceil(vtotal_avg); 198 + 199 + /* Check that bottom VBLANK is at least 2 lines tall when running with 200 + * VTOTAL_MIN. Note that VTOTAL registers are defined as 'total number 201 + * of lines in a frame - 1'. 202 + */ 203 + REG_GET(OTG_V_BLANK_START_END, OTG_V_BLANK_START, 204 + &vblank_start); 205 + ASSERT(vtotal_min >= vblank_start + 1); 206 + 207 + /* Special case where the average frame rate can be achieved 208 + * without using the DTO 209 + */ 210 + if (vtotal_min == vtotal_max) { 211 + REG_SET(OTG_V_TOTAL, 0, OTG_V_TOTAL, (uint32_t)vtotal_min); 212 + 213 + optc->funcs->set_vtotal_min_max(optc, 0, 0); 214 + REG_SET(OTG_M_CONST_DTO0, 0, OTG_M_CONST_DTO_PHASE, 0); 215 + REG_SET(OTG_M_CONST_DTO1, 0, OTG_M_CONST_DTO_MODULO, 0); 216 + REG_UPDATE_3(OTG_V_TOTAL_CONTROL, 217 + OTG_V_TOTAL_MIN_SEL, 0, 218 + OTG_V_TOTAL_MAX_SEL, 0, 219 + OTG_SET_V_TOTAL_MIN_MASK_EN, 0); 220 + return; 221 + } 222 + 223 + ratio = vtotal_max - vtotal_avg; 224 + modulo = 65536.0 * 65536.0 - 1.0; /* 2^32 - 1 */ 225 + phase = ratio * modulo; 226 + 227 + /* Special cases where the DTO phase gets rounded to 0 or 228 + * to DTO modulo 229 + */ 230 + if (phase <= 0 || phase >= modulo) { 231 + REG_SET(OTG_V_TOTAL, 0, OTG_V_TOTAL, 232 + phase <= 0 ? 
233 + (uint32_t)vtotal_max : (uint32_t)vtotal_min); 234 + REG_SET(OTG_V_TOTAL_MIN, 0, OTG_V_TOTAL_MIN, 0); 235 + REG_SET(OTG_V_TOTAL_MAX, 0, OTG_V_TOTAL_MAX, 0); 236 + REG_SET(OTG_M_CONST_DTO0, 0, OTG_M_CONST_DTO_PHASE, 0); 237 + REG_SET(OTG_M_CONST_DTO1, 0, OTG_M_CONST_DTO_MODULO, 0); 238 + REG_UPDATE_3(OTG_V_TOTAL_CONTROL, 239 + OTG_V_TOTAL_MIN_SEL, 0, 240 + OTG_V_TOTAL_MAX_SEL, 0, 241 + OTG_SET_V_TOTAL_MIN_MASK_EN, 0); 242 + return; 243 + } 244 + REG_UPDATE_6(OTG_V_TOTAL_CONTROL, 245 + OTG_V_TOTAL_MIN_SEL, 1, 246 + OTG_V_TOTAL_MAX_SEL, 1, 247 + OTG_SET_V_TOTAL_MIN_MASK_EN, 1, 248 + OTG_SET_V_TOTAL_MIN_MASK, v_total_mask_value, 249 + OTG_VTOTAL_MID_REPLACING_MIN_EN, 0, 250 + OTG_VTOTAL_MID_REPLACING_MAX_EN, 0); 251 + REG_SET(OTG_V_TOTAL, 0, OTG_V_TOTAL, (uint32_t)vtotal_min); 252 + optc->funcs->set_vtotal_min_max(optc, vtotal_min, vtotal_max); 253 + REG_SET(OTG_M_CONST_DTO0, 0, OTG_M_CONST_DTO_PHASE, (uint32_t)phase); 254 + REG_SET(OTG_M_CONST_DTO1, 0, OTG_M_CONST_DTO_MODULO, (uint32_t)modulo); 255 + } 256 + 257 + void dcn30_fpu_populate_dml_writeback_from_context( 258 + struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes) 259 + { 260 + int pipe_cnt, i, j; 261 + double max_calc_writeback_dispclk; 262 + double writeback_dispclk; 263 + struct writeback_st dout_wb; 264 + 265 + dc_assert_fp_enabled(); 266 + 267 + for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { 268 + struct dc_stream_state *stream = res_ctx->pipe_ctx[i].stream; 269 + 270 + if (!stream) 271 + continue; 272 + max_calc_writeback_dispclk = 0; 273 + 274 + /* Set writeback information */ 275 + pipes[pipe_cnt].dout.wb_enable = 0; 276 + pipes[pipe_cnt].dout.num_active_wb = 0; 277 + for (j = 0; j < stream->num_wb_info; j++) { 278 + struct dc_writeback_info *wb_info = &stream->writeback_info[j]; 279 + 280 + if (wb_info->wb_enabled && wb_info->writeback_source_plane && 281 + (wb_info->writeback_source_plane == res_ctx->pipe_ctx[i].plane_state)) { 282 + 
pipes[pipe_cnt].dout.wb_enable = 1; 283 + pipes[pipe_cnt].dout.num_active_wb++; 284 + dout_wb.wb_src_height = wb_info->dwb_params.cnv_params.crop_en ? 285 + wb_info->dwb_params.cnv_params.crop_height : 286 + wb_info->dwb_params.cnv_params.src_height; 287 + dout_wb.wb_src_width = wb_info->dwb_params.cnv_params.crop_en ? 288 + wb_info->dwb_params.cnv_params.crop_width : 289 + wb_info->dwb_params.cnv_params.src_width; 290 + dout_wb.wb_dst_width = wb_info->dwb_params.dest_width; 291 + dout_wb.wb_dst_height = wb_info->dwb_params.dest_height; 292 + 293 + /* For IP that doesn't support WB scaling, set h/v taps to 1 to avoid DML validation failure */ 294 + if (dc->dml.ip.writeback_max_hscl_taps > 1) { 295 + dout_wb.wb_htaps_luma = wb_info->dwb_params.scaler_taps.h_taps; 296 + dout_wb.wb_vtaps_luma = wb_info->dwb_params.scaler_taps.v_taps; 297 + } else { 298 + dout_wb.wb_htaps_luma = 1; 299 + dout_wb.wb_vtaps_luma = 1; 300 + } 301 + dout_wb.wb_htaps_chroma = 0; 302 + dout_wb.wb_vtaps_chroma = 0; 303 + dout_wb.wb_hratio = wb_info->dwb_params.cnv_params.crop_en ? 304 + (double)wb_info->dwb_params.cnv_params.crop_width / 305 + (double)wb_info->dwb_params.dest_width : 306 + (double)wb_info->dwb_params.cnv_params.src_width / 307 + (double)wb_info->dwb_params.dest_width; 308 + dout_wb.wb_vratio = wb_info->dwb_params.cnv_params.crop_en ? 
309 + (double)wb_info->dwb_params.cnv_params.crop_height / 310 + (double)wb_info->dwb_params.dest_height : 311 + (double)wb_info->dwb_params.cnv_params.src_height / 312 + (double)wb_info->dwb_params.dest_height; 313 + if (wb_info->dwb_params.cnv_params.fc_out_format == DWB_OUT_FORMAT_64BPP_ARGB || 314 + wb_info->dwb_params.cnv_params.fc_out_format == DWB_OUT_FORMAT_64BPP_RGBA) 315 + dout_wb.wb_pixel_format = dm_444_64; 316 + else 317 + dout_wb.wb_pixel_format = dm_444_32; 318 + 319 + /* Workaround for cases where multiple writebacks are connected to same plane 320 + * In which case, need to compute worst case and set the associated writeback parameters 321 + * This workaround is necessary due to DML computation assuming only 1 set of writeback 322 + * parameters per pipe 323 + */ 324 + writeback_dispclk = dml30_CalculateWriteBackDISPCLK( 325 + dout_wb.wb_pixel_format, 326 + pipes[pipe_cnt].pipe.dest.pixel_rate_mhz, 327 + dout_wb.wb_hratio, 328 + dout_wb.wb_vratio, 329 + dout_wb.wb_htaps_luma, 330 + dout_wb.wb_vtaps_luma, 331 + dout_wb.wb_src_width, 332 + dout_wb.wb_dst_width, 333 + pipes[pipe_cnt].pipe.dest.htotal, 334 + dc->current_state->bw_ctx.dml.ip.writeback_line_buffer_buffer_size); 335 + 336 + if (writeback_dispclk > max_calc_writeback_dispclk) { 337 + max_calc_writeback_dispclk = writeback_dispclk; 338 + pipes[pipe_cnt].dout.wb = dout_wb; 339 + } 340 + } 341 + } 342 + 343 + pipe_cnt++; 344 + } 345 + } 346 + 347 + void dcn30_fpu_set_mcif_arb_params(struct mcif_arb_params *wb_arb_params, 348 + struct display_mode_lib *dml, 349 + display_e2e_pipe_params_st *pipes, 350 + int pipe_cnt, 351 + int cur_pipe) 352 + { 353 + int i; 354 + 355 + dc_assert_fp_enabled(); 356 + 357 + for (i = 0; i < sizeof(wb_arb_params->cli_watermark)/sizeof(wb_arb_params->cli_watermark[0]); i++) { 358 + wb_arb_params->cli_watermark[i] = get_wm_writeback_urgent(dml, pipes, pipe_cnt) * 1000; 359 + wb_arb_params->pstate_watermark[i] = get_wm_writeback_dram_clock_change(dml, pipes, pipe_cnt) 
* 1000; 360 + } 361 + 362 + wb_arb_params->dram_speed_change_duration = dml->vba.WritebackAllowDRAMClockChangeEndPosition[cur_pipe] * pipes[0].clks_cfg.refclk_mhz; /* num_clock_cycles = us * MHz */ 363 + } 364 + 365 + void dcn30_fpu_update_soc_for_wm_a(struct dc *dc, struct dc_state *context) 366 + { 367 + 368 + dc_assert_fp_enabled(); 369 + 370 + if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].valid) { 371 + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; 372 + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us; 373 + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us; 374 + } 375 + } 376 + 377 + void dcn30_fpu_calculate_wm_and_dlg( 378 + struct dc *dc, struct dc_state *context, 379 + display_e2e_pipe_params_st *pipes, 380 + int pipe_cnt, 381 + int vlevel) 382 + { 383 + int maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb; 384 + int i, pipe_idx; 385 + double dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][maxMpcComb]; 386 + bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] != dm_dram_clock_change_unsupported; 387 + 388 + dc_assert_fp_enabled(); 389 + 390 + if (context->bw_ctx.dml.soc.min_dcfclk > dcfclk) 391 + dcfclk = context->bw_ctx.dml.soc.min_dcfclk; 392 + 393 + pipes[0].clks_cfg.voltage = vlevel; 394 + pipes[0].clks_cfg.dcfclk_mhz = dcfclk; 395 + pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz; 396 + 397 + /* Set B: 398 + * DCFCLK: 1GHz or min required above 1GHz 399 + * FCLK/UCLK: Max 400 + */ 401 + if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid) { 402 + if (vlevel == 0) { 403 + pipes[0].clks_cfg.voltage = 1; 404 + pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dcfclk_mhz; 405 + } 406 + 
context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us; 407 + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us; 408 + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us; 409 + } 410 + context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 411 + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 412 + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 413 + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 414 + context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 415 + context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 416 + context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 417 + context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 418 + 419 + pipes[0].clks_cfg.voltage = vlevel; 420 + pipes[0].clks_cfg.dcfclk_mhz = dcfclk; 421 + 422 + /* Set D: 423 + * DCFCLK: Min Required 424 + * FCLK(proportional to UCLK): 1GHz or Max 425 + * MALL stutter, sr_enter_exit = 4, sr_exit = 2us 426 + */ 427 + /* 428 + if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) { 429 + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us; 430 + 
context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us; 431 + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us; 432 + } 433 + context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 434 + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 435 + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 436 + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 437 + context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 438 + context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 439 + context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 440 + context->bw_ctx.bw.dcn.watermarks.d.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 441 + */ 442 + 443 + /* Set C: 444 + * DCFCLK: Min Required 445 + * FCLK(proportional to UCLK): 1GHz or Max 446 + * pstate latency overridden to 5us 447 + */ 448 + if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) { 449 + unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed; 450 + unsigned int min_dram_speed_mts_margin = 160; 451 + 452 + if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] == dm_dram_clock_change_unsupported) 453 + min_dram_speed_mts = dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 
1].memclk_mhz * 16; 454 + 455 + /* find largest table entry that is lower than dram speed, but lower than DPM0 still uses DPM0 */ 456 + for (i = 3; i > 0; i--) 457 + if (min_dram_speed_mts + min_dram_speed_mts_margin > dc->clk_mgr->bw_params->dummy_pstate_table[i].dram_speed_mts) 458 + break; 459 + 460 + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[i].dummy_pstate_latency_us; 461 + 462 + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us; 463 + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us; 464 + } 465 + 466 + context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 467 + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 468 + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 469 + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 470 + context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 471 + context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 472 + context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 473 + context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 474 + 475 + if (!pstate_en) { 476 + /* The only difference between A and C is p-state latency, if p-state is not supported we want to 477 + * calculate DLG based on 
dummy p-state latency, and max out the set A p-state watermark 478 + */ 479 + context->bw_ctx.bw.dcn.watermarks.a = context->bw_ctx.bw.dcn.watermarks.c; 480 + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 0; 481 + } else { 482 + /* Set A: 483 + * DCFCLK: Min Required 484 + * FCLK(proportional to UCLK): 1GHz or Max 485 + * 486 + * Set A calculated last so that following calculations are based on Set A 487 + */ 488 + dc->res_pool->funcs->update_soc_for_wm_a(dc, context); 489 + context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 490 + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 491 + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 492 + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 493 + context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 494 + context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 495 + context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 496 + context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; 497 + } 498 + 499 + context->perf_params.stutter_period_us = context->bw_ctx.dml.vba.StutterPeriod; 500 + 501 + /* Make set D = set A until set D is enabled */ 502 + context->bw_ctx.bw.dcn.watermarks.d = context->bw_ctx.bw.dcn.watermarks.a; 503 + 504 + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { 505 + if (!context->res_ctx.pipe_ctx[i].stream) 506 + continue; 507 + 508 + 
pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt); 509 + pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); 510 + 511 + if (dc->config.forced_clocks) { 512 + pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz; 513 + pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz; 514 + } 515 + if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000) 516 + pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0; 517 + if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) 518 + pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0; 519 + 520 + pipe_idx++; 521 + } 522 + 523 + DC_FP_START(); 524 + dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel); 525 + DC_FP_END(); 526 + 527 + if (!pstate_en) 528 + /* Restore full p-state latency */ 529 + context->bw_ctx.dml.soc.dram_clock_change_latency_us = 530 + dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; 531 + 532 + } 533 + 534 + void dcn30_fpu_update_dram_channel_width_bytes(struct dc *dc) 535 + { 536 + dc_assert_fp_enabled(); 537 + 538 + if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes) 539 + dcn3_0_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes; 540 + } 541 + 542 + void dcn30_fpu_update_max_clk(struct dc_bounding_box_max_clk *dcn30_bb_max_clk) 543 + { 544 + dc_assert_fp_enabled(); 545 + 546 + if (!dcn30_bb_max_clk->max_dcfclk_mhz) 547 + dcn30_bb_max_clk->max_dcfclk_mhz = dcn3_0_soc.clock_limits[0].dcfclk_mhz; 548 + if (!dcn30_bb_max_clk->max_dispclk_mhz) 549 + dcn30_bb_max_clk->max_dispclk_mhz = dcn3_0_soc.clock_limits[0].dispclk_mhz; 550 + if (!dcn30_bb_max_clk->max_dppclk_mhz) 551 + dcn30_bb_max_clk->max_dppclk_mhz = dcn3_0_soc.clock_limits[0].dppclk_mhz; 552 + if 
(!dcn30_bb_max_clk->max_phyclk_mhz) 553 + dcn30_bb_max_clk->max_phyclk_mhz = dcn3_0_soc.clock_limits[0].phyclk_mhz; 554 + } 555 + 556 + void dcn30_fpu_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts, 557 + unsigned int *optimal_dcfclk, 558 + unsigned int *optimal_fclk) 559 + { 560 + double bw_from_dram, bw_from_dram1, bw_from_dram2; 561 + 562 + dc_assert_fp_enabled(); 563 + 564 + bw_from_dram1 = uclk_mts * dcn3_0_soc.num_chans * 565 + dcn3_0_soc.dram_channel_width_bytes * (dcn3_0_soc.max_avg_dram_bw_use_normal_percent / 100); 566 + bw_from_dram2 = uclk_mts * dcn3_0_soc.num_chans * 567 + dcn3_0_soc.dram_channel_width_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100); 568 + 569 + bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? bw_from_dram1 : bw_from_dram2; 570 + 571 + if (optimal_fclk) 572 + *optimal_fclk = bw_from_dram / 573 + (dcn3_0_soc.fabric_datapath_to_dcn_data_return_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100)); 574 + 575 + if (optimal_dcfclk) 576 + *optimal_dcfclk = bw_from_dram / 577 + (dcn3_0_soc.return_bus_width_bytes * (dcn3_0_soc.max_avg_sdp_bw_use_normal_percent / 100)); 578 + } 579 + 580 + void dcn30_fpu_update_bw_bounding_box(struct dc *dc, 581 + struct clk_bw_params *bw_params, 582 + struct dc_bounding_box_max_clk *dcn30_bb_max_clk, 583 + unsigned int *dcfclk_mhz, 584 + unsigned int *dram_speed_mts) 585 + { 586 + unsigned int i; 587 + 588 + dc_assert_fp_enabled(); 589 + 590 + dcn3_0_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; 591 + dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; 592 + 593 + for (i = 0; i < dcn3_0_soc.num_states; i++) { 594 + dcn3_0_soc.clock_limits[i].state = i; 595 + dcn3_0_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i]; 596 + dcn3_0_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i]; 597 + dcn3_0_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i]; 598 + 599 + /* Fill all states with max values of all other clocks */ 600 
+ dcn3_0_soc.clock_limits[i].dispclk_mhz = dcn30_bb_max_clk->max_dispclk_mhz; 601 + dcn3_0_soc.clock_limits[i].dppclk_mhz = dcn30_bb_max_clk->max_dppclk_mhz; 602 + dcn3_0_soc.clock_limits[i].phyclk_mhz = dcn30_bb_max_clk->max_phyclk_mhz; 603 + dcn3_0_soc.clock_limits[i].dtbclk_mhz = dcn3_0_soc.clock_limits[0].dtbclk_mhz; 604 + /* These clocks cannot come from bw_params, always fill from dcn3_0_soc[1] */ 605 + /* FCLK, PHYCLK_D18, SOCCLK, DSCCLK */ 606 + dcn3_0_soc.clock_limits[i].phyclk_d18_mhz = dcn3_0_soc.clock_limits[0].phyclk_d18_mhz; 607 + dcn3_0_soc.clock_limits[i].socclk_mhz = dcn3_0_soc.clock_limits[0].socclk_mhz; 608 + dcn3_0_soc.clock_limits[i].dscclk_mhz = dcn3_0_soc.clock_limits[0].dscclk_mhz; 609 + } 610 + /* re-init DML with updated bb */ 611 + dml_init_instance(&dc->dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30); 612 + if (dc->current_state) 613 + dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30); 614 + 615 + } 616 + 617 +
+67
drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h
··· 1 + /* 2 + * Copyright 2020-2021 Advanced Micro Devices, Inc. 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice shall be included in 12 + * all copies or substantial portions of the Software. 13 + * 14 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 + * OTHER DEALINGS IN THE SOFTWARE. 
21 + * 22 + * Authors: AMD 23 + * 24 + */ 25 + 26 + #ifndef __DCN30_FPU_H__ 27 + #define __DCN30_FPU_H__ 28 + 29 + #include "core_types.h" 30 + #include "dcn20/dcn20_optc.h" 31 + /* Prototypes for the DCN30 helpers that were moved into the DML folder because they use floating point. The implementations visible in dcn30_fpu.c call dc_assert_fp_enabled(), so callers are presumably expected to wrap each call in DC_FP_START()/DC_FP_END() - TODO confirm for every entry point. */ /* Floating-point part of the VRR M-constant setup; optc3_set_vrr_m_const() in dcn30_optc.c calls this between DC_FP_START() and DC_FP_END(). */ 32 + void optc3_fpu_set_vrr_m_const(struct timing_generator *optc, 33 + double vtotal_avg); 34 + 35 + void dcn30_fpu_populate_dml_writeback_from_context( 36 + struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes); 37 + 38 + void dcn30_fpu_set_mcif_arb_params(struct mcif_arb_params *wb_arb_params, 39 + struct display_mode_lib *dml, 40 + display_e2e_pipe_params_st *pipes, 41 + int pipe_cnt, 42 + int cur_pipe); 43 + 44 + void dcn30_fpu_update_soc_for_wm_a(struct dc *dc, struct dc_state *context); 45 + 46 + void dcn30_fpu_calculate_wm_and_dlg( 47 + struct dc *dc, struct dc_state *context, 48 + display_e2e_pipe_params_st *pipes, 49 + int pipe_cnt, 50 + int vlevel); 51 + 52 + void dcn30_fpu_update_dram_channel_width_bytes(struct dc *dc); 53 + /* Updates fields of *dcn30_bb_max_clk; the visible tail of the implementation appears to default max_phyclk_mhz from dcn3_0_soc.clock_limits[0].phyclk_mhz when it is zero - verify against the full body. */ 54 + void dcn30_fpu_update_max_clk(struct dc_bounding_box_max_clk *dcn30_bb_max_clk); 55 + /* Computes DCFCLK and FCLK values for the given uclk_mts from the dcn3_0_soc bounding box; either output pointer may be NULL, in which case that output is not written. */ 56 + void dcn30_fpu_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts, 57 + unsigned int *optimal_dcfclk, 58 + unsigned int *optimal_fclk); 59 + /* Refills dcn3_0_soc.clock_limits[] from dcfclk_mhz[], dram_speed_mts[] and *dcn30_bb_max_clk, then re-initialises the DML instances in dc->dml and, if set, dc->current_state. Both arrays are read for dcn3_0_soc.num_states entries. */ 60 + void dcn30_fpu_update_bw_bounding_box(struct dc *dc, 61 + struct clk_bw_params *bw_params, 62 + struct dc_bounding_box_max_clk *dcn30_bb_max_clk, 63 + unsigned int *dcfclk_mhz, 64 + unsigned int *dram_speed_mts); 65 + 66 + 67 + #endif /* __DCN30_FPU_H__ */
+7
drivers/gpu/drm/amd/display/dc/inc/core_types.h
··· 486 486 } perf_params; 487 487 }; 488 488 /* Maximum clock values passed to the DCN30 FPU bounding-box helpers. dcn30_fpu_update_bw_bounding_box() copies max_dispclk_mhz, max_dppclk_mhz and max_phyclk_mhz into every dcn3_0_soc.clock_limits[] state. Values are presumably in MHz, per the _mhz suffix and the *_mhz fields they are assigned to. */ 489 + struct dc_bounding_box_max_clk { 490 + int max_dcfclk_mhz; 491 + int max_dispclk_mhz; 492 + int max_dppclk_mhz; 493 + int max_phyclk_mhz; 494 + }; 495 + 489 496 #endif /* _CORE_TYPES_H_ */