drm/amd/display: Fix unsafe uses of kernel mode FPU

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

The point of isolating code that uses kernel mode FPU in separate
compilation units is to ensure that even implicit uses of, e.g., SIMD
registers for spilling occur only in a context where this is permitted,
i.e., from inside a kernel_fpu_begin/end block.

This is important on arm64, which uses -mgeneral-regs-only to build all
kernel code, with the exception of such compilation units where FP or
SIMD registers are expected to be used. Given that the compiler may
invent uses of FP/SIMD anywhere in such a unit, none of its code may be
accessible from outside a kernel_fpu_begin/end block.

This means that all callers into such compilation units must wrap their
calls in the DC_FP_START()/DC_FP_END() macros, and that those macros must
not be used inside the FPU compilation units themselves. For robustness,
all functions with external linkage that reside in such units should
call dc_assert_fp_enabled() to assert that the FPU context was set up
correctly.

Fix this for the DCN35, DCN351 and DCN36 implementations.

Cc: Austin Zheng <austin.zheng@amd.com>
Cc: Jun Lei <jun.lei@amd.com>
Cc: Harry Wentland <harry.wentland@amd.com>
Cc: Leo Li <sunpeng.li@amd.com>
Cc: Rodrigo Siqueira <siqueira@igalia.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: "Christian König" <christian.koenig@amd.com>
Cc: amd-gfx@lists.freedesktop.org
Cc: dri-devel@lists.freedesktop.org
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Authored by Ard Biesheuvel and committed by Alex Deucher 6 months ago (ddbfac15 bd8acfcf)

+56 -7

6 changed files

expand all

drivers

gpu

drm

amd

display

dml

dcn31

dcn31_fpu.c

dcn35

dcn35_fpu.c

dcn351

dcn351_fpu.c

resource

dcn35

dcn35_resource.c

dcn351

dcn351_resource.c

dcn36

dcn36_resource.c

drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c

··· 808 808 809 809 int dcn_get_max_non_odm_pix_rate_100hz(struct _vcs_dpi_soc_bounding_box_st *soc) 810 810 { 811 + dc_assert_fp_enabled(); 812 + 811 813 return soc->clock_limits[0].dispclk_mhz * 10000.0 / (1.0 + soc->dcn_downspread_percent / 100.0); 812 814 } 813 815 ··· 817 815 struct _vcs_dpi_soc_bounding_box_st *soc, 818 816 int pix_clk_100hz, int bpp, int seg_size_kb) 819 817 { 818 + dc_assert_fp_enabled(); 819 + 820 820 /* Roughly calculate required crb to hide latency. In practice there is slightly 821 821 * more buffer available for latency hiding 822 822 */

+4 -2

drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c

··· 445 445 bool upscaled = false; 446 446 const unsigned int max_allowed_vblank_nom = 1023; 447 447 448 + dc_assert_fp_enabled(); 449 + 448 450 dcn31_populate_dml_pipes_from_context(dc, context, pipes, 449 451 validate_mode); 450 452 ··· 500 498 501 499 pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; 502 500 503 - DC_FP_START(); 504 501 dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt); 505 - DC_FP_END(); 506 502 507 503 pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; 508 504 pipes[pipe_cnt].pipe.src.dcc_rate = 3; ··· 580 580 enum dcn_zstate_support_state support = DCN_ZSTATE_SUPPORT_DISALLOW; 581 581 unsigned int i, plane_count = 0; 582 582 DC_LOGGER_INIT(dc->ctx->logger); 583 + 584 + dc_assert_fp_enabled(); 583 585 584 586 for (i = 0; i < dc->res_pool->pipe_count; i++) { 585 587 if (context->res_ctx.pipe_ctx[i].plane_state)

+2 -2

drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c

··· 478 478 bool upscaled = false; 479 479 const unsigned int max_allowed_vblank_nom = 1023; 480 480 481 + dc_assert_fp_enabled(); 482 + 481 483 dcn31_populate_dml_pipes_from_context(dc, context, pipes, 482 484 validate_mode); 483 485 ··· 533 531 534 532 pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; 535 533 536 - DC_FP_START(); 537 534 dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt); 538 - DC_FP_END(); 539 535 540 536 pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; 541 537 pipes[pipe_cnt].pipe.src.dcc_rate = 3;

+15 -1

drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c

··· 1760 1760 } 1761 1761 1762 1762 1763 + static int populate_dml_pipes_from_context_fpu(struct dc *dc, 1764 + struct dc_state *context, 1765 + display_e2e_pipe_params_st *pipes, 1766 + enum dc_validate_mode validate_mode) 1767 + { 1768 + int ret; 1769 + 1770 + DC_FP_START(); 1771 + ret = dcn35_populate_dml_pipes_from_context_fpu(dc, context, pipes, validate_mode); 1772 + DC_FP_END(); 1773 + 1774 + return ret; 1775 + } 1776 + 1763 1777 static struct resource_funcs dcn35_res_pool_funcs = { 1764 1778 .destroy = dcn35_destroy_resource_pool, 1765 1779 .link_enc_create = dcn35_link_encoder_create, ··· 1784 1770 .validate_bandwidth = dcn35_validate_bandwidth, 1785 1771 .calculate_wm_and_dlg = NULL, 1786 1772 .update_soc_for_wm_a = dcn31_update_soc_for_wm_a, 1787 - .populate_dml_pipes = dcn35_populate_dml_pipes_from_context_fpu, 1773 + .populate_dml_pipes = populate_dml_pipes_from_context_fpu, 1788 1774 .acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer, 1789 1775 .release_pipe = dcn20_release_pipe, 1790 1776 .add_stream_to_ctx = dcn30_add_stream_to_ctx,

+16 -1

drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c

··· 1732 1732 return out ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; 1733 1733 } 1734 1734 1735 + static int populate_dml_pipes_from_context_fpu(struct dc *dc, 1736 + struct dc_state *context, 1737 + display_e2e_pipe_params_st *pipes, 1738 + enum dc_validate_mode validate_mode) 1739 + { 1740 + int ret; 1741 + 1742 + DC_FP_START(); 1743 + ret = dcn351_populate_dml_pipes_from_context_fpu(dc, context, pipes, validate_mode); 1744 + DC_FP_END(); 1745 + 1746 + return ret; 1747 + 1748 + } 1749 + 1735 1750 static struct resource_funcs dcn351_res_pool_funcs = { 1736 1751 .destroy = dcn351_destroy_resource_pool, 1737 1752 .link_enc_create = dcn35_link_encoder_create, ··· 1757 1742 .validate_bandwidth = dcn351_validate_bandwidth, 1758 1743 .calculate_wm_and_dlg = NULL, 1759 1744 .update_soc_for_wm_a = dcn31_update_soc_for_wm_a, 1760 - .populate_dml_pipes = dcn351_populate_dml_pipes_from_context_fpu, 1745 + .populate_dml_pipes = populate_dml_pipes_from_context_fpu, 1761 1746 .acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer, 1762 1747 .release_pipe = dcn20_release_pipe, 1763 1748 .add_stream_to_ctx = dcn30_add_stream_to_ctx,

+15 -1

drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c

··· 1734 1734 } 1735 1735 1736 1736 1737 + static int populate_dml_pipes_from_context_fpu(struct dc *dc, 1738 + struct dc_state *context, 1739 + display_e2e_pipe_params_st *pipes, 1740 + enum dc_validate_mode validate_mode) 1741 + { 1742 + int ret; 1743 + 1744 + DC_FP_START(); 1745 + ret = dcn35_populate_dml_pipes_from_context_fpu(dc, context, pipes, validate_mode); 1746 + DC_FP_END(); 1747 + 1748 + return ret; 1749 + } 1750 + 1737 1751 static struct resource_funcs dcn36_res_pool_funcs = { 1738 1752 .destroy = dcn36_destroy_resource_pool, 1739 1753 .link_enc_create = dcn35_link_encoder_create, ··· 1758 1744 .validate_bandwidth = dcn35_validate_bandwidth, 1759 1745 .calculate_wm_and_dlg = NULL, 1760 1746 .update_soc_for_wm_a = dcn31_update_soc_for_wm_a, 1761 - .populate_dml_pipes = dcn35_populate_dml_pipes_from_context_fpu, 1747 + .populate_dml_pipes = populate_dml_pipes_from_context_fpu, 1762 1748 .acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer, 1763 1749 .release_pipe = dcn20_release_pipe, 1764 1750 .add_stream_to_ctx = dcn30_add_stream_to_ctx,