Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/msm: Avoid dirtyfb stalls on video mode displays (v2)

Someone on IRC once asked an innocent enough sounding question: Why
with xf86-video-modesetting is es2gears limited at 120fps.

So I broke out the perfetto tracing mesa MR and took a look. It turns
out the problem was drm_atomic_helper_dirtyfb(), which would end up
waiting for vblank.. es2gears would rapidly push two frames to Xorg,
which would blit them to screen and in idle hook (I assume) call the
DIRTYFB ioctl. Which in turn would do an atomic update to flush the
dirty rects, which would stall until the next vblank. And then the
whole process would repeat.

But this is a bit silly, we only need dirtyfb for command mode DSI
panels. So track in plane state whether dirtyfb is required, and
track in the fb how many attached planes require dirtyfb so that we
can skip it when not required. (Note, mdp4 does not actually have
cmd mode support.)

Signed-off-by: Rob Clark <robdclark@chromium.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20220223191118.881321-1-robdclark@gmail.com
Signed-off-by: Rob Clark <robdclark@chromium.org>

+110 -33
+19 -1
drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
··· 1046 1046 u32 pipe_id; 1047 1047 }; 1048 1048 1049 + static bool dpu_crtc_needs_dirtyfb(struct drm_crtc_state *cstate) 1050 + { 1051 + struct drm_crtc *crtc = cstate->crtc; 1052 + struct drm_encoder *encoder; 1053 + 1054 + drm_for_each_encoder_mask (encoder, crtc->dev, cstate->encoder_mask) { 1055 + if (dpu_encoder_get_intf_mode(encoder) == INTF_MODE_CMD) { 1056 + return true; 1057 + } 1058 + } 1059 + 1060 + return false; 1061 + } 1062 + 1049 1063 static int dpu_crtc_atomic_check(struct drm_crtc *crtc, 1050 1064 struct drm_atomic_state *state) 1051 1065 { ··· 1080 1066 const struct drm_plane_state *pipe_staged[SSPP_MAX]; 1081 1067 int left_zpos_cnt = 0, right_zpos_cnt = 0; 1082 1068 struct drm_rect crtc_rect = { 0 }; 1069 + bool needs_dirtyfb = dpu_crtc_needs_dirtyfb(crtc_state); 1083 1070 1084 1071 pstates = kzalloc(sizeof(*pstates) * DPU_STAGE_MAX * 4, GFP_KERNEL); 1085 1072 ··· 1112 1097 1113 1098 /* get plane state for all drm planes associated with crtc state */ 1114 1099 drm_atomic_crtc_state_for_each_plane_state(plane, pstate, crtc_state) { 1100 + struct dpu_plane_state *dpu_pstate = to_dpu_plane_state(pstate); 1115 1101 struct drm_rect dst, clip = crtc_rect; 1116 1102 1117 1103 if (IS_ERR_OR_NULL(pstate)) { ··· 1124 1108 if (cnt >= DPU_STAGE_MAX * 4) 1125 1109 continue; 1126 1110 1127 - pstates[cnt].dpu_pstate = to_dpu_plane_state(pstate); 1111 + pstates[cnt].dpu_pstate = dpu_pstate; 1128 1112 pstates[cnt].drm_pstate = pstate; 1129 1113 pstates[cnt].stage = pstate->normalized_zpos; 1130 1114 pstates[cnt].pipe_id = dpu_plane_pipe(plane); 1115 + 1116 + dpu_pstate->needs_dirtyfb = needs_dirtyfb; 1131 1117 1132 1118 if (pipe_staged[pstates[cnt].pipe_id]) { 1133 1119 multirect_plane[multirect_count].r0 =
+3 -2
drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
··· 902 902 903 903 if (pstate->aspace) { 904 904 ret = msm_framebuffer_prepare(new_state->fb, 905 - pstate->aspace); 905 + pstate->aspace, pstate->needs_dirtyfb); 906 906 if (ret) { 907 907 DPU_ERROR("failed to prepare framebuffer\n"); 908 908 return ret; ··· 933 933 934 934 DPU_DEBUG_PLANE(pdpu, "FB[%u]\n", old_state->fb->base.id); 935 935 936 - msm_framebuffer_cleanup(old_state->fb, old_pstate->aspace); 936 + msm_framebuffer_cleanup(old_state->fb, old_pstate->aspace, 937 + old_pstate->needs_dirtyfb); 937 938 } 938 939 939 940 static bool dpu_plane_validate_src(struct drm_rect *src,
+3
drivers/gpu/drm/msm/disp/dpu1/dpu_plane.h
··· 25 25 * @pending: whether the current update is still pending 26 26 * @plane_fetch_bw: calculated BW per plane 27 27 * @plane_clk: calculated clk per plane 28 + * @needs_dirtyfb: whether attached CRTC needs pixel data explicitly flushed 28 29 */ 29 30 struct dpu_plane_state { 30 31 struct drm_plane_state base; ··· 38 37 39 38 u64 plane_fetch_bw; 40 39 u64 plane_clk; 40 + 41 + bool needs_dirtyfb; 41 42 }; 42 43 43 44 /**
+17 -2
drivers/gpu/drm/msm/disp/mdp4/mdp4_plane.c
··· 7 7 #include <drm/drm_atomic.h> 8 8 #include <drm/drm_damage_helper.h> 9 9 #include <drm/drm_fourcc.h> 10 + #include <drm/drm_gem_atomic_helper.h> 10 11 11 12 #include "mdp4_kms.h" 12 13 ··· 91 90 .atomic_destroy_state = drm_atomic_helper_plane_destroy_state, 92 91 }; 93 92 93 + static int mdp4_plane_prepare_fb(struct drm_plane *plane, 94 + struct drm_plane_state *new_state) 95 + { 96 + struct msm_drm_private *priv = plane->dev->dev_private; 97 + struct msm_kms *kms = priv->kms; 98 + 99 + if (!new_state->fb) 100 + return 0; 101 + 102 + drm_gem_plane_helper_prepare_fb(plane, new_state); 103 + 104 + return msm_framebuffer_prepare(new_state->fb, kms->aspace, false); 105 + } 106 + 94 107 static void mdp4_plane_cleanup_fb(struct drm_plane *plane, 95 108 struct drm_plane_state *old_state) 96 109 { ··· 117 102 return; 118 103 119 104 DBG("%s: cleanup: FB[%u]", mdp4_plane->name, fb->base.id); 120 - msm_framebuffer_cleanup(fb, kms->aspace); 105 + msm_framebuffer_cleanup(fb, kms->aspace, false); 121 106 } 122 107 123 108 ··· 145 130 } 146 131 147 132 static const struct drm_plane_helper_funcs mdp4_plane_helper_funcs = { 148 - .prepare_fb = msm_atomic_prepare_fb, 133 + .prepare_fb = mdp4_plane_prepare_fb, 149 134 .cleanup_fb = mdp4_plane_cleanup_fb, 150 135 .atomic_check = mdp4_plane_atomic_check, 151 136 .atomic_update = mdp4_plane_atomic_update,
+8
drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c
··· 690 690 { 691 691 struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state, 692 692 crtc); 693 + struct mdp5_crtc_state *mdp5_cstate = to_mdp5_crtc_state(crtc_state); 694 + struct mdp5_interface *intf = mdp5_cstate->pipeline.intf; 693 695 struct mdp5_kms *mdp5_kms = get_kms(crtc); 694 696 struct drm_plane *plane; 695 697 struct drm_device *dev = crtc->dev; ··· 708 706 DBG("%s: check", crtc->name); 709 707 710 708 drm_atomic_crtc_state_for_each_plane_state(plane, pstate, crtc_state) { 709 + struct mdp5_plane_state *mdp5_pstate = 710 + to_mdp5_plane_state(pstate); 711 + 711 712 if (!pstate->visible) 712 713 continue; 713 714 714 715 pstates[cnt].plane = plane; 715 716 pstates[cnt].state = to_mdp5_plane_state(pstate); 717 + 718 + mdp5_pstate->needs_dirtyfb = 719 + intf->mode == MDP5_INTF_DSI_MODE_COMMAND; 716 720 717 721 /* 718 722 * if any plane on this crtc uses 2 hwpipes, then we need
+5
drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.h
··· 100 100 101 101 /* assigned by crtc blender */ 102 102 enum mdp_mixer_stage_id stage; 103 + 104 + /* whether attached CRTC needs pixel data explicitly flushed to 105 + * display (ex. DSI command mode display) 106 + */ 107 + bool needs_dirtyfb; 103 108 }; 104 109 #define to_mdp5_plane_state(x) \ 105 110 container_of(x, struct mdp5_plane_state, base)
+19 -2
drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c
··· 8 8 #include <drm/drm_atomic.h> 9 9 #include <drm/drm_damage_helper.h> 10 10 #include <drm/drm_fourcc.h> 11 + #include <drm/drm_gem_atomic_helper.h> 11 12 #include <drm/drm_print.h> 12 13 13 14 #include "mdp5_kms.h" ··· 141 140 .atomic_print_state = mdp5_plane_atomic_print_state, 142 141 }; 143 142 143 + static int mdp5_plane_prepare_fb(struct drm_plane *plane, 144 + struct drm_plane_state *new_state) 145 + { 146 + struct msm_drm_private *priv = plane->dev->dev_private; 147 + struct msm_kms *kms = priv->kms; 148 + bool needs_dirtyfb = to_mdp5_plane_state(new_state)->needs_dirtyfb; 149 + 150 + if (!new_state->fb) 151 + return 0; 152 + 153 + drm_gem_plane_helper_prepare_fb(plane, new_state); 154 + 155 + return msm_framebuffer_prepare(new_state->fb, kms->aspace, needs_dirtyfb); 156 + } 157 + 144 158 static void mdp5_plane_cleanup_fb(struct drm_plane *plane, 145 159 struct drm_plane_state *old_state) 146 160 { 147 161 struct mdp5_kms *mdp5_kms = get_kms(plane); 148 162 struct msm_kms *kms = &mdp5_kms->base.base; 149 163 struct drm_framebuffer *fb = old_state->fb; 164 + bool needed_dirtyfb = to_mdp5_plane_state(old_state)->needs_dirtyfb; 150 165 151 166 if (!fb) 152 167 return; 153 168 154 169 DBG("%s: cleanup: FB[%u]", plane->name, fb->base.id); 155 - msm_framebuffer_cleanup(fb, kms->aspace); 170 + msm_framebuffer_cleanup(fb, kms->aspace, needed_dirtyfb); 156 171 } 157 172 158 173 static int mdp5_plane_atomic_check_with_state(struct drm_crtc_state *crtc_state, ··· 454 437 } 455 438 456 439 static const struct drm_plane_helper_funcs mdp5_plane_helper_funcs = { 457 - .prepare_fb = msm_atomic_prepare_fb, 440 + .prepare_fb = mdp5_plane_prepare_fb, 458 441 .cleanup_fb = mdp5_plane_cleanup_fb, 459 442 .atomic_check = mdp5_plane_atomic_check, 460 443 .atomic_update = mdp5_plane_atomic_update,
-15
drivers/gpu/drm/msm/msm_atomic.c
··· 5 5 */ 6 6 7 7 #include <drm/drm_atomic_uapi.h> 8 - #include <drm/drm_gem_atomic_helper.h> 9 8 #include <drm/drm_vblank.h> 10 9 11 10 #include "msm_atomic_trace.h" 12 11 #include "msm_drv.h" 13 12 #include "msm_gem.h" 14 13 #include "msm_kms.h" 15 - 16 - int msm_atomic_prepare_fb(struct drm_plane *plane, 17 - struct drm_plane_state *new_state) 18 - { 19 - struct msm_drm_private *priv = plane->dev->dev_private; 20 - struct msm_kms *kms = priv->kms; 21 - 22 - if (!new_state->fb) 23 - return 0; 24 - 25 - drm_gem_plane_helper_prepare_fb(plane, new_state); 26 - 27 - return msm_framebuffer_prepare(new_state->fb, kms->aspace); 28 - } 29 14 30 15 /* 31 16 * Helpers to control vblanks while we flush.. basically just to ensure
+2 -4
drivers/gpu/drm/msm/msm_drv.h
··· 239 239 240 240 struct msm_pending_timer; 241 241 242 - int msm_atomic_prepare_fb(struct drm_plane *plane, 243 - struct drm_plane_state *new_state); 244 242 int msm_atomic_init_pending_timer(struct msm_pending_timer *timer, 245 243 struct msm_kms *kms, int crtc_idx); 246 244 void msm_atomic_destroy_pending_timer(struct msm_pending_timer *timer); ··· 297 299 void msm_gem_prime_unpin(struct drm_gem_object *obj); 298 300 299 301 int msm_framebuffer_prepare(struct drm_framebuffer *fb, 300 - struct msm_gem_address_space *aspace); 302 + struct msm_gem_address_space *aspace, bool needs_dirtyfb); 301 303 void msm_framebuffer_cleanup(struct drm_framebuffer *fb, 302 - struct msm_gem_address_space *aspace); 304 + struct msm_gem_address_space *aspace, bool needed_dirtyfb); 303 305 uint32_t msm_framebuffer_iova(struct drm_framebuffer *fb, 304 306 struct msm_gem_address_space *aspace, int plane); 305 307 struct drm_gem_object *msm_framebuffer_bo(struct drm_framebuffer *fb, int plane);
+34 -7
drivers/gpu/drm/msm/msm_fb.c
··· 18 18 struct msm_framebuffer { 19 19 struct drm_framebuffer base; 20 20 const struct msm_format *format; 21 + 22 + /* Count of # of attached planes which need dirtyfb: */ 23 + refcount_t dirtyfb; 21 24 }; 22 25 #define to_msm_framebuffer(x) container_of(x, struct msm_framebuffer, base) 23 26 24 27 static struct drm_framebuffer *msm_framebuffer_init(struct drm_device *dev, 25 28 const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **bos); 26 29 30 + static int msm_framebuffer_dirtyfb(struct drm_framebuffer *fb, 31 + struct drm_file *file_priv, unsigned int flags, 32 + unsigned int color, struct drm_clip_rect *clips, 33 + unsigned int num_clips) 34 + { 35 + struct msm_framebuffer *msm_fb = to_msm_framebuffer(fb); 36 + 37 + /* If this fb is not used on any display requiring pixel data to be 38 + * flushed, then skip dirtyfb 39 + */ 40 + if (refcount_read(&msm_fb->dirtyfb) == 0) 41 + return 0; 42 + 43 + return drm_atomic_helper_dirtyfb(fb, file_priv, flags, color, 44 + clips, num_clips); 45 + } 46 + 27 47 static const struct drm_framebuffer_funcs msm_framebuffer_funcs = { 28 48 .create_handle = drm_gem_fb_create_handle, 29 49 .destroy = drm_gem_fb_destroy, 30 - .dirty = drm_atomic_helper_dirtyfb, 50 + .dirty = msm_framebuffer_dirtyfb, 31 51 }; 32 52 33 53 #ifdef CONFIG_DEBUG_FS ··· 68 48 } 69 49 #endif 70 50 71 - /* prepare/pin all the fb's bo's for scanout. Note that it is not valid 72 - * to prepare an fb more multiple different initiator 'id's. But that 73 - * should be fine, since only the scanout (mdpN) side of things needs 74 - * this, the gpu doesn't care about fb's. 51 + /* prepare/pin all the fb's bo's for scanout. 75 52 */ 76 53 int msm_framebuffer_prepare(struct drm_framebuffer *fb, 77 - struct msm_gem_address_space *aspace) 54 + struct msm_gem_address_space *aspace, 55 + bool needs_dirtyfb) 78 56 { 57 + struct msm_framebuffer *msm_fb = to_msm_framebuffer(fb); 79 58 int ret, i, n = fb->format->num_planes; 80 59 uint64_t iova; 60 + 61 + if (needs_dirtyfb) 62 + refcount_inc(&msm_fb->dirtyfb); 81 63 82 64 for (i = 0; i < n; i++) { 83 65 ret = msm_gem_get_and_pin_iova(fb->obj[i], aspace, &iova); ··· 92 70 } 93 71 94 72 void msm_framebuffer_cleanup(struct drm_framebuffer *fb, 95 - struct msm_gem_address_space *aspace) 73 + struct msm_gem_address_space *aspace, 74 + bool needed_dirtyfb) 96 75 { 76 + struct msm_framebuffer *msm_fb = to_msm_framebuffer(fb); 97 77 int i, n = fb->format->num_planes; 78 + 79 + if (needed_dirtyfb) 80 + refcount_dec(&msm_fb->dirtyfb); 98 81 99 82 for (i = 0; i < n; i++) 100 83 msm_gem_unpin_iova(fb->obj[i], aspace);