
drm/msm: rework inactive-work

Re-arrange things a bit so that work requested to run after a bo fence
passes (such as a pageflip) is done before retiring bos. Without any
sort of bo cache in userspace, some games can trigger hundreds of
transient bos, which can cause retire to take a long time (5-10ms).
Obviously we want a bo cache, but this cleanup also makes things a bit
easier for atomic and a bit cleaner overall.

Signed-off-by: Rob Clark <robdclark@gmail.com>
Acked-by: David Brown <davidb@codeaurora.org>

Rob Clark edd4fc63 a8623918
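
For orientation, here is the consumer-side pattern this change introduces, condensed from the mdp4_crtc hunks below. It is only a sketch (not a standalone compilable unit): a driver embeds a struct msm_fence_cb, initializes it once with INIT_FENCE_CB(), and hands it to msm_gem_queue_inactive_cb(); the callback then runs from the driver workqueue once the bo's fence has passed.

/* embed a fence callback in the object that needs deferred work
 * (names taken from the mdp4_crtc hunks below):
 */
struct mdp4_crtc {
	/* ... */
	struct msm_fence_cb pageflip_cb;
};

static void pageflip_cb(struct msm_fence_cb *cb)
{
	struct mdp4_crtc *mdp4_crtc =
		container_of(cb, struct mdp4_crtc, pageflip_cb);
	/* runs on priv->wq once the bo's fence has passed */
}

/* at init time: */
INIT_FENCE_CB(&mdp4_crtc->pageflip_cb, pageflip_cb);

/* when requesting the flip, instead of queueing a work_struct: */
ret = msm_gem_queue_inactive_cb(obj, &mdp4_crtc->pageflip_cb);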

+71 -33
+5 -6
drivers/gpu/drm/msm/mdp4/mdp4_crtc.c
@@ -51,7 +51,7 @@
 
 	/* if there is a pending flip, these will be non-null: */
 	struct drm_pending_vblank_event *event;
-	struct work_struct pageflip_work;
+	struct msm_fence_cb pageflip_cb;
 
 	/* the fb that we currently hold a scanout ref to: */
 	struct drm_framebuffer *fb;
@@ -132,10 +132,10 @@
 	mdp4_write(mdp4_kms, REG_MDP4_OVERLAY_FLUSH, flush);
 }
 
-static void pageflip_worker(struct work_struct *work)
+static void pageflip_cb(struct msm_fence_cb *cb)
 {
 	struct mdp4_crtc *mdp4_crtc =
-		container_of(work, struct mdp4_crtc, pageflip_work);
+		container_of(cb, struct mdp4_crtc, pageflip_cb);
 	struct drm_crtc *crtc = &mdp4_crtc->base;
 
 	mdp4_plane_set_scanout(mdp4_crtc->plane, crtc->fb);
@@ -397,8 +397,7 @@
 	mdp4_crtc->event = event;
 	update_fb(crtc, true, new_fb);
 
-	return msm_gem_queue_inactive_work(obj,
-			&mdp4_crtc->pageflip_work);
+	return msm_gem_queue_inactive_cb(obj, &mdp4_crtc->pageflip_cb);
 }
 
 static int mdp4_crtc_set_property(struct drm_crtc *crtc,
@@ -701,7 +702,7 @@
 	ret = drm_flip_work_init(&mdp4_crtc->unref_cursor_work, 64,
 			"unref cursor", unref_cursor_worker);
 
-	INIT_WORK(&mdp4_crtc->pageflip_work, pageflip_worker);
+	INIT_FENCE_CB(&mdp4_crtc->pageflip_cb, pageflip_cb);
 
 	drm_crtc_init(dev, crtc, &mdp4_crtc_funcs);
 	drm_crtc_helper_add(crtc, &mdp4_crtc_helper_funcs);
+26 -4
drivers/gpu/drm/msm/msm_drv.c
@@ -187,6 +187,7 @@
 	init_waitqueue_head(&priv->fence_event);
 
 	INIT_LIST_HEAD(&priv->inactive_list);
+	INIT_LIST_HEAD(&priv->fence_cbs);
 
 	drm_mode_config_init(dev);
 
@@ -540,15 +539,36 @@
 	return ret;
 }
 
-/* call under struct_mutex */
+/* called from workqueue */
 void msm_update_fence(struct drm_device *dev, uint32_t fence)
 {
 	struct msm_drm_private *priv = dev->dev_private;
 
-	if (fence > priv->completed_fence) {
-		priv->completed_fence = fence;
-		wake_up_all(&priv->fence_event);
+	mutex_lock(&dev->struct_mutex);
+	priv->completed_fence = max(fence, priv->completed_fence);
+
+	while (!list_empty(&priv->fence_cbs)) {
+		struct msm_fence_cb *cb;
+
+		cb = list_first_entry(&priv->fence_cbs,
+				struct msm_fence_cb, work.entry);
+
+		if (cb->fence > priv->completed_fence)
+			break;
+
+		list_del_init(&cb->work.entry);
+		queue_work(priv->wq, &cb->work);
 	}
+
+	mutex_unlock(&dev->struct_mutex);
+
+	wake_up_all(&priv->fence_event);
+}
+
+void __msm_fence_worker(struct work_struct *work)
+{
+	struct msm_fence_cb *cb = container_of(work, struct msm_fence_cb, work);
+	cb->func(cb);
 }
 
 /*
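
msm_update_fence() now walks priv->fence_cbs in list order and stops at the first callback whose fence has not yet passed, so it effectively assumes the callbacks sit in (roughly) fence order (msm_gem_queue_inactive_cb() adds to the tail with the bo's current max fence). Below is a minimal stand-alone model of that dispatch logic in plain user-space C; the names (fence_cb, update_fence, etc.) are hypothetical and this is only an illustration, not the kernel code:

#include <stdio.h>
#include <stdint.h>

struct fence_cb {
	uint32_t fence;                  /* fires once completed >= fence */
	void (*func)(struct fence_cb *cb);
	struct fence_cb *next;           /* singly-linked, fence-ordered */
};

static struct fence_cb *pending;         /* head of the fence-ordered list */
static uint32_t completed_fence;

static void update_fence(uint32_t fence)
{
	if (fence > completed_fence)
		completed_fence = fence;

	/* dispatch in order, stop at the first cb that hasn't passed yet */
	while (pending && pending->fence <= completed_fence) {
		struct fence_cb *cb = pending;
		pending = cb->next;
		cb->func(cb);
	}
}

static void hello(struct fence_cb *cb)
{
	printf("fence %u passed\n", cb->fence);
}

int main(void)
{
	static struct fence_cb a = { .fence = 2, .func = hello };
	static struct fence_cb b = { .fence = 5, .func = hello };

	a.next = &b;             /* queued in fence order, like fence_cbs */
	pending = &a;

	update_fence(3);         /* fires a (fence 2); b (fence 5) still waits */
	update_fence(7);         /* fires b */
	return 0;
}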
+19 -2
drivers/gpu/drm/msm/msm_drv.h
@@ -73,6 +73,9 @@
 
 	struct workqueue_struct *wq;
 
+	/* callbacks deferred until bo is inactive: */
+	struct list_head fence_cbs;
+
 	/* registered IOMMU domains: */
 	unsigned int num_iommus;
 	struct iommu_domain *iommus[NUM_DOMAINS];
@@ -99,6 +96,20 @@
 struct msm_format {
 	uint32_t pixel_format;
 };
+
+/* callback from wq once fence has passed: */
+struct msm_fence_cb {
+	struct work_struct work;
+	uint32_t fence;
+	void (*func)(struct msm_fence_cb *cb);
+};
+
+void __msm_fence_worker(struct work_struct *work);
+
+#define INIT_FENCE_CB(_cb, _func)  do {                      \
+		INIT_WORK(&(_cb)->work, __msm_fence_worker); \
+		(_cb)->func = _func;                         \
+	} while (0)
 
 /* As there are different display controller blocks depending on the
  * snapdragon version, the kms support is split out and the appropriate
@@ -177,8 +160,8 @@
 void msm_gem_prime_unpin(struct drm_gem_object *obj);
 void *msm_gem_vaddr_locked(struct drm_gem_object *obj);
 void *msm_gem_vaddr(struct drm_gem_object *obj);
-int msm_gem_queue_inactive_work(struct drm_gem_object *obj,
-		struct work_struct *work);
+int msm_gem_queue_inactive_cb(struct drm_gem_object *obj,
+		struct msm_fence_cb *cb);
 void msm_gem_move_to_active(struct drm_gem_object *obj,
 		struct msm_gpu *gpu, bool write, uint32_t fence);
 void msm_gem_move_to_inactive(struct drm_gem_object *obj);
+19 -16
drivers/gpu/drm/msm/msm_gem.c
@@ -309,7 +309,17 @@
 
 int msm_gem_get_iova(struct drm_gem_object *obj, int id, uint32_t *iova)
 {
+	struct msm_gem_object *msm_obj = to_msm_bo(obj);
 	int ret;
+
+	/* this is safe right now because we don't unmap until the
+	 * bo is deleted:
+	 */
+	if (msm_obj->domain[id].iova) {
+		*iova = msm_obj->domain[id].iova;
+		return 0;
+	}
+
 	mutex_lock(&obj->dev->struct_mutex);
 	ret = msm_gem_get_iova_locked(obj, id, iova);
 	mutex_unlock(&obj->dev->struct_mutex);
@@ -389,8 +379,11 @@
 	return ret;
 }
 
-int msm_gem_queue_inactive_work(struct drm_gem_object *obj,
-		struct work_struct *work)
+/* setup callback for when bo is no longer busy..
+ * TODO probably want to differentiate read vs write..
+ */
+int msm_gem_queue_inactive_cb(struct drm_gem_object *obj,
+		struct msm_fence_cb *cb)
 {
 	struct drm_device *dev = obj->dev;
 	struct msm_drm_private *priv = dev->dev_private;
@@ -401,12 +388,13 @@
 	int ret = 0;
 
 	mutex_lock(&dev->struct_mutex);
-	if (!list_empty(&work->entry)) {
+	if (!list_empty(&cb->work.entry)) {
 		ret = -EINVAL;
 	} else if (is_active(msm_obj)) {
-		list_add_tail(&work->entry, &msm_obj->inactive_work);
+		cb->fence = max(msm_obj->read_fence, msm_obj->write_fence);
+		list_add_tail(&cb->work.entry, &priv->fence_cbs);
 	} else {
-		queue_work(priv->wq, work);
+		queue_work(priv->wq, &cb->work);
 	}
 	mutex_unlock(&dev->struct_mutex);
 
@@ -440,16 +426,6 @@
 	msm_obj->write_fence = 0;
 	list_del_init(&msm_obj->mm_list);
 	list_add_tail(&msm_obj->mm_list, &priv->inactive_list);
-
-	while (!list_empty(&msm_obj->inactive_work)) {
-		struct work_struct *work;
-
-		work = list_first_entry(&msm_obj->inactive_work,
-				struct work_struct, entry);
-
-		list_del_init(&work->entry);
-		queue_work(priv->wq, work);
-	}
 }
 
 int msm_gem_cpu_prep(struct drm_gem_object *obj, uint32_t op,
@@ -608,7 +604,6 @@
 	reservation_object_init(msm_obj->resv);
 
 	INIT_LIST_HEAD(&msm_obj->submit_entry);
-	INIT_LIST_HEAD(&msm_obj->inactive_work);
 	list_add_tail(&msm_obj->mm_list, &priv->inactive_list);
 
 	*obj = &msm_obj->base;
+0 -3
drivers/gpu/drm/msm/msm_gem.h
@@ -45,9 +45,6 @@
 	 */
 	struct list_head submit_entry;
 
-	/* work defered until bo is inactive: */
-	struct list_head inactive_work;
-
 	struct page **pages;
 	struct sg_table *sgt;
 	void *vaddr;
+2 -2
drivers/gpu/drm/msm/msm_gpu.c
@@ -268,6 +268,8 @@
 	struct drm_device *dev = gpu->dev;
 	uint32_t fence = gpu->funcs->last_fence(gpu);
 
+	msm_update_fence(gpu->dev, fence);
+
 	mutex_lock(&dev->struct_mutex);
 
 	while (!list_empty(&gpu->active_list)) {
@@ -288,8 +286,6 @@
 			break;
 		}
 	}
-
-	msm_update_fence(gpu->dev, fence);
 
 	mutex_unlock(&dev->struct_mutex);
 }