Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/vc4: Remove BOs seqnos

`bo->seqno`, `bo->write_seqno`, and `exec->bin_dep_seqno` are leftovers
from a time when VC4 didn't support DMA Reservation Objects. Before DMA
Resv was introduced, tracking the correspondence between BOs and jobs
through the job's seqno made sense.

However, this is no longer needed, as VC4 now supports DMA Reservation
Objects and attaches the "job done" fence to the BOs. Therefore, remove
the BOs' seqnos in favor of using DMA Resv Objects.

Signed-off-by: Maíra Canal <mcanal@igalia.com>
Reviewed-by: Maxime Ripard <mripard@kernel.org>
Reviewed-by: Melissa Wen <mwen@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241220134204.634577-4-mcanal@igalia.com

+17 -113
+15 -18
drivers/gpu/drm/vc4/vc4_crtc.c
··· 884 884 struct drm_framebuffer *fb; 885 885 struct drm_framebuffer *old_fb; 886 886 struct drm_pending_vblank_event *event; 887 - 888 - union { 889 - struct dma_fence_cb fence; 890 - struct vc4_seqno_cb seqno; 891 - } cb; 887 + struct dma_fence_cb cb; 892 888 }; 893 889 894 890 /* Called when the V3D execution for the BO being flipped to is done, so that ··· 915 919 kfree(flip_state); 916 920 } 917 921 918 - static void vc4_async_page_flip_seqno_complete(struct vc4_seqno_cb *cb) 922 + static void vc4_async_page_flip_complete_with_cleanup(struct dma_fence *fence, 923 + struct dma_fence_cb *cb) 919 924 { 920 925 struct vc4_async_flip_state *flip_state = 921 - container_of(cb, struct vc4_async_flip_state, cb.seqno); 926 + container_of(cb, struct vc4_async_flip_state, cb); 922 927 struct vc4_bo *bo = NULL; 923 928 924 929 if (flip_state->old_fb) { ··· 929 932 } 930 933 931 934 vc4_async_page_flip_complete(flip_state); 935 + dma_fence_put(fence); 932 936 933 937 /* 934 938 * Decrement the BO usecnt in order to keep the inc/dec ··· 948 950 struct dma_fence_cb *cb) 949 951 { 950 952 struct vc4_async_flip_state *flip_state = 951 - container_of(cb, struct vc4_async_flip_state, cb.fence); 953 + container_of(cb, struct vc4_async_flip_state, cb); 952 954 953 955 vc4_async_page_flip_complete(flip_state); 954 956 dma_fence_put(fence); ··· 959 961 { 960 962 struct drm_framebuffer *fb = flip_state->fb; 961 963 struct drm_gem_dma_object *dma_bo = drm_fb_dma_get_gem_obj(fb, 0); 964 + dma_fence_func_t async_page_flip_complete_function; 962 965 struct vc4_dev *vc4 = to_vc4_dev(dev); 963 966 struct dma_fence *fence; 964 967 int ret; 965 968 966 - if (vc4->gen == VC4_GEN_4) { 967 - struct vc4_bo *bo = to_vc4_bo(&dma_bo->base); 968 - 969 - return vc4_queue_seqno_cb(dev, &flip_state->cb.seqno, bo->seqno, 970 - vc4_async_page_flip_seqno_complete); 971 - } 969 + if (vc4->gen == VC4_GEN_4) 970 + async_page_flip_complete_function = vc4_async_page_flip_complete_with_cleanup; 971 + else 972 
+ async_page_flip_complete_function = vc4_async_page_flip_fence_complete; 972 973 973 974 ret = dma_resv_get_singleton(dma_bo->base.resv, DMA_RESV_USAGE_READ, &fence); 974 975 if (ret) ··· 975 978 976 979 /* If there's no fence, complete the page flip immediately */ 977 980 if (!fence) { 978 - vc4_async_page_flip_fence_complete(fence, &flip_state->cb.fence); 981 + async_page_flip_complete_function(fence, &flip_state->cb); 979 982 return 0; 980 983 } 981 984 982 985 /* If the fence has already been completed, complete the page flip */ 983 - if (dma_fence_add_callback(fence, &flip_state->cb.fence, 984 - vc4_async_page_flip_fence_complete)) 985 - vc4_async_page_flip_fence_complete(fence, &flip_state->cb.fence); 986 + if (dma_fence_add_callback(fence, &flip_state->cb, 987 + async_page_flip_complete_function)) 988 + async_page_flip_complete_function(fence, &flip_state->cb); 986 989 987 990 return 0; 988 991 }
-27
drivers/gpu/drm/vc4/vc4_drv.h
··· 186 186 */ 187 187 struct vc4_perfmon *active_perfmon; 188 188 189 - /* List of struct vc4_seqno_cb for callbacks to be made from a 190 - * workqueue when the given seqno is passed. 191 - */ 192 - struct list_head seqno_cb_list; 193 - 194 189 /* The memory used for storing binner tile alloc, tile state, 195 190 * and overflow memory allocations. This is freed when V3D 196 191 * powers down. ··· 242 247 struct vc4_bo { 243 248 struct drm_gem_dma_object base; 244 249 245 - /* seqno of the last job to render using this BO. */ 246 - uint64_t seqno; 247 - 248 - /* seqno of the last job to use the RCL to write to this BO. 249 - * 250 - * Note that this doesn't include binner overflow memory 251 - * writes. 252 - */ 253 - uint64_t write_seqno; 254 - 255 250 bool t_format; 256 251 257 252 /* List entry for the BO's position in either ··· 288 303 289 304 #define to_vc4_fence(_fence) \ 290 305 container_of_const(_fence, struct vc4_fence, base) 291 - 292 - struct vc4_seqno_cb { 293 - struct work_struct work; 294 - uint64_t seqno; 295 - void (*func)(struct vc4_seqno_cb *cb); 296 - }; 297 306 298 307 struct vc4_v3d { 299 308 struct vc4_dev *vc4; ··· 674 695 /* Sequence number for this bin/render job. */ 675 696 uint64_t seqno; 676 697 677 - /* Latest write_seqno of any BO that binning depends on. */ 678 - uint64_t bin_dep_seqno; 679 - 680 698 struct dma_fence *fence; 681 699 682 700 /* Last current addresses the hardware was processing when the ··· 1001 1025 int vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, 1002 1026 uint64_t timeout_ns, bool interruptible); 1003 1027 void vc4_job_handle_completed(struct vc4_dev *vc4); 1004 - int vc4_queue_seqno_cb(struct drm_device *dev, 1005 - struct vc4_seqno_cb *cb, uint64_t seqno, 1006 - void (*func)(struct vc4_seqno_cb *cb)); 1007 1028 int vc4_gem_madvise_ioctl(struct drm_device *dev, void *data, 1008 1029 struct drm_file *file_priv); 1009 1030
+2 -57
drivers/gpu/drm/vc4/vc4_gem.c
··· 553 553 } 554 554 555 555 static void 556 - vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno) 556 + vc4_attach_fences(struct vc4_exec_info *exec) 557 557 { 558 558 struct vc4_bo *bo; 559 559 unsigned i; 560 560 561 561 for (i = 0; i < exec->bo_count; i++) { 562 562 bo = to_vc4_bo(exec->bo[i]); 563 - bo->seqno = seqno; 564 - 565 563 dma_resv_add_fence(bo->base.base.resv, exec->fence, 566 564 DMA_RESV_USAGE_READ); 567 565 } 568 566 569 - list_for_each_entry(bo, &exec->unref_list, unref_head) { 570 - bo->seqno = seqno; 571 - } 572 - 573 567 for (i = 0; i < exec->rcl_write_bo_count; i++) { 574 568 bo = to_vc4_bo(&exec->rcl_write_bo[i]->base); 575 - bo->write_seqno = seqno; 576 - 577 569 dma_resv_add_fence(bo->base.base.resv, exec->fence, 578 570 DMA_RESV_USAGE_WRITE); 579 571 } ··· 639 647 if (out_sync) 640 648 drm_syncobj_replace_fence(out_sync, exec->fence); 641 649 642 - vc4_update_bo_seqnos(exec, seqno); 650 + vc4_attach_fences(exec); 643 651 644 652 drm_exec_fini(exec_ctx); 645 653 ··· 837 845 goto fail; 838 846 } 839 847 840 - /* Block waiting on any previous rendering into the CS's VBO, 841 - * IB, or textures, so that pixels are actually written by the 842 - * time we try to read them. 
843 - */ 844 - ret = vc4_wait_for_seqno(dev, exec->bin_dep_seqno, ~0ull, true); 845 - 846 848 fail: 847 849 kvfree(temp); 848 850 return ret; ··· 895 909 vc4_job_handle_completed(struct vc4_dev *vc4) 896 910 { 897 911 unsigned long irqflags; 898 - struct vc4_seqno_cb *cb, *cb_temp; 899 912 900 913 if (WARN_ON_ONCE(vc4->gen > VC4_GEN_4)) 901 914 return; ··· 911 926 spin_lock_irqsave(&vc4->job_lock, irqflags); 912 927 } 913 928 914 - list_for_each_entry_safe(cb, cb_temp, &vc4->seqno_cb_list, work.entry) { 915 - if (cb->seqno <= vc4->finished_seqno) { 916 - list_del_init(&cb->work.entry); 917 - schedule_work(&cb->work); 918 - } 919 - } 920 - 921 929 spin_unlock_irqrestore(&vc4->job_lock, irqflags); 922 - } 923 - 924 - static void vc4_seqno_cb_work(struct work_struct *work) 925 - { 926 - struct vc4_seqno_cb *cb = container_of(work, struct vc4_seqno_cb, work); 927 - 928 - cb->func(cb); 929 - } 930 - 931 - int vc4_queue_seqno_cb(struct drm_device *dev, 932 - struct vc4_seqno_cb *cb, uint64_t seqno, 933 - void (*func)(struct vc4_seqno_cb *cb)) 934 - { 935 - struct vc4_dev *vc4 = to_vc4_dev(dev); 936 - unsigned long irqflags; 937 - 938 - if (WARN_ON_ONCE(vc4->gen > VC4_GEN_4)) 939 - return -ENODEV; 940 - 941 - cb->func = func; 942 - INIT_WORK(&cb->work, vc4_seqno_cb_work); 943 - 944 - spin_lock_irqsave(&vc4->job_lock, irqflags); 945 - if (seqno > vc4->finished_seqno) { 946 - cb->seqno = seqno; 947 - list_add_tail(&cb->work.entry, &vc4->seqno_cb_list); 948 - } else { 949 - schedule_work(&cb->work); 950 - } 951 - spin_unlock_irqrestore(&vc4->job_lock, irqflags); 952 - 953 - return 0; 954 930 } 955 931 956 932 /* Scheduled when any job has been completed, this walks the list of ··· 1167 1221 INIT_LIST_HEAD(&vc4->bin_job_list); 1168 1222 INIT_LIST_HEAD(&vc4->render_job_list); 1169 1223 INIT_LIST_HEAD(&vc4->job_done_list); 1170 - INIT_LIST_HEAD(&vc4->seqno_cb_list); 1171 1224 spin_lock_init(&vc4->job_lock); 1172 1225 1173 1226 INIT_WORK(&vc4->hangcheck.reset_work, 
vc4_reset_work);
-11
drivers/gpu/drm/vc4/vc4_validate.c
··· 284 284 if (!ib) 285 285 return -EINVAL; 286 286 287 - exec->bin_dep_seqno = max(exec->bin_dep_seqno, 288 - to_vc4_bo(&ib->base)->write_seqno); 289 - 290 287 if (offset > ib->base.size || 291 288 (ib->base.size - offset) / index_size < length) { 292 289 DRM_DEBUG("IB access overflow (%d + %d*%d > %zd)\n", ··· 735 738 736 739 *validated_p0 = tex->dma_addr + p0; 737 740 738 - if (is_cs) { 739 - exec->bin_dep_seqno = max(exec->bin_dep_seqno, 740 - to_vc4_bo(&tex->base)->write_seqno); 741 - } 742 - 743 741 return true; 744 742 fail: 745 743 DRM_INFO("Texture p0 at %d: 0x%08x\n", sample->p_offset[0], p0); ··· 895 903 uint32_t attr_size = *(uint8_t *)(pkt_u + o + 4) + 1; 896 904 uint32_t stride = *(uint8_t *)(pkt_u + o + 5); 897 905 uint32_t max_index; 898 - 899 - exec->bin_dep_seqno = max(exec->bin_dep_seqno, 900 - to_vc4_bo(&vbo->base)->write_seqno); 901 906 902 907 if (state->addr & 0x8) 903 908 stride |= (*(uint32_t *)(pkt_u + 100 + i * 4)) & ~0xff;