Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/vc4: add tracepoints for CL submissions

Add tracepoints for submit_cl_ioctl and the related IRQs to trace CL
submission and the execution of bin/render jobs. These are helpful for
building a rendering timeline and for tracking job throttling.

Signed-off-by: Melissa Wen <mwen@igalia.com>
Reviewed-by: Jose Maria Casanova Crespo <jmcasanova@igalia.com>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Link: https://patchwork.freedesktop.org/patch/msgid/20220201212651.zhltjmaokisffq3x@mail.igalia.com

Authored by Melissa Wen and committed by Maxime Ripard
044feb97 4442ac1a

+107
+7
drivers/gpu/drm/vc4/vc4_gem.c
··· 485 485 * immediately move it to the to-be-rendered queue. 486 486 */ 487 487 if (exec->ct0ca != exec->ct0ea) { 488 + trace_vc4_submit_cl(dev, false, exec->seqno, exec->ct0ca, 489 + exec->ct0ea); 488 490 submit_cl(dev, 0, exec->ct0ca, exec->ct0ea); 489 491 } else { 490 492 struct vc4_exec_info *next; ··· 521 519 */ 522 520 vc4_flush_texture_caches(dev); 523 521 522 + trace_vc4_submit_cl(dev, true, exec->seqno, exec->ct1ca, exec->ct1ea); 524 523 submit_cl(dev, 1, exec->ct1ca, exec->ct1ea); 525 524 } 526 525 ··· 1137 1134 struct ww_acquire_ctx acquire_ctx; 1138 1135 struct dma_fence *in_fence; 1139 1136 int ret = 0; 1137 + 1138 + trace_vc4_submit_cl_ioctl(dev, args->bin_cl_size, 1139 + args->shader_rec_size, 1140 + args->bo_handle_count); 1140 1141 1141 1142 if (!vc4->v3d) { 1142 1143 DRM_DEBUG("VC4_SUBMIT_CL with no VC4 V3D probed\n");
+5
drivers/gpu/drm/vc4/vc4_irq.c
··· 51 51 52 52 #include "vc4_drv.h" 53 53 #include "vc4_regs.h" 54 + #include "vc4_trace.h" 54 55 55 56 #define V3D_DRIVER_IRQS (V3D_INT_OUTOMEM | \ 56 57 V3D_INT_FLDONE | \ ··· 124 123 if (!exec) 125 124 return; 126 125 126 + trace_vc4_bcl_end_irq(dev, exec->seqno); 127 + 127 128 vc4_move_job_to_render(dev, exec); 128 129 next = vc4_first_bin_job(vc4); 129 130 ··· 163 160 164 161 if (!exec) 165 162 return; 163 + 164 + trace_vc4_rcl_end_irq(dev, exec->seqno); 166 165 167 166 vc4->finished_seqno++; 168 167 list_move_tail(&exec->head, &vc4->job_done_list);
+95
drivers/gpu/drm/vc4/vc4_trace.h
··· 52 52 __entry->dev, __entry->seqno) 53 53 ); 54 54 55 + TRACE_EVENT(vc4_submit_cl_ioctl, 56 + TP_PROTO(struct drm_device *dev, u32 bin_cl_size, u32 shader_rec_size, u32 bo_count), 57 + TP_ARGS(dev, bin_cl_size, shader_rec_size, bo_count), 58 + 59 + TP_STRUCT__entry( 60 + __field(u32, dev) 61 + __field(u32, bin_cl_size) 62 + __field(u32, shader_rec_size) 63 + __field(u32, bo_count) 64 + ), 65 + 66 + TP_fast_assign( 67 + __entry->dev = dev->primary->index; 68 + __entry->bin_cl_size = bin_cl_size; 69 + __entry->shader_rec_size = shader_rec_size; 70 + __entry->bo_count = bo_count; 71 + ), 72 + 73 + TP_printk("dev=%u, bin_cl_size=%u, shader_rec_size=%u, bo_count=%u", 74 + __entry->dev, 75 + __entry->bin_cl_size, 76 + __entry->shader_rec_size, 77 + __entry->bo_count) 78 + ); 79 + 80 + TRACE_EVENT(vc4_submit_cl, 81 + TP_PROTO(struct drm_device *dev, bool is_render, 82 + uint64_t seqno, 83 + u32 ctnqba, u32 ctnqea), 84 + TP_ARGS(dev, is_render, seqno, ctnqba, ctnqea), 85 + 86 + TP_STRUCT__entry( 87 + __field(u32, dev) 88 + __field(bool, is_render) 89 + __field(u64, seqno) 90 + __field(u32, ctnqba) 91 + __field(u32, ctnqea) 92 + ), 93 + 94 + TP_fast_assign( 95 + __entry->dev = dev->primary->index; 96 + __entry->is_render = is_render; 97 + __entry->seqno = seqno; 98 + __entry->ctnqba = ctnqba; 99 + __entry->ctnqea = ctnqea; 100 + ), 101 + 102 + TP_printk("dev=%u, %s, seqno=%llu, 0x%08x..0x%08x", 103 + __entry->dev, 104 + __entry->is_render ? 
"RCL" : "BCL", 105 + __entry->seqno, 106 + __entry->ctnqba, 107 + __entry->ctnqea) 108 + ); 109 + 110 + TRACE_EVENT(vc4_bcl_end_irq, 111 + TP_PROTO(struct drm_device *dev, 112 + uint64_t seqno), 113 + TP_ARGS(dev, seqno), 114 + 115 + TP_STRUCT__entry( 116 + __field(u32, dev) 117 + __field(u64, seqno) 118 + ), 119 + 120 + TP_fast_assign( 121 + __entry->dev = dev->primary->index; 122 + __entry->seqno = seqno; 123 + ), 124 + 125 + TP_printk("dev=%u, seqno=%llu", 126 + __entry->dev, 127 + __entry->seqno) 128 + ); 129 + 130 + TRACE_EVENT(vc4_rcl_end_irq, 131 + TP_PROTO(struct drm_device *dev, 132 + uint64_t seqno), 133 + TP_ARGS(dev, seqno), 134 + 135 + TP_STRUCT__entry( 136 + __field(u32, dev) 137 + __field(u64, seqno) 138 + ), 139 + 140 + TP_fast_assign( 141 + __entry->dev = dev->primary->index; 142 + __entry->seqno = seqno; 143 + ), 144 + 145 + TP_printk("dev=%u, seqno=%llu", 146 + __entry->dev, 147 + __entry->seqno) 148 + ); 149 + 55 150 #endif /* _VC4_TRACE_H_ */ 56 151 57 152 /* This part must be outside protection */