Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

RDMA/cm: Add tracepoints to track MAD send operations

Surface the operation of MAD exchanges during connection
establishment. Some samples:

[root@klimt ~]# trace-cmd report -F ib_cma
cpus=4
kworker/0:4-123 [000] 60.677388: icm_send_rep: local_id=1965336542 remote_id=1096195961 state=REQ_RCVD lap_state=LAP_UNINIT
kworker/u8:11-391 [002] 60.678808: icm_send_req: local_id=1982113758 remote_id=0 state=IDLE lap_state=LAP_UNINIT
kworker/0:4-123 [000] 60.679652: icm_send_rtu: local_id=1982113758 remote_id=1079418745 state=REP_RCVD lap_state=LAP_UNINIT
nfsd-1954 [001] 60.691350: icm_send_rep: local_id=1998890974 remote_id=1129750393 state=MRA_REQ_SENT lap_state=LAP_UNINIT
nfsd-1954 [003] 62.017931: icm_send_drep: local_id=1998890974 remote_id=1129750393 state=TIMEWAIT lap_state=LAP_UNINIT

Link: https://lore.kernel.org/r/159767240197.2968.12048458026453596018.stgit@klimt.1015granger.net
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>

Authored by Chuck Lever and committed by Jason Gunthorpe.
Commit 8dc105be, parent 75874b3d.

+125 -2
+20 -2
drivers/infiniband/core/cm.c
··· 1563 1563 cm_id_priv->local_qpn = cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg)); 1564 1564 cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg)); 1565 1565 1566 + trace_icm_send_req(&cm_id_priv->id); 1566 1567 spin_lock_irqsave(&cm_id_priv->lock, flags); 1567 1568 ret = ib_post_send_mad(cm_id_priv->msg, NULL); 1568 1569 if (ret) {
··· 1611 1610 IBA_SET_MEM(CM_REJ_ARI, rej_msg, ari, ari_length); 1612 1611 } 1613 1612 1613 + trace_icm_issue_rej( 1614 + IBA_GET(CM_REJ_LOCAL_COMM_ID, rcv_msg), 1615 + IBA_GET(CM_REJ_REMOTE_COMM_ID, rcv_msg)); 1614 1616 ret = ib_post_send_mad(msg, NULL); 1615 1617 if (ret) 1616 1618 cm_free_msg(msg);
··· 1965 1961 } 1966 1962 spin_unlock_irq(&cm_id_priv->lock); 1967 1963 1964 + trace_icm_send_dup_req(&cm_id_priv->id); 1968 1965 ret = ib_post_send_mad(msg, NULL); 1969 1966 if (ret) 1970 1967 goto free;
··· 2292 2287 msg->timeout_ms = cm_id_priv->timeout_ms; 2293 2288 msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT; 2294 2289 2290 + trace_icm_send_rep(cm_id); 2295 2291 ret = ib_post_send_mad(msg, NULL); 2296 2292 if (ret) { 2297 2293 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
··· 2364 2358 cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv, 2365 2359 private_data, private_data_len); 2366 2360 2361 + trace_icm_send_rtu(cm_id); 2367 2362 ret = ib_post_send_mad(msg, NULL); 2368 2363 if (ret) { 2369 2364 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
··· 2446 2439 goto unlock; 2447 2440 spin_unlock_irq(&cm_id_priv->lock); 2448 2441 2442 + trace_icm_send_dup_rep(&cm_id_priv->id); 2449 2443 ret = ib_post_send_mad(msg, NULL); 2450 2444 if (ret) 2451 2445 goto free;
··· 2668 2660 msg->timeout_ms = cm_id_priv->timeout_ms; 2669 2661 msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT; 2670 2662 2663 + trace_icm_send_dreq(&cm_id_priv->id); 2671 2664 ret = ib_post_send_mad(msg, NULL); 2672 2665 if (ret) { 2673 2666 cm_enter_timewait(cm_id_priv);
··· 2739 2730 cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv, 2740 2731 private_data, private_data_len); 2741 2732 2733 + trace_icm_send_drep(&cm_id_priv->id); 2742 2734 ret = ib_post_send_mad(msg, NULL); 2743 2735 if (ret) { 2744 2736 cm_free_msg(msg);
··· 2789 2779 IBA_SET(CM_DREP_LOCAL_COMM_ID, drep_msg, 2790 2780 IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg)); 2791 2781 2782 + trace_icm_issue_drep( 2783 + IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg), 2784 + IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg)); 2792 2785 ret = ib_post_send_mad(msg, NULL); 2793 2786 if (ret) 2794 2787 cm_free_msg(msg);
··· 2949 2936 return -EINVAL; 2950 2937 } 2951 2938 2939 + trace_icm_send_rej(&cm_id_priv->id, reason); 2952 2940 ret = ib_post_send_mad(msg, NULL); 2953 2941 if (ret) { 2954 2942 cm_free_msg(msg);
··· 3128 3114 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, 3129 3115 msg_response, service_timeout, 3130 3116 private_data, private_data_len); 3117 + trace_icm_send_mra(cm_id); 3131 3118 ret = ib_post_send_mad(msg, NULL); 3132 3119 if (ret) 3133 3120 goto error2;
··· 3499 3484 msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT; 3500 3485 3501 3486 spin_lock_irqsave(&cm_id_priv->lock, flags); 3502 - if (cm_id->state == IB_CM_IDLE) 3487 + if (cm_id->state == IB_CM_IDLE) { 3488 + trace_icm_send_sidr_req(&cm_id_priv->id); 3503 3489 ret = ib_post_send_mad(msg, NULL); 3504 - else 3490 + } else { 3505 3491 ret = -EINVAL; 3492 + } 3506 3493 3507 3494 if (ret) { 3508 3495 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
··· 3666 3649 3667 3650 cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv, 3668 3651 param); 3652 + trace_icm_send_sidr_rep(&cm_id_priv->id); 3669 3653 ret = ib_post_send_mad(msg, NULL); 3670 3654 if (ret) { 3671 3655 cm_free_msg(msg);
+105
drivers/infiniband/core/cm_trace.h
··· 80 80 #define show_ib_cm_lap_state(x) \ 81 81 __print_symbolic(x, IB_CM_LAP_STATE_LIST) 82 82 83 + /* 84 + * enum ib_cm_rej_reason, from include/rdma/ib_cm.h 85 + */ 86 + #define IB_CM_REJ_REASON_LIST \ 87 + ib_cm_rej_reason(REJ_NO_QP) \ 88 + ib_cm_rej_reason(REJ_NO_EEC) \ 89 + ib_cm_rej_reason(REJ_NO_RESOURCES) \ 90 + ib_cm_rej_reason(REJ_TIMEOUT) \ 91 + ib_cm_rej_reason(REJ_UNSUPPORTED) \ 92 + ib_cm_rej_reason(REJ_INVALID_COMM_ID) \ 93 + ib_cm_rej_reason(REJ_INVALID_COMM_INSTANCE) \ 94 + ib_cm_rej_reason(REJ_INVALID_SERVICE_ID) \ 95 + ib_cm_rej_reason(REJ_INVALID_TRANSPORT_TYPE) \ 96 + ib_cm_rej_reason(REJ_STALE_CONN) \ 97 + ib_cm_rej_reason(REJ_RDC_NOT_EXIST) \ 98 + ib_cm_rej_reason(REJ_INVALID_GID) \ 99 + ib_cm_rej_reason(REJ_INVALID_LID) \ 100 + ib_cm_rej_reason(REJ_INVALID_SL) \ 101 + ib_cm_rej_reason(REJ_INVALID_TRAFFIC_CLASS) \ 102 + ib_cm_rej_reason(REJ_INVALID_HOP_LIMIT) \ 103 + ib_cm_rej_reason(REJ_INVALID_PACKET_RATE) \ 104 + ib_cm_rej_reason(REJ_INVALID_ALT_GID) \ 105 + ib_cm_rej_reason(REJ_INVALID_ALT_LID) \ 106 + ib_cm_rej_reason(REJ_INVALID_ALT_SL) \ 107 + ib_cm_rej_reason(REJ_INVALID_ALT_TRAFFIC_CLASS) \ 108 + ib_cm_rej_reason(REJ_INVALID_ALT_HOP_LIMIT) \ 109 + ib_cm_rej_reason(REJ_INVALID_ALT_PACKET_RATE) \ 110 + ib_cm_rej_reason(REJ_PORT_CM_REDIRECT) \ 111 + ib_cm_rej_reason(REJ_PORT_REDIRECT) \ 112 + ib_cm_rej_reason(REJ_INVALID_MTU) \ 113 + ib_cm_rej_reason(REJ_INSUFFICIENT_RESP_RESOURCES) \ 114 + ib_cm_rej_reason(REJ_CONSUMER_DEFINED) \ 115 + ib_cm_rej_reason(REJ_INVALID_RNR_RETRY) \ 116 + ib_cm_rej_reason(REJ_DUPLICATE_LOCAL_COMM_ID) \ 117 + ib_cm_rej_reason(REJ_INVALID_CLASS_VERSION) \ 118 + ib_cm_rej_reason(REJ_INVALID_FLOW_LABEL) \ 119 + ib_cm_rej_reason(REJ_INVALID_ALT_FLOW_LABEL) \ 120 + ib_cm_rej_reason_end(REJ_VENDOR_OPTION_NOT_SUPPORTED) 121 + 122 + #undef ib_cm_rej_reason 123 + #undef ib_cm_rej_reason_end 124 + #define ib_cm_rej_reason(x) TRACE_DEFINE_ENUM(IB_CM_##x); 125 + #define ib_cm_rej_reason_end(x) TRACE_DEFINE_ENUM(IB_CM_##x); 126 + 127 + IB_CM_REJ_REASON_LIST 128 + 129 + #undef ib_cm_rej_reason 130 + #undef ib_cm_rej_reason_end 131 + #define ib_cm_rej_reason(x) { IB_CM_##x, #x }, 132 + #define ib_cm_rej_reason_end(x) { IB_CM_##x, #x } 133 + 134 + #define show_ib_cm_rej_reason(x) \ 135 + __print_symbolic(x, IB_CM_REJ_REASON_LIST) 83 136 84 137 DECLARE_EVENT_CLASS(icm_id_class, 85 138 TP_PROTO(
··· 161 108 __entry->local_id, __entry->remote_id, 162 109 show_ib_cm_state(__entry->state), 163 110 show_ib_cm_lap_state(__entry->lap_state) 111 + ) 112 + ); 113 + 114 + #define DEFINE_CM_SEND_EVENT(name) \ 115 + DEFINE_EVENT(icm_id_class, \ 116 + icm_send_##name, \ 117 + TP_PROTO( \ 118 + const struct ib_cm_id *cm_id \ 119 + ), \ 120 + TP_ARGS(cm_id)) 121 + 122 + DEFINE_CM_SEND_EVENT(req); 123 + DEFINE_CM_SEND_EVENT(rep); 124 + DEFINE_CM_SEND_EVENT(dup_req); 125 + DEFINE_CM_SEND_EVENT(dup_rep); 126 + DEFINE_CM_SEND_EVENT(rtu); 127 + DEFINE_CM_SEND_EVENT(mra); 128 + DEFINE_CM_SEND_EVENT(sidr_req); 129 + DEFINE_CM_SEND_EVENT(sidr_rep); 130 + DEFINE_CM_SEND_EVENT(dreq); 131 + DEFINE_CM_SEND_EVENT(drep); 132 + 133 + TRACE_EVENT(icm_send_rej, 134 + TP_PROTO( 135 + const struct ib_cm_id *cm_id, 136 + enum ib_cm_rej_reason reason 137 + ), 138 + 139 + TP_ARGS(cm_id, reason), 140 + 141 + TP_STRUCT__entry( 142 + __field(const void *, cm_id) 143 + __field(u32, local_id) 144 + __field(u32, remote_id) 145 + __field(unsigned long, state) 146 + __field(unsigned long, reason) 147 + ), 148 + 149 + TP_fast_assign( 150 + __entry->cm_id = cm_id; 151 + __entry->local_id = be32_to_cpu(cm_id->local_id); 152 + __entry->remote_id = be32_to_cpu(cm_id->remote_id); 153 + __entry->state = cm_id->state; 154 + __entry->reason = reason; 155 + ), 156 + 157 + TP_printk("local_id=%u remote_id=%u state=%s reason=%s", 158 + __entry->local_id, __entry->remote_id, 159 + show_ib_cm_state(__entry->state), 160 + show_ib_cm_rej_reason(__entry->reason) 164 161 ) 165 162 ); 166 163
··· 275 172 ), \ 276 173 TP_ARGS(local_id, remote_id)) 277 174 175 + DEFINE_CM_LOCAL_EVENT(issue_rej); 176 + DEFINE_CM_LOCAL_EVENT(issue_drep); 278 177 DEFINE_CM_LOCAL_EVENT(staleconn_err); 279 178 DEFINE_CM_LOCAL_EVENT(no_priv_err); 280 179