Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'nfsd-4.18' of git://linux-nfs.org/~bfields/linux

Pull nfsd updates from Bruce Fields:
"A relatively quiet cycle for nfsd.

The largest piece is an RDMA update from Chuck Lever with new trace
points, miscellaneous cleanups, and streamlining of the send and
receive paths.

Other than that, some miscellaneous bugfixes."

* tag 'nfsd-4.18' of git://linux-nfs.org/~bfields/linux: (26 commits)
nfsd: fix error handling in nfs4_set_delegation()
nfsd: fix potential use-after-free in nfsd4_decode_getdeviceinfo
Fix 16-byte memory leak in gssp_accept_sec_context_upcall
svcrdma: Fix incorrect return value/type in svc_rdma_post_recvs
svcrdma: Remove unused svc_rdma_op_ctxt
svcrdma: Persistently allocate and DMA-map Send buffers
svcrdma: Simplify svc_rdma_send()
svcrdma: Remove post_send_wr
svcrdma: Don't overrun the SGE array in svc_rdma_send_ctxt
svcrdma: Introduce svc_rdma_send_ctxt
svcrdma: Clean up Send SGE accounting
svcrdma: Refactor svc_rdma_dma_map_buf
svcrdma: Allocate recv_ctxt's on CPU handling Receives
svcrdma: Persistently allocate and DMA-map Receive buffers
svcrdma: Preserve Receive buffer until svc_rdma_sendto
svcrdma: Simplify svc_rdma_recv_ctxt_put
svcrdma: Remove sc_rq_depth
svcrdma: Introduce svc_rdma_recv_ctxt
svcrdma: Trace key RDMA API events
svcrdma: Trace key RPC/RDMA protocol events
...

+1492 -883
+16 -2
fs/nfsd/blocklayout.c
··· 216 216 struct request_queue *q = bdev->bd_disk->queue; 217 217 struct request *rq; 218 218 struct scsi_request *req; 219 - size_t bufflen = 252, len, id_len; 219 + /* 220 + * The allocation length (passed in bytes 3 and 4 of the INQUIRY 221 + * command descriptor block) specifies the number of bytes that have 222 + * been allocated for the data-in buffer. 223 + * 252 is the highest one-byte value that is a multiple of 4. 224 + * 65532 is the highest two-byte value that is a multiple of 4. 225 + */ 226 + size_t bufflen = 252, maxlen = 65532, len, id_len; 220 227 u8 *buf, *d, type, assoc; 221 - int error; 228 + int retries = 1, error; 222 229 223 230 if (WARN_ON_ONCE(!blk_queue_scsi_passthrough(q))) 224 231 return -EINVAL; 225 232 233 + again: 226 234 buf = kzalloc(bufflen, GFP_KERNEL); 227 235 if (!buf) 228 236 return -ENOMEM; ··· 263 255 264 256 len = (buf[2] << 8) + buf[3] + 4; 265 257 if (len > bufflen) { 258 + if (len <= maxlen && retries--) { 259 + blk_put_request(rq); 260 + kfree(buf); 261 + bufflen = len; 262 + goto again; 263 + } 266 264 pr_err("pNFS: INQUIRY 0x83 response invalid (len = %zd)\n", 267 265 len); 268 266 goto out_put_request;
-5
fs/nfsd/cache.h
··· 67 67 RC_REPLBUFF, 68 68 }; 69 69 70 - /* 71 - * If requests are retransmitted within this interval, they're dropped. 72 - */ 73 - #define RC_DELAY (HZ/5) 74 - 75 70 /* Cache entries expire after this time period */ 76 71 #define RC_EXPIRE (120 * HZ) 77 72
+4 -1
fs/nfsd/nfs4state.c
··· 4378 4378 spin_unlock(&state_lock); 4379 4379 4380 4380 if (status) 4381 - destroy_unhashed_deleg(dp); 4381 + goto out_unlock; 4382 + 4382 4383 return dp; 4384 + out_unlock: 4385 + vfs_setlease(fp->fi_deleg_file, F_UNLCK, NULL, (void **)&dp); 4383 4386 out_clnt_odstate: 4384 4387 put_clnt_odstate(dp->dl_clnt_odstate); 4385 4388 out_stid:
+5 -2
fs/nfsd/nfs4xdr.c
··· 1585 1585 gdev->gd_maxcount = be32_to_cpup(p++); 1586 1586 num = be32_to_cpup(p++); 1587 1587 if (num) { 1588 + if (num > 1000) 1589 + goto xdr_error; 1588 1590 READ_BUF(4 * num); 1589 1591 gdev->gd_notify_types = be32_to_cpup(p++); 1590 1592 for (i = 1; i < num; i++) { ··· 3653 3651 nfserr = nfserr_resource; 3654 3652 goto err_no_verf; 3655 3653 } 3656 - maxcount = min_t(u32, readdir->rd_maxcount, INT_MAX); 3654 + maxcount = svc_max_payload(resp->rqstp); 3655 + maxcount = min_t(u32, readdir->rd_maxcount, maxcount); 3657 3656 /* 3658 3657 * Note the rfc defines rd_maxcount as the size of the 3659 3658 * READDIR4resok structure, which includes the verifier above ··· 3668 3665 3669 3666 /* RFC 3530 14.2.24 allows us to ignore dircount when it's 0: */ 3670 3667 if (!readdir->rd_dircount) 3671 - readdir->rd_dircount = INT_MAX; 3668 + readdir->rd_dircount = svc_max_payload(resp->rqstp); 3672 3669 3673 3670 readdir->xdr = xdr; 3674 3671 readdir->rd_maxcount = maxcount;
+2 -4
fs/nfsd/nfscache.c
··· 394 394 __wsum csum; 395 395 u32 hash = nfsd_cache_hash(xid); 396 396 struct nfsd_drc_bucket *b = &drc_hashtbl[hash]; 397 - unsigned long age; 398 397 int type = rqstp->rq_cachetype; 399 398 int rtn = RC_DOIT; 400 399 ··· 460 461 found_entry: 461 462 nfsdstats.rchits++; 462 463 /* We found a matching entry which is either in progress or done. */ 463 - age = jiffies - rp->c_timestamp; 464 464 lru_put_end(b, rp); 465 465 466 466 rtn = RC_DROPIT; 467 - /* Request being processed or excessive rexmits */ 468 - if (rp->c_state == RC_INPROG || age < RC_DELAY) 467 + /* Request being processed */ 468 + if (rp->c_state == RC_INPROG) 469 469 goto out; 470 470 471 471 /* From the hall of fame of impractical attacks:
+49 -46
include/linux/sunrpc/svc_rdma.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ 1 2 /* 2 3 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. 3 4 * ··· 71 70 extern atomic_t rdma_stat_sq_poll; 72 71 extern atomic_t rdma_stat_sq_prod; 73 72 74 - /* 75 - * Contexts are built when an RDMA request is created and are a 76 - * record of the resources that can be recovered when the request 77 - * completes. 78 - */ 79 - struct svc_rdma_op_ctxt { 80 - struct list_head list; 81 - struct xdr_buf arg; 82 - struct ib_cqe cqe; 83 - u32 byte_len; 84 - struct svcxprt_rdma *xprt; 85 - enum dma_data_direction direction; 86 - int count; 87 - unsigned int mapped_sges; 88 - int hdr_count; 89 - struct ib_send_wr send_wr; 90 - struct ib_sge sge[1 + RPCRDMA_MAX_INLINE_THRESH / PAGE_SIZE]; 91 - struct page *pages[RPCSVC_MAXPAGES]; 92 - }; 93 - 94 73 struct svcxprt_rdma { 95 74 struct svc_xprt sc_xprt; /* SVC transport structure */ 96 75 struct rdma_cm_id *sc_cm_id; /* RDMA connection id */ 97 76 struct list_head sc_accept_q; /* Conn. 
waiting accept */ 98 77 int sc_ord; /* RDMA read limit */ 99 - int sc_max_sge; 78 + int sc_max_send_sges; 100 79 bool sc_snd_w_inv; /* OK to use Send With Invalidate */ 101 80 102 81 atomic_t sc_sq_avail; /* SQEs ready to be consumed */ 103 82 unsigned int sc_sq_depth; /* Depth of SQ */ 104 - unsigned int sc_rq_depth; /* Depth of RQ */ 105 83 __be32 sc_fc_credits; /* Forward credits */ 106 84 u32 sc_max_requests; /* Max requests */ 107 85 u32 sc_max_bc_requests;/* Backward credits */ ··· 89 109 90 110 struct ib_pd *sc_pd; 91 111 92 - spinlock_t sc_ctxt_lock; 93 - struct list_head sc_ctxts; 94 - int sc_ctxt_used; 112 + spinlock_t sc_send_lock; 113 + struct list_head sc_send_ctxts; 95 114 spinlock_t sc_rw_ctxt_lock; 96 115 struct list_head sc_rw_ctxts; 97 116 ··· 106 127 unsigned long sc_flags; 107 128 struct list_head sc_read_complete_q; 108 129 struct work_struct sc_work; 130 + 131 + spinlock_t sc_recv_lock; 132 + struct list_head sc_recv_ctxts; 109 133 }; 110 134 /* sc_flags */ 111 135 #define RDMAXPRT_CONN_PENDING 3 ··· 123 141 124 142 #define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD 125 143 126 - /* Track DMA maps for this transport and context */ 127 - static inline void svc_rdma_count_mappings(struct svcxprt_rdma *rdma, 128 - struct svc_rdma_op_ctxt *ctxt) 129 - { 130 - ctxt->mapped_sges++; 131 - } 144 + struct svc_rdma_recv_ctxt { 145 + struct list_head rc_list; 146 + struct ib_recv_wr rc_recv_wr; 147 + struct ib_cqe rc_cqe; 148 + struct ib_sge rc_recv_sge; 149 + void *rc_recv_buf; 150 + struct xdr_buf rc_arg; 151 + bool rc_temp; 152 + u32 rc_byte_len; 153 + unsigned int rc_page_count; 154 + unsigned int rc_hdr_count; 155 + struct page *rc_pages[RPCSVC_MAXPAGES]; 156 + }; 157 + 158 + struct svc_rdma_send_ctxt { 159 + struct list_head sc_list; 160 + struct ib_send_wr sc_send_wr; 161 + struct ib_cqe sc_cqe; 162 + void *sc_xprt_buf; 163 + int sc_page_count; 164 + int sc_cur_sge_no; 165 + struct page *sc_pages[RPCSVC_MAXPAGES]; 166 + struct ib_sge sc_sges[]; 167 
+ }; 132 168 133 169 /* svc_rdma_backchannel.c */ 134 170 extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, ··· 154 154 struct xdr_buf *rcvbuf); 155 155 156 156 /* svc_rdma_recvfrom.c */ 157 + extern void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma); 158 + extern bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma); 159 + extern void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma, 160 + struct svc_rdma_recv_ctxt *ctxt); 161 + extern void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma); 157 162 extern int svc_rdma_recvfrom(struct svc_rqst *); 158 163 159 164 /* svc_rdma_rw.c */ 160 165 extern void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma); 161 166 extern int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, 162 167 struct svc_rqst *rqstp, 163 - struct svc_rdma_op_ctxt *head, __be32 *p); 168 + struct svc_rdma_recv_ctxt *head, __be32 *p); 164 169 extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, 165 170 __be32 *wr_ch, struct xdr_buf *xdr); 166 171 extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, ··· 173 168 struct xdr_buf *xdr); 174 169 175 170 /* svc_rdma_sendto.c */ 176 - extern int svc_rdma_map_reply_hdr(struct svcxprt_rdma *rdma, 177 - struct svc_rdma_op_ctxt *ctxt, 178 - __be32 *rdma_resp, unsigned int len); 179 - extern int svc_rdma_post_send_wr(struct svcxprt_rdma *rdma, 180 - struct svc_rdma_op_ctxt *ctxt, 181 - int num_sge, u32 inv_rkey); 171 + extern void svc_rdma_send_ctxts_destroy(struct svcxprt_rdma *rdma); 172 + extern struct svc_rdma_send_ctxt * 173 + svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma); 174 + extern void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma, 175 + struct svc_rdma_send_ctxt *ctxt); 176 + extern int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr); 177 + extern void svc_rdma_sync_reply_hdr(struct svcxprt_rdma *rdma, 178 + struct svc_rdma_send_ctxt *ctxt, 179 + unsigned int len); 180 + extern int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, 
181 + struct svc_rdma_send_ctxt *ctxt, 182 + struct xdr_buf *xdr, __be32 *wr_lst); 182 183 extern int svc_rdma_sendto(struct svc_rqst *); 183 184 184 185 /* svc_rdma_transport.c */ 185 - extern void svc_rdma_wc_send(struct ib_cq *, struct ib_wc *); 186 - extern void svc_rdma_wc_reg(struct ib_cq *, struct ib_wc *); 187 - extern void svc_rdma_wc_read(struct ib_cq *, struct ib_wc *); 188 - extern void svc_rdma_wc_inv(struct ib_cq *, struct ib_wc *); 189 - extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *); 190 186 extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *); 191 - extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *); 192 - extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int); 193 - extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt); 194 187 extern void svc_sq_reap(struct svcxprt_rdma *); 195 188 extern void svc_rq_reap(struct svcxprt_rdma *); 196 189 extern void svc_rdma_prep_reply_hdr(struct svc_rqst *);
+583 -1
include/trace/events/rpcrdma.h
··· 1 1 /* SPDX-License-Identifier: GPL-2.0 */ 2 2 /* 3 - * Copyright (c) 2017 Oracle. All rights reserved. 3 + * Copyright (c) 2017, 2018 Oracle. All rights reserved. 4 + * 5 + * Trace point definitions for the "rpcrdma" subsystem. 4 6 */ 5 7 #undef TRACE_SYSTEM 6 8 #define TRACE_SYSTEM rpcrdma ··· 886 884 887 885 DEFINE_CB_EVENT(xprtrdma_cb_call); 888 886 DEFINE_CB_EVENT(xprtrdma_cb_reply); 887 + 888 + /** 889 + ** Server-side RPC/RDMA events 890 + **/ 891 + 892 + DECLARE_EVENT_CLASS(svcrdma_xprt_event, 893 + TP_PROTO( 894 + const struct svc_xprt *xprt 895 + ), 896 + 897 + TP_ARGS(xprt), 898 + 899 + TP_STRUCT__entry( 900 + __field(const void *, xprt) 901 + __string(addr, xprt->xpt_remotebuf) 902 + ), 903 + 904 + TP_fast_assign( 905 + __entry->xprt = xprt; 906 + __assign_str(addr, xprt->xpt_remotebuf); 907 + ), 908 + 909 + TP_printk("xprt=%p addr=%s", 910 + __entry->xprt, __get_str(addr) 911 + ) 912 + ); 913 + 914 + #define DEFINE_XPRT_EVENT(name) \ 915 + DEFINE_EVENT(svcrdma_xprt_event, svcrdma_xprt_##name, \ 916 + TP_PROTO( \ 917 + const struct svc_xprt *xprt \ 918 + ), \ 919 + TP_ARGS(xprt)) 920 + 921 + DEFINE_XPRT_EVENT(accept); 922 + DEFINE_XPRT_EVENT(fail); 923 + DEFINE_XPRT_EVENT(free); 924 + 925 + TRACE_DEFINE_ENUM(RDMA_MSG); 926 + TRACE_DEFINE_ENUM(RDMA_NOMSG); 927 + TRACE_DEFINE_ENUM(RDMA_MSGP); 928 + TRACE_DEFINE_ENUM(RDMA_DONE); 929 + TRACE_DEFINE_ENUM(RDMA_ERROR); 930 + 931 + #define show_rpcrdma_proc(x) \ 932 + __print_symbolic(x, \ 933 + { RDMA_MSG, "RDMA_MSG" }, \ 934 + { RDMA_NOMSG, "RDMA_NOMSG" }, \ 935 + { RDMA_MSGP, "RDMA_MSGP" }, \ 936 + { RDMA_DONE, "RDMA_DONE" }, \ 937 + { RDMA_ERROR, "RDMA_ERROR" }) 938 + 939 + TRACE_EVENT(svcrdma_decode_rqst, 940 + TP_PROTO( 941 + __be32 *p, 942 + unsigned int hdrlen 943 + ), 944 + 945 + TP_ARGS(p, hdrlen), 946 + 947 + TP_STRUCT__entry( 948 + __field(u32, xid) 949 + __field(u32, vers) 950 + __field(u32, proc) 951 + __field(u32, credits) 952 + __field(unsigned int, hdrlen) 953 + ), 954 + 955 + 
TP_fast_assign( 956 + __entry->xid = be32_to_cpup(p++); 957 + __entry->vers = be32_to_cpup(p++); 958 + __entry->credits = be32_to_cpup(p++); 959 + __entry->proc = be32_to_cpup(p); 960 + __entry->hdrlen = hdrlen; 961 + ), 962 + 963 + TP_printk("xid=0x%08x vers=%u credits=%u proc=%s hdrlen=%u", 964 + __entry->xid, __entry->vers, __entry->credits, 965 + show_rpcrdma_proc(__entry->proc), __entry->hdrlen) 966 + ); 967 + 968 + TRACE_EVENT(svcrdma_decode_short, 969 + TP_PROTO( 970 + unsigned int hdrlen 971 + ), 972 + 973 + TP_ARGS(hdrlen), 974 + 975 + TP_STRUCT__entry( 976 + __field(unsigned int, hdrlen) 977 + ), 978 + 979 + TP_fast_assign( 980 + __entry->hdrlen = hdrlen; 981 + ), 982 + 983 + TP_printk("hdrlen=%u", __entry->hdrlen) 984 + ); 985 + 986 + DECLARE_EVENT_CLASS(svcrdma_badreq_event, 987 + TP_PROTO( 988 + __be32 *p 989 + ), 990 + 991 + TP_ARGS(p), 992 + 993 + TP_STRUCT__entry( 994 + __field(u32, xid) 995 + __field(u32, vers) 996 + __field(u32, proc) 997 + __field(u32, credits) 998 + ), 999 + 1000 + TP_fast_assign( 1001 + __entry->xid = be32_to_cpup(p++); 1002 + __entry->vers = be32_to_cpup(p++); 1003 + __entry->credits = be32_to_cpup(p++); 1004 + __entry->proc = be32_to_cpup(p); 1005 + ), 1006 + 1007 + TP_printk("xid=0x%08x vers=%u credits=%u proc=%u", 1008 + __entry->xid, __entry->vers, __entry->credits, __entry->proc) 1009 + ); 1010 + 1011 + #define DEFINE_BADREQ_EVENT(name) \ 1012 + DEFINE_EVENT(svcrdma_badreq_event, svcrdma_decode_##name,\ 1013 + TP_PROTO( \ 1014 + __be32 *p \ 1015 + ), \ 1016 + TP_ARGS(p)) 1017 + 1018 + DEFINE_BADREQ_EVENT(badvers); 1019 + DEFINE_BADREQ_EVENT(drop); 1020 + DEFINE_BADREQ_EVENT(badproc); 1021 + DEFINE_BADREQ_EVENT(parse); 1022 + 1023 + DECLARE_EVENT_CLASS(svcrdma_segment_event, 1024 + TP_PROTO( 1025 + u32 handle, 1026 + u32 length, 1027 + u64 offset 1028 + ), 1029 + 1030 + TP_ARGS(handle, length, offset), 1031 + 1032 + TP_STRUCT__entry( 1033 + __field(u32, handle) 1034 + __field(u32, length) 1035 + __field(u64, offset) 1036 + 
), 1037 + 1038 + TP_fast_assign( 1039 + __entry->handle = handle; 1040 + __entry->length = length; 1041 + __entry->offset = offset; 1042 + ), 1043 + 1044 + TP_printk("%u@0x%016llx:0x%08x", 1045 + __entry->length, (unsigned long long)__entry->offset, 1046 + __entry->handle 1047 + ) 1048 + ); 1049 + 1050 + #define DEFINE_SEGMENT_EVENT(name) \ 1051 + DEFINE_EVENT(svcrdma_segment_event, svcrdma_encode_##name,\ 1052 + TP_PROTO( \ 1053 + u32 handle, \ 1054 + u32 length, \ 1055 + u64 offset \ 1056 + ), \ 1057 + TP_ARGS(handle, length, offset)) 1058 + 1059 + DEFINE_SEGMENT_EVENT(rseg); 1060 + DEFINE_SEGMENT_EVENT(wseg); 1061 + 1062 + DECLARE_EVENT_CLASS(svcrdma_chunk_event, 1063 + TP_PROTO( 1064 + u32 length 1065 + ), 1066 + 1067 + TP_ARGS(length), 1068 + 1069 + TP_STRUCT__entry( 1070 + __field(u32, length) 1071 + ), 1072 + 1073 + TP_fast_assign( 1074 + __entry->length = length; 1075 + ), 1076 + 1077 + TP_printk("length=%u", 1078 + __entry->length 1079 + ) 1080 + ); 1081 + 1082 + #define DEFINE_CHUNK_EVENT(name) \ 1083 + DEFINE_EVENT(svcrdma_chunk_event, svcrdma_encode_##name,\ 1084 + TP_PROTO( \ 1085 + u32 length \ 1086 + ), \ 1087 + TP_ARGS(length)) 1088 + 1089 + DEFINE_CHUNK_EVENT(pzr); 1090 + DEFINE_CHUNK_EVENT(write); 1091 + DEFINE_CHUNK_EVENT(reply); 1092 + 1093 + TRACE_EVENT(svcrdma_encode_read, 1094 + TP_PROTO( 1095 + u32 length, 1096 + u32 position 1097 + ), 1098 + 1099 + TP_ARGS(length, position), 1100 + 1101 + TP_STRUCT__entry( 1102 + __field(u32, length) 1103 + __field(u32, position) 1104 + ), 1105 + 1106 + TP_fast_assign( 1107 + __entry->length = length; 1108 + __entry->position = position; 1109 + ), 1110 + 1111 + TP_printk("length=%u position=%u", 1112 + __entry->length, __entry->position 1113 + ) 1114 + ); 1115 + 1116 + DECLARE_EVENT_CLASS(svcrdma_error_event, 1117 + TP_PROTO( 1118 + __be32 xid 1119 + ), 1120 + 1121 + TP_ARGS(xid), 1122 + 1123 + TP_STRUCT__entry( 1124 + __field(u32, xid) 1125 + ), 1126 + 1127 + TP_fast_assign( 1128 + __entry->xid = 
be32_to_cpu(xid); 1129 + ), 1130 + 1131 + TP_printk("xid=0x%08x", 1132 + __entry->xid 1133 + ) 1134 + ); 1135 + 1136 + #define DEFINE_ERROR_EVENT(name) \ 1137 + DEFINE_EVENT(svcrdma_error_event, svcrdma_err_##name, \ 1138 + TP_PROTO( \ 1139 + __be32 xid \ 1140 + ), \ 1141 + TP_ARGS(xid)) 1142 + 1143 + DEFINE_ERROR_EVENT(vers); 1144 + DEFINE_ERROR_EVENT(chunk); 1145 + 1146 + /** 1147 + ** Server-side RDMA API events 1148 + **/ 1149 + 1150 + TRACE_EVENT(svcrdma_dma_map_page, 1151 + TP_PROTO( 1152 + const struct svcxprt_rdma *rdma, 1153 + const void *page 1154 + ), 1155 + 1156 + TP_ARGS(rdma, page), 1157 + 1158 + TP_STRUCT__entry( 1159 + __field(const void *, page); 1160 + __string(device, rdma->sc_cm_id->device->name) 1161 + __string(addr, rdma->sc_xprt.xpt_remotebuf) 1162 + ), 1163 + 1164 + TP_fast_assign( 1165 + __entry->page = page; 1166 + __assign_str(device, rdma->sc_cm_id->device->name); 1167 + __assign_str(addr, rdma->sc_xprt.xpt_remotebuf); 1168 + ), 1169 + 1170 + TP_printk("addr=%s device=%s page=%p", 1171 + __get_str(addr), __get_str(device), __entry->page 1172 + ) 1173 + ); 1174 + 1175 + TRACE_EVENT(svcrdma_dma_map_rwctx, 1176 + TP_PROTO( 1177 + const struct svcxprt_rdma *rdma, 1178 + int status 1179 + ), 1180 + 1181 + TP_ARGS(rdma, status), 1182 + 1183 + TP_STRUCT__entry( 1184 + __field(int, status) 1185 + __string(device, rdma->sc_cm_id->device->name) 1186 + __string(addr, rdma->sc_xprt.xpt_remotebuf) 1187 + ), 1188 + 1189 + TP_fast_assign( 1190 + __entry->status = status; 1191 + __assign_str(device, rdma->sc_cm_id->device->name); 1192 + __assign_str(addr, rdma->sc_xprt.xpt_remotebuf); 1193 + ), 1194 + 1195 + TP_printk("addr=%s device=%s status=%d", 1196 + __get_str(addr), __get_str(device), __entry->status 1197 + ) 1198 + ); 1199 + 1200 + TRACE_EVENT(svcrdma_send_failed, 1201 + TP_PROTO( 1202 + const struct svc_rqst *rqst, 1203 + int status 1204 + ), 1205 + 1206 + TP_ARGS(rqst, status), 1207 + 1208 + TP_STRUCT__entry( 1209 + __field(int, status) 1210 + 
__field(u32, xid) 1211 + __field(const void *, xprt) 1212 + __string(addr, rqst->rq_xprt->xpt_remotebuf) 1213 + ), 1214 + 1215 + TP_fast_assign( 1216 + __entry->status = status; 1217 + __entry->xid = __be32_to_cpu(rqst->rq_xid); 1218 + __entry->xprt = rqst->rq_xprt; 1219 + __assign_str(addr, rqst->rq_xprt->xpt_remotebuf); 1220 + ), 1221 + 1222 + TP_printk("xprt=%p addr=%s xid=0x%08x status=%d", 1223 + __entry->xprt, __get_str(addr), 1224 + __entry->xid, __entry->status 1225 + ) 1226 + ); 1227 + 1228 + DECLARE_EVENT_CLASS(svcrdma_sendcomp_event, 1229 + TP_PROTO( 1230 + const struct ib_wc *wc 1231 + ), 1232 + 1233 + TP_ARGS(wc), 1234 + 1235 + TP_STRUCT__entry( 1236 + __field(const void *, cqe) 1237 + __field(unsigned int, status) 1238 + __field(unsigned int, vendor_err) 1239 + ), 1240 + 1241 + TP_fast_assign( 1242 + __entry->cqe = wc->wr_cqe; 1243 + __entry->status = wc->status; 1244 + if (wc->status) 1245 + __entry->vendor_err = wc->vendor_err; 1246 + else 1247 + __entry->vendor_err = 0; 1248 + ), 1249 + 1250 + TP_printk("cqe=%p status=%s (%u/0x%x)", 1251 + __entry->cqe, rdma_show_wc_status(__entry->status), 1252 + __entry->status, __entry->vendor_err 1253 + ) 1254 + ); 1255 + 1256 + #define DEFINE_SENDCOMP_EVENT(name) \ 1257 + DEFINE_EVENT(svcrdma_sendcomp_event, svcrdma_wc_##name, \ 1258 + TP_PROTO( \ 1259 + const struct ib_wc *wc \ 1260 + ), \ 1261 + TP_ARGS(wc)) 1262 + 1263 + TRACE_EVENT(svcrdma_post_send, 1264 + TP_PROTO( 1265 + const struct ib_send_wr *wr, 1266 + int status 1267 + ), 1268 + 1269 + TP_ARGS(wr, status), 1270 + 1271 + TP_STRUCT__entry( 1272 + __field(const void *, cqe) 1273 + __field(unsigned int, num_sge) 1274 + __field(u32, inv_rkey) 1275 + __field(int, status) 1276 + ), 1277 + 1278 + TP_fast_assign( 1279 + __entry->cqe = wr->wr_cqe; 1280 + __entry->num_sge = wr->num_sge; 1281 + __entry->inv_rkey = (wr->opcode == IB_WR_SEND_WITH_INV) ? 
1282 + wr->ex.invalidate_rkey : 0; 1283 + __entry->status = status; 1284 + ), 1285 + 1286 + TP_printk("cqe=%p num_sge=%u inv_rkey=0x%08x status=%d", 1287 + __entry->cqe, __entry->num_sge, 1288 + __entry->inv_rkey, __entry->status 1289 + ) 1290 + ); 1291 + 1292 + DEFINE_SENDCOMP_EVENT(send); 1293 + 1294 + TRACE_EVENT(svcrdma_post_recv, 1295 + TP_PROTO( 1296 + const struct ib_recv_wr *wr, 1297 + int status 1298 + ), 1299 + 1300 + TP_ARGS(wr, status), 1301 + 1302 + TP_STRUCT__entry( 1303 + __field(const void *, cqe) 1304 + __field(int, status) 1305 + ), 1306 + 1307 + TP_fast_assign( 1308 + __entry->cqe = wr->wr_cqe; 1309 + __entry->status = status; 1310 + ), 1311 + 1312 + TP_printk("cqe=%p status=%d", 1313 + __entry->cqe, __entry->status 1314 + ) 1315 + ); 1316 + 1317 + TRACE_EVENT(svcrdma_wc_receive, 1318 + TP_PROTO( 1319 + const struct ib_wc *wc 1320 + ), 1321 + 1322 + TP_ARGS(wc), 1323 + 1324 + TP_STRUCT__entry( 1325 + __field(const void *, cqe) 1326 + __field(u32, byte_len) 1327 + __field(unsigned int, status) 1328 + __field(u32, vendor_err) 1329 + ), 1330 + 1331 + TP_fast_assign( 1332 + __entry->cqe = wc->wr_cqe; 1333 + __entry->status = wc->status; 1334 + if (wc->status) { 1335 + __entry->byte_len = 0; 1336 + __entry->vendor_err = wc->vendor_err; 1337 + } else { 1338 + __entry->byte_len = wc->byte_len; 1339 + __entry->vendor_err = 0; 1340 + } 1341 + ), 1342 + 1343 + TP_printk("cqe=%p byte_len=%u status=%s (%u/0x%x)", 1344 + __entry->cqe, __entry->byte_len, 1345 + rdma_show_wc_status(__entry->status), 1346 + __entry->status, __entry->vendor_err 1347 + ) 1348 + ); 1349 + 1350 + TRACE_EVENT(svcrdma_post_rw, 1351 + TP_PROTO( 1352 + const void *cqe, 1353 + int sqecount, 1354 + int status 1355 + ), 1356 + 1357 + TP_ARGS(cqe, sqecount, status), 1358 + 1359 + TP_STRUCT__entry( 1360 + __field(const void *, cqe) 1361 + __field(int, sqecount) 1362 + __field(int, status) 1363 + ), 1364 + 1365 + TP_fast_assign( 1366 + __entry->cqe = cqe; 1367 + __entry->sqecount = sqecount; 
1368 + __entry->status = status; 1369 + ), 1370 + 1371 + TP_printk("cqe=%p sqecount=%d status=%d", 1372 + __entry->cqe, __entry->sqecount, __entry->status 1373 + ) 1374 + ); 1375 + 1376 + DEFINE_SENDCOMP_EVENT(read); 1377 + DEFINE_SENDCOMP_EVENT(write); 1378 + 1379 + TRACE_EVENT(svcrdma_cm_event, 1380 + TP_PROTO( 1381 + const struct rdma_cm_event *event, 1382 + const struct sockaddr *sap 1383 + ), 1384 + 1385 + TP_ARGS(event, sap), 1386 + 1387 + TP_STRUCT__entry( 1388 + __field(unsigned int, event) 1389 + __field(int, status) 1390 + __array(__u8, addr, INET6_ADDRSTRLEN + 10) 1391 + ), 1392 + 1393 + TP_fast_assign( 1394 + __entry->event = event->event; 1395 + __entry->status = event->status; 1396 + snprintf(__entry->addr, sizeof(__entry->addr) - 1, 1397 + "%pISpc", sap); 1398 + ), 1399 + 1400 + TP_printk("addr=%s event=%s (%u/%d)", 1401 + __entry->addr, 1402 + rdma_show_cm_event(__entry->event), 1403 + __entry->event, __entry->status 1404 + ) 1405 + ); 1406 + 1407 + TRACE_EVENT(svcrdma_qp_error, 1408 + TP_PROTO( 1409 + const struct ib_event *event, 1410 + const struct sockaddr *sap 1411 + ), 1412 + 1413 + TP_ARGS(event, sap), 1414 + 1415 + TP_STRUCT__entry( 1416 + __field(unsigned int, event) 1417 + __string(device, event->device->name) 1418 + __array(__u8, addr, INET6_ADDRSTRLEN + 10) 1419 + ), 1420 + 1421 + TP_fast_assign( 1422 + __entry->event = event->event; 1423 + __assign_str(device, event->device->name); 1424 + snprintf(__entry->addr, sizeof(__entry->addr) - 1, 1425 + "%pISpc", sap); 1426 + ), 1427 + 1428 + TP_printk("addr=%s dev=%s event=%s (%u)", 1429 + __entry->addr, __get_str(device), 1430 + rdma_show_ib_event(__entry->event), __entry->event 1431 + ) 1432 + ); 1433 + 1434 + DECLARE_EVENT_CLASS(svcrdma_sendqueue_event, 1435 + TP_PROTO( 1436 + const struct svcxprt_rdma *rdma 1437 + ), 1438 + 1439 + TP_ARGS(rdma), 1440 + 1441 + TP_STRUCT__entry( 1442 + __field(int, avail) 1443 + __field(int, depth) 1444 + __string(addr, rdma->sc_xprt.xpt_remotebuf) 1445 + ), 
1446 + 1447 + TP_fast_assign( 1448 + __entry->avail = atomic_read(&rdma->sc_sq_avail); 1449 + __entry->depth = rdma->sc_sq_depth; 1450 + __assign_str(addr, rdma->sc_xprt.xpt_remotebuf); 1451 + ), 1452 + 1453 + TP_printk("addr=%s sc_sq_avail=%d/%d", 1454 + __get_str(addr), __entry->avail, __entry->depth 1455 + ) 1456 + ); 1457 + 1458 + #define DEFINE_SQ_EVENT(name) \ 1459 + DEFINE_EVENT(svcrdma_sendqueue_event, svcrdma_sq_##name,\ 1460 + TP_PROTO( \ 1461 + const struct svcxprt_rdma *rdma \ 1462 + ), \ 1463 + TP_ARGS(rdma)) 1464 + 1465 + DEFINE_SQ_EVENT(full); 1466 + DEFINE_SQ_EVENT(retry); 889 1467 890 1468 #endif /* _TRACE_RPCRDMA_H */ 891 1469
+3 -1
net/sunrpc/auth_gss/gss_rpc_upcall.c
··· 298 298 if (res.context_handle) { 299 299 data->out_handle = rctxh.exported_context_token; 300 300 data->mech_oid.len = rctxh.mech.len; 301 - if (rctxh.mech.data) 301 + if (rctxh.mech.data) { 302 302 memcpy(data->mech_oid.data, rctxh.mech.data, 303 303 data->mech_oid.len); 304 + kfree(rctxh.mech.data); 305 + } 304 306 client_name = rctxh.src_name.display_name; 305 307 } 306 308
+2
net/sunrpc/xprtrdma/backchannel.c
··· 9 9 #include <linux/sunrpc/xprt.h> 10 10 #include <linux/sunrpc/svc.h> 11 11 #include <linux/sunrpc/svc_xprt.h> 12 + #include <linux/sunrpc/svc_rdma.h> 12 13 13 14 #include "xprt_rdma.h" 15 + #include <trace/events/rpcrdma.h> 14 16 15 17 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 16 18 # define RPCDBG_FACILITY RPCDBG_TRANS
+3
net/sunrpc/xprtrdma/fmr_ops.c
··· 20 20 * verb (fmr_op_unmap). 21 21 */ 22 22 23 + #include <linux/sunrpc/svc_rdma.h> 24 + 23 25 #include "xprt_rdma.h" 26 + #include <trace/events/rpcrdma.h> 24 27 25 28 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 26 29 # define RPCDBG_FACILITY RPCDBG_TRANS
+2
net/sunrpc/xprtrdma/frwr_ops.c
··· 71 71 */ 72 72 73 73 #include <linux/sunrpc/rpc_rdma.h> 74 + #include <linux/sunrpc/svc_rdma.h> 74 75 75 76 #include "xprt_rdma.h" 77 + #include <trace/events/rpcrdma.h> 76 78 77 79 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 78 80 # define RPCDBG_FACILITY RPCDBG_TRANS
+3 -1
net/sunrpc/xprtrdma/module.c
··· 13 13 14 14 #include <asm/swab.h> 15 15 16 - #define CREATE_TRACE_POINTS 17 16 #include "xprt_rdma.h" 17 + 18 + #define CREATE_TRACE_POINTS 19 + #include <trace/events/rpcrdma.h> 18 20 19 21 MODULE_AUTHOR("Open Grid Computing and Network Appliance, Inc."); 20 22 MODULE_DESCRIPTION("RPC/RDMA Transport");
+5 -2
net/sunrpc/xprtrdma/rpc_rdma.c
··· 46 46 * to the Linux RPC framework lives. 47 47 */ 48 48 49 - #include "xprt_rdma.h" 50 - 51 49 #include <linux/highmem.h> 50 + 51 + #include <linux/sunrpc/svc_rdma.h> 52 + 53 + #include "xprt_rdma.h" 54 + #include <trace/events/rpcrdma.h> 52 55 53 56 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 54 57 # define RPCDBG_FACILITY RPCDBG_TRANS
+2 -1
net/sunrpc/xprtrdma/svc_rdma.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 1 2 /* 3 + * Copyright (c) 2015-2018 Oracle. All rights reserved. 2 4 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. 3 5 * 4 6 * This software is available to you under a choice of one of two ··· 48 46 #include <linux/sunrpc/clnt.h> 49 47 #include <linux/sunrpc/sched.h> 50 48 #include <linux/sunrpc/svc_rdma.h> 51 - #include "xprt_rdma.h" 52 49 53 50 #define RPCDBG_FACILITY RPCDBG_SVCXPRT 54 51
+22 -32
net/sunrpc/xprtrdma/svc_rdma_backchannel.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 /* 3 - * Copyright (c) 2015 Oracle. All rights reserved. 3 + * Copyright (c) 2015-2018 Oracle. All rights reserved. 4 4 * 5 5 * Support for backward direction RPCs on RPC/RDMA (server-side). 6 6 */ 7 7 8 8 #include <linux/module.h> 9 + 9 10 #include <linux/sunrpc/svc_rdma.h> 11 + 10 12 #include "xprt_rdma.h" 13 + #include <trace/events/rpcrdma.h> 11 14 12 15 #define RPCDBG_FACILITY RPCDBG_SVCXPRT 13 16 ··· 115 112 * the adapter has a small maximum SQ depth. 116 113 */ 117 114 static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma, 118 - struct rpc_rqst *rqst) 115 + struct rpc_rqst *rqst, 116 + struct svc_rdma_send_ctxt *ctxt) 119 117 { 120 - struct svc_rdma_op_ctxt *ctxt; 121 118 int ret; 122 119 123 - ctxt = svc_rdma_get_context(rdma); 124 - 125 - /* rpcrdma_bc_send_request builds the transport header and 126 - * the backchannel RPC message in the same buffer. Thus only 127 - * one SGE is needed to send both. 128 - */ 129 - ret = svc_rdma_map_reply_hdr(rdma, ctxt, rqst->rq_buffer, 130 - rqst->rq_snd_buf.len); 120 + ret = svc_rdma_map_reply_msg(rdma, ctxt, &rqst->rq_snd_buf, NULL); 131 121 if (ret < 0) 132 - goto out_err; 122 + return -EIO; 133 123 134 124 /* Bump page refcnt so Send completion doesn't release 135 125 * the rq_buffer before all retransmits are complete. 
136 126 */ 137 127 get_page(virt_to_page(rqst->rq_buffer)); 138 - ret = svc_rdma_post_send_wr(rdma, ctxt, 1, 0); 139 - if (ret) 140 - goto out_unmap; 141 - 142 - out_err: 143 - dprintk("svcrdma: %s returns %d\n", __func__, ret); 144 - return ret; 145 - 146 - out_unmap: 147 - svc_rdma_unmap_dma(ctxt); 148 - svc_rdma_put_context(ctxt, 1); 149 - ret = -EIO; 150 - goto out_err; 128 + ctxt->sc_send_wr.opcode = IB_WR_SEND; 129 + return svc_rdma_send(rdma, &ctxt->sc_send_wr); 151 130 } 152 131 153 132 /* Server-side transport endpoint wants a whole page for its send ··· 176 191 { 177 192 struct rpc_xprt *xprt = rqst->rq_xprt; 178 193 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); 194 + struct svc_rdma_send_ctxt *ctxt; 179 195 __be32 *p; 180 196 int rc; 181 197 182 - /* Space in the send buffer for an RPC/RDMA header is reserved 183 - * via xprt->tsh_size. 184 - */ 185 - p = rqst->rq_buffer; 198 + ctxt = svc_rdma_send_ctxt_get(rdma); 199 + if (!ctxt) 200 + goto drop_connection; 201 + 202 + p = ctxt->sc_xprt_buf; 186 203 *p++ = rqst->rq_xid; 187 204 *p++ = rpcrdma_version; 188 205 *p++ = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests); ··· 192 205 *p++ = xdr_zero; 193 206 *p++ = xdr_zero; 194 207 *p = xdr_zero; 208 + svc_rdma_sync_reply_hdr(rdma, ctxt, RPCRDMA_HDRLEN_MIN); 195 209 196 210 #ifdef SVCRDMA_BACKCHANNEL_DEBUG 197 211 pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer); 198 212 #endif 199 213 200 - rc = svc_rdma_bc_sendto(rdma, rqst); 201 - if (rc) 214 + rc = svc_rdma_bc_sendto(rdma, rqst, ctxt); 215 + if (rc) { 216 + svc_rdma_send_ctxt_put(rdma, ctxt); 202 217 goto drop_connection; 218 + } 203 219 return rc; 204 220 205 221 drop_connection: ··· 310 320 xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO; 311 321 312 322 xprt->prot = XPRT_TRANSPORT_BC_RDMA; 313 - xprt->tsh_size = RPCRDMA_HDRLEN_MIN / sizeof(__be32); 323 + xprt->tsh_size = 0; 314 324 xprt->ops = &xprt_rdma_bc_procs; 315 325 316 326 memcpy(&xprt->addr, args->dstaddr, args->addrlen);
+314 -123
net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 1 2 /* 2 - * Copyright (c) 2016, 2017 Oracle. All rights reserved. 3 + * Copyright (c) 2016-2018 Oracle. All rights reserved. 3 4 * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved. 4 5 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. 5 6 * ··· 61 60 * svc_rdma_recvfrom must post RDMA Reads to pull the RPC Call's 62 61 * data payload from the client. svc_rdma_recvfrom sets up the 63 62 * RDMA Reads using pages in svc_rqst::rq_pages, which are 64 - * transferred to an svc_rdma_op_ctxt for the duration of the 63 + * transferred to an svc_rdma_recv_ctxt for the duration of the 65 64 * I/O. svc_rdma_recvfrom then returns zero, since the RPC message 66 65 * is still not yet ready. 67 66 * ··· 70 69 * svc_rdma_recvfrom again. This second call may use a different 71 70 * svc_rqst than the first one, thus any information that needs 72 71 * to be preserved across these two calls is kept in an 73 - * svc_rdma_op_ctxt. 72 + * svc_rdma_recv_ctxt. 74 73 * 75 74 * The second call to svc_rdma_recvfrom performs final assembly 76 75 * of the RPC Call message, using the RDMA Read sink pages kept in 77 - * the svc_rdma_op_ctxt. The xdr_buf is copied from the 78 - * svc_rdma_op_ctxt to the second svc_rqst. The second call returns 76 + * the svc_rdma_recv_ctxt. The xdr_buf is copied from the 77 + * svc_rdma_recv_ctxt to the second svc_rqst. The second call returns 79 78 * the length of the completed RPC Call message. 80 79 * 81 80 * Page Management 82 81 * 83 82 * Pages under I/O must be transferred from the first svc_rqst to an 84 - * svc_rdma_op_ctxt before the first svc_rdma_recvfrom call returns. 83 + * svc_rdma_recv_ctxt before the first svc_rdma_recvfrom call returns. 85 84 * 86 85 * The first svc_rqst supplies pages for RDMA Reads. These are moved 87 86 * from rqstp::rq_pages into ctxt::pages. The consumed elements of ··· 89 88 * svc_rdma_recvfrom call returns. 
90 89 * 91 90 * During the second svc_rdma_recvfrom call, RDMA Read sink pages 92 - * are transferred from the svc_rdma_op_ctxt to the second svc_rqst 91 + * are transferred from the svc_rdma_recv_ctxt to the second svc_rqst 93 92 * (see rdma_read_complete() below). 94 93 */ 95 94 95 + #include <linux/spinlock.h> 96 96 #include <asm/unaligned.h> 97 97 #include <rdma/ib_verbs.h> 98 98 #include <rdma/rdma_cm.h> 99 - 100 - #include <linux/spinlock.h> 101 99 102 100 #include <linux/sunrpc/xdr.h> 103 101 #include <linux/sunrpc/debug.h> 104 102 #include <linux/sunrpc/rpc_rdma.h> 105 103 #include <linux/sunrpc/svc_rdma.h> 106 104 105 + #include "xprt_rdma.h" 106 + #include <trace/events/rpcrdma.h> 107 + 107 108 #define RPCDBG_FACILITY RPCDBG_SVCXPRT 108 109 109 - /* 110 - * Replace the pages in the rq_argpages array with the pages from the SGE in 111 - * the RDMA_RECV completion. The SGL should contain full pages up until the 112 - * last one. 113 - */ 114 - static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp, 115 - struct svc_rdma_op_ctxt *ctxt) 110 + static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc); 111 + 112 + static inline struct svc_rdma_recv_ctxt * 113 + svc_rdma_next_recv_ctxt(struct list_head *list) 116 114 { 117 - struct page *page; 118 - int sge_no; 119 - u32 len; 115 + return list_first_entry_or_null(list, struct svc_rdma_recv_ctxt, 116 + rc_list); 117 + } 120 118 121 - /* The reply path assumes the Call's transport header resides 122 - * in rqstp->rq_pages[0]. 
123 - */ 124 - page = ctxt->pages[0]; 125 - put_page(rqstp->rq_pages[0]); 126 - rqstp->rq_pages[0] = page; 119 + static struct svc_rdma_recv_ctxt * 120 + svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma) 121 + { 122 + struct svc_rdma_recv_ctxt *ctxt; 123 + dma_addr_t addr; 124 + void *buffer; 127 125 128 - /* Set up the XDR head */ 129 - rqstp->rq_arg.head[0].iov_base = page_address(page); 130 - rqstp->rq_arg.head[0].iov_len = 131 - min_t(size_t, ctxt->byte_len, ctxt->sge[0].length); 132 - rqstp->rq_arg.len = ctxt->byte_len; 133 - rqstp->rq_arg.buflen = ctxt->byte_len; 126 + ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL); 127 + if (!ctxt) 128 + goto fail0; 129 + buffer = kmalloc(rdma->sc_max_req_size, GFP_KERNEL); 130 + if (!buffer) 131 + goto fail1; 132 + addr = ib_dma_map_single(rdma->sc_pd->device, buffer, 133 + rdma->sc_max_req_size, DMA_FROM_DEVICE); 134 + if (ib_dma_mapping_error(rdma->sc_pd->device, addr)) 135 + goto fail2; 134 136 135 - /* Compute bytes past head in the SGL */ 136 - len = ctxt->byte_len - rqstp->rq_arg.head[0].iov_len; 137 + ctxt->rc_recv_wr.next = NULL; 138 + ctxt->rc_recv_wr.wr_cqe = &ctxt->rc_cqe; 139 + ctxt->rc_recv_wr.sg_list = &ctxt->rc_recv_sge; 140 + ctxt->rc_recv_wr.num_sge = 1; 141 + ctxt->rc_cqe.done = svc_rdma_wc_receive; 142 + ctxt->rc_recv_sge.addr = addr; 143 + ctxt->rc_recv_sge.length = rdma->sc_max_req_size; 144 + ctxt->rc_recv_sge.lkey = rdma->sc_pd->local_dma_lkey; 145 + ctxt->rc_recv_buf = buffer; 146 + ctxt->rc_temp = false; 147 + return ctxt; 137 148 138 - /* If data remains, store it in the pagelist */ 139 - rqstp->rq_arg.page_len = len; 140 - rqstp->rq_arg.page_base = 0; 149 + fail2: 150 + kfree(buffer); 151 + fail1: 152 + kfree(ctxt); 153 + fail0: 154 + return NULL; 155 + } 141 156 142 - sge_no = 1; 143 - while (len && sge_no < ctxt->count) { 144 - page = ctxt->pages[sge_no]; 145 - put_page(rqstp->rq_pages[sge_no]); 146 - rqstp->rq_pages[sge_no] = page; 147 - len -= min_t(u32, len, ctxt->sge[sge_no].length); 148 - 
sge_no++; 157 + static void svc_rdma_recv_ctxt_destroy(struct svcxprt_rdma *rdma, 158 + struct svc_rdma_recv_ctxt *ctxt) 159 + { 160 + ib_dma_unmap_single(rdma->sc_pd->device, ctxt->rc_recv_sge.addr, 161 + ctxt->rc_recv_sge.length, DMA_FROM_DEVICE); 162 + kfree(ctxt->rc_recv_buf); 163 + kfree(ctxt); 164 + } 165 + 166 + /** 167 + * svc_rdma_recv_ctxts_destroy - Release all recv_ctxt's for an xprt 168 + * @rdma: svcxprt_rdma being torn down 169 + * 170 + */ 171 + void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma) 172 + { 173 + struct svc_rdma_recv_ctxt *ctxt; 174 + 175 + while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_recv_ctxts))) { 176 + list_del(&ctxt->rc_list); 177 + svc_rdma_recv_ctxt_destroy(rdma, ctxt); 149 178 } 150 - rqstp->rq_respages = &rqstp->rq_pages[sge_no]; 179 + } 180 + 181 + static struct svc_rdma_recv_ctxt * 182 + svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma) 183 + { 184 + struct svc_rdma_recv_ctxt *ctxt; 185 + 186 + spin_lock(&rdma->sc_recv_lock); 187 + ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_recv_ctxts); 188 + if (!ctxt) 189 + goto out_empty; 190 + list_del(&ctxt->rc_list); 191 + spin_unlock(&rdma->sc_recv_lock); 192 + 193 + out: 194 + ctxt->rc_page_count = 0; 195 + return ctxt; 196 + 197 + out_empty: 198 + spin_unlock(&rdma->sc_recv_lock); 199 + 200 + ctxt = svc_rdma_recv_ctxt_alloc(rdma); 201 + if (!ctxt) 202 + return NULL; 203 + goto out; 204 + } 205 + 206 + /** 207 + * svc_rdma_recv_ctxt_put - Return recv_ctxt to free list 208 + * @rdma: controlling svcxprt_rdma 209 + * @ctxt: object to return to the free list 210 + * 211 + */ 212 + void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma, 213 + struct svc_rdma_recv_ctxt *ctxt) 214 + { 215 + unsigned int i; 216 + 217 + for (i = 0; i < ctxt->rc_page_count; i++) 218 + put_page(ctxt->rc_pages[i]); 219 + 220 + if (!ctxt->rc_temp) { 221 + spin_lock(&rdma->sc_recv_lock); 222 + list_add(&ctxt->rc_list, &rdma->sc_recv_ctxts); 223 + spin_unlock(&rdma->sc_recv_lock); 224 + } else 225 + 
svc_rdma_recv_ctxt_destroy(rdma, ctxt); 226 + } 227 + 228 + static int __svc_rdma_post_recv(struct svcxprt_rdma *rdma, 229 + struct svc_rdma_recv_ctxt *ctxt) 230 + { 231 + struct ib_recv_wr *bad_recv_wr; 232 + int ret; 233 + 234 + svc_xprt_get(&rdma->sc_xprt); 235 + ret = ib_post_recv(rdma->sc_qp, &ctxt->rc_recv_wr, &bad_recv_wr); 236 + trace_svcrdma_post_recv(&ctxt->rc_recv_wr, ret); 237 + if (ret) 238 + goto err_post; 239 + return 0; 240 + 241 + err_post: 242 + svc_rdma_recv_ctxt_put(rdma, ctxt); 243 + svc_xprt_put(&rdma->sc_xprt); 244 + return ret; 245 + } 246 + 247 + static int svc_rdma_post_recv(struct svcxprt_rdma *rdma) 248 + { 249 + struct svc_rdma_recv_ctxt *ctxt; 250 + 251 + ctxt = svc_rdma_recv_ctxt_get(rdma); 252 + if (!ctxt) 253 + return -ENOMEM; 254 + return __svc_rdma_post_recv(rdma, ctxt); 255 + } 256 + 257 + /** 258 + * svc_rdma_post_recvs - Post initial set of Recv WRs 259 + * @rdma: fresh svcxprt_rdma 260 + * 261 + * Returns true if successful, otherwise false. 262 + */ 263 + bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma) 264 + { 265 + struct svc_rdma_recv_ctxt *ctxt; 266 + unsigned int i; 267 + int ret; 268 + 269 + for (i = 0; i < rdma->sc_max_requests; i++) { 270 + ctxt = svc_rdma_recv_ctxt_get(rdma); 271 + if (!ctxt) 272 + return false; 273 + ctxt->rc_temp = true; 274 + ret = __svc_rdma_post_recv(rdma, ctxt); 275 + if (ret) { 276 + pr_err("svcrdma: failure posting recv buffers: %d\n", 277 + ret); 278 + return false; 279 + } 280 + } 281 + return true; 282 + } 283 + 284 + /** 285 + * svc_rdma_wc_receive - Invoked by RDMA provider for each polled Receive WC 286 + * @cq: Completion Queue context 287 + * @wc: Work Completion object 288 + * 289 + * NB: The svc_xprt/svcxprt_rdma is pinned whenever it's possible that 290 + * the Receive completion handler could be running. 
291 + */ 292 + static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) 293 + { 294 + struct svcxprt_rdma *rdma = cq->cq_context; 295 + struct ib_cqe *cqe = wc->wr_cqe; 296 + struct svc_rdma_recv_ctxt *ctxt; 297 + 298 + trace_svcrdma_wc_receive(wc); 299 + 300 + /* WARNING: Only wc->wr_cqe and wc->status are reliable */ 301 + ctxt = container_of(cqe, struct svc_rdma_recv_ctxt, rc_cqe); 302 + 303 + if (wc->status != IB_WC_SUCCESS) 304 + goto flushed; 305 + 306 + if (svc_rdma_post_recv(rdma)) 307 + goto post_err; 308 + 309 + /* All wc fields are now known to be valid */ 310 + ctxt->rc_byte_len = wc->byte_len; 311 + ib_dma_sync_single_for_cpu(rdma->sc_pd->device, 312 + ctxt->rc_recv_sge.addr, 313 + wc->byte_len, DMA_FROM_DEVICE); 314 + 315 + spin_lock(&rdma->sc_rq_dto_lock); 316 + list_add_tail(&ctxt->rc_list, &rdma->sc_rq_dto_q); 317 + spin_unlock(&rdma->sc_rq_dto_lock); 318 + set_bit(XPT_DATA, &rdma->sc_xprt.xpt_flags); 319 + if (!test_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags)) 320 + svc_xprt_enqueue(&rdma->sc_xprt); 321 + goto out; 322 + 323 + flushed: 324 + if (wc->status != IB_WC_WR_FLUSH_ERR) 325 + pr_err("svcrdma: Recv: %s (%u/0x%x)\n", 326 + ib_wc_status_msg(wc->status), 327 + wc->status, wc->vendor_err); 328 + post_err: 329 + svc_rdma_recv_ctxt_put(rdma, ctxt); 330 + set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); 331 + svc_xprt_enqueue(&rdma->sc_xprt); 332 + out: 333 + svc_xprt_put(&rdma->sc_xprt); 334 + } 335 + 336 + /** 337 + * svc_rdma_flush_recv_queues - Drain pending Receive work 338 + * @rdma: svcxprt_rdma being shut down 339 + * 340 + */ 341 + void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma) 342 + { 343 + struct svc_rdma_recv_ctxt *ctxt; 344 + 345 + while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_read_complete_q))) { 346 + list_del(&ctxt->rc_list); 347 + svc_rdma_recv_ctxt_put(rdma, ctxt); 348 + } 349 + while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_rq_dto_q))) { 350 + list_del(&ctxt->rc_list); 351 + svc_rdma_recv_ctxt_put(rdma, 
ctxt); 352 + } 353 + } 354 + 355 + static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp, 356 + struct svc_rdma_recv_ctxt *ctxt) 357 + { 358 + struct xdr_buf *arg = &rqstp->rq_arg; 359 + 360 + arg->head[0].iov_base = ctxt->rc_recv_buf; 361 + arg->head[0].iov_len = ctxt->rc_byte_len; 362 + arg->tail[0].iov_base = NULL; 363 + arg->tail[0].iov_len = 0; 364 + arg->page_len = 0; 365 + arg->page_base = 0; 366 + arg->buflen = ctxt->rc_byte_len; 367 + arg->len = ctxt->rc_byte_len; 368 + 369 + rqstp->rq_respages = &rqstp->rq_pages[0]; 151 370 rqstp->rq_next_page = rqstp->rq_respages + 1; 152 - 153 - /* If not all pages were used from the SGL, free the remaining ones */ 154 - len = sge_no; 155 - while (sge_no < ctxt->count) { 156 - page = ctxt->pages[sge_no++]; 157 - put_page(page); 158 - } 159 - ctxt->count = len; 160 - 161 - /* Set up tail */ 162 - rqstp->rq_arg.tail[0].iov_base = NULL; 163 - rqstp->rq_arg.tail[0].iov_len = 0; 164 371 } 165 372 166 373 /* This accommodates the largest possible Write chunk, ··· 503 294 { 504 295 __be32 *p, *end, *rdma_argp; 505 296 unsigned int hdr_len; 506 - char *proc; 507 297 508 298 /* Verify that there's enough bytes for header + something */ 509 299 if (rq_arg->len <= RPCRDMA_HDRLEN_ERR) ··· 514 306 515 307 switch (*(rdma_argp + 3)) { 516 308 case rdma_msg: 517 - proc = "RDMA_MSG"; 518 309 break; 519 310 case rdma_nomsg: 520 - proc = "RDMA_NOMSG"; 521 311 break; 522 312 523 313 case rdma_done: ··· 545 339 hdr_len = (unsigned long)p - (unsigned long)rdma_argp; 546 340 rq_arg->head[0].iov_len -= hdr_len; 547 341 rq_arg->len -= hdr_len; 548 - dprintk("svcrdma: received %s request for XID 0x%08x, hdr_len=%u\n", 549 - proc, be32_to_cpup(rdma_argp), hdr_len); 342 + trace_svcrdma_decode_rqst(rdma_argp, hdr_len); 550 343 return hdr_len; 551 344 552 345 out_short: 553 - dprintk("svcrdma: header too short = %d\n", rq_arg->len); 346 + trace_svcrdma_decode_short(rq_arg->len); 554 347 return -EINVAL; 555 348 556 349 out_version: 557 - 
dprintk("svcrdma: bad xprt version: %u\n", 558 - be32_to_cpup(rdma_argp + 1)); 350 + trace_svcrdma_decode_badvers(rdma_argp); 559 351 return -EPROTONOSUPPORT; 560 352 561 353 out_drop: 562 - dprintk("svcrdma: dropping RDMA_DONE/ERROR message\n"); 354 + trace_svcrdma_decode_drop(rdma_argp); 563 355 return 0; 564 356 565 357 out_proc: 566 - dprintk("svcrdma: bad rdma procedure (%u)\n", 567 - be32_to_cpup(rdma_argp + 3)); 358 + trace_svcrdma_decode_badproc(rdma_argp); 568 359 return -EINVAL; 569 360 570 361 out_inval: 571 - dprintk("svcrdma: failed to parse transport header\n"); 362 + trace_svcrdma_decode_parse(rdma_argp); 572 363 return -EINVAL; 573 364 } 574 365 575 366 static void rdma_read_complete(struct svc_rqst *rqstp, 576 - struct svc_rdma_op_ctxt *head) 367 + struct svc_rdma_recv_ctxt *head) 577 368 { 578 369 int page_no; 579 370 580 - /* Copy RPC pages */ 581 - for (page_no = 0; page_no < head->count; page_no++) { 371 + /* Move Read chunk pages to rqstp so that they will be released 372 + * when svc_process is done with them. 373 + */ 374 + for (page_no = 0; page_no < head->rc_page_count; page_no++) { 582 375 put_page(rqstp->rq_pages[page_no]); 583 - rqstp->rq_pages[page_no] = head->pages[page_no]; 376 + rqstp->rq_pages[page_no] = head->rc_pages[page_no]; 584 377 } 378 + head->rc_page_count = 0; 585 379 586 380 /* Point rq_arg.pages past header */ 587 - rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count]; 588 - rqstp->rq_arg.page_len = head->arg.page_len; 381 + rqstp->rq_arg.pages = &rqstp->rq_pages[head->rc_hdr_count]; 382 + rqstp->rq_arg.page_len = head->rc_arg.page_len; 589 383 590 384 /* rq_respages starts after the last arg page */ 591 385 rqstp->rq_respages = &rqstp->rq_pages[page_no]; 592 386 rqstp->rq_next_page = rqstp->rq_respages + 1; 593 387 594 388 /* Rebuild rq_arg head and tail. 
*/ 595 - rqstp->rq_arg.head[0] = head->arg.head[0]; 596 - rqstp->rq_arg.tail[0] = head->arg.tail[0]; 597 - rqstp->rq_arg.len = head->arg.len; 598 - rqstp->rq_arg.buflen = head->arg.buflen; 389 + rqstp->rq_arg.head[0] = head->rc_arg.head[0]; 390 + rqstp->rq_arg.tail[0] = head->rc_arg.tail[0]; 391 + rqstp->rq_arg.len = head->rc_arg.len; 392 + rqstp->rq_arg.buflen = head->rc_arg.buflen; 599 393 } 600 394 601 395 static void svc_rdma_send_error(struct svcxprt_rdma *xprt, 602 396 __be32 *rdma_argp, int status) 603 397 { 604 - struct svc_rdma_op_ctxt *ctxt; 605 - __be32 *p, *err_msgp; 398 + struct svc_rdma_send_ctxt *ctxt; 606 399 unsigned int length; 607 - struct page *page; 400 + __be32 *p; 608 401 int ret; 609 402 610 - page = alloc_page(GFP_KERNEL); 611 - if (!page) 403 + ctxt = svc_rdma_send_ctxt_get(xprt); 404 + if (!ctxt) 612 405 return; 613 - err_msgp = page_address(page); 614 406 615 - p = err_msgp; 407 + p = ctxt->sc_xprt_buf; 616 408 *p++ = *rdma_argp; 617 409 *p++ = *(rdma_argp + 1); 618 410 *p++ = xprt->sc_fc_credits; 619 411 *p++ = rdma_error; 620 - if (status == -EPROTONOSUPPORT) { 412 + switch (status) { 413 + case -EPROTONOSUPPORT: 621 414 *p++ = err_vers; 622 415 *p++ = rpcrdma_version; 623 416 *p++ = rpcrdma_version; 624 - } else { 417 + trace_svcrdma_err_vers(*rdma_argp); 418 + break; 419 + default: 625 420 *p++ = err_chunk; 421 + trace_svcrdma_err_chunk(*rdma_argp); 626 422 } 627 - length = (unsigned long)p - (unsigned long)err_msgp; 423 + length = (unsigned long)p - (unsigned long)ctxt->sc_xprt_buf; 424 + svc_rdma_sync_reply_hdr(xprt, ctxt, length); 628 425 629 - /* Map transport header; no RPC message payload */ 630 - ctxt = svc_rdma_get_context(xprt); 631 - ret = svc_rdma_map_reply_hdr(xprt, ctxt, err_msgp, length); 632 - if (ret) { 633 - dprintk("svcrdma: Error %d mapping send for protocol error\n", 634 - ret); 635 - return; 636 - } 637 - 638 - ret = svc_rdma_post_send_wr(xprt, ctxt, 1, 0); 639 - if (ret) { 640 - dprintk("svcrdma: Error %d 
posting send for protocol error\n", 641 - ret); 642 - svc_rdma_unmap_dma(ctxt); 643 - svc_rdma_put_context(ctxt, 1); 644 - } 426 + ctxt->sc_send_wr.opcode = IB_WR_SEND; 427 + ret = svc_rdma_send(xprt, &ctxt->sc_send_wr); 428 + if (ret) 429 + svc_rdma_send_ctxt_put(xprt, ctxt); 645 430 } 646 431 647 432 /* By convention, backchannel calls arrive via rdma_msg type ··· 704 507 struct svc_xprt *xprt = rqstp->rq_xprt; 705 508 struct svcxprt_rdma *rdma_xprt = 706 509 container_of(xprt, struct svcxprt_rdma, sc_xprt); 707 - struct svc_rdma_op_ctxt *ctxt; 510 + struct svc_rdma_recv_ctxt *ctxt; 708 511 __be32 *p; 709 512 int ret; 710 513 711 514 spin_lock(&rdma_xprt->sc_rq_dto_lock); 712 - if (!list_empty(&rdma_xprt->sc_read_complete_q)) { 713 - ctxt = list_first_entry(&rdma_xprt->sc_read_complete_q, 714 - struct svc_rdma_op_ctxt, list); 715 - list_del(&ctxt->list); 515 + ctxt = svc_rdma_next_recv_ctxt(&rdma_xprt->sc_read_complete_q); 516 + if (ctxt) { 517 + list_del(&ctxt->rc_list); 716 518 spin_unlock(&rdma_xprt->sc_rq_dto_lock); 717 519 rdma_read_complete(rqstp, ctxt); 718 520 goto complete; 719 - } else if (!list_empty(&rdma_xprt->sc_rq_dto_q)) { 720 - ctxt = list_first_entry(&rdma_xprt->sc_rq_dto_q, 721 - struct svc_rdma_op_ctxt, list); 722 - list_del(&ctxt->list); 723 - } else { 521 + } 522 + ctxt = svc_rdma_next_recv_ctxt(&rdma_xprt->sc_rq_dto_q); 523 + if (!ctxt) { 724 524 /* No new incoming requests, terminate the loop */ 725 525 clear_bit(XPT_DATA, &xprt->xpt_flags); 726 526 spin_unlock(&rdma_xprt->sc_rq_dto_lock); 727 527 return 0; 728 528 } 529 + list_del(&ctxt->rc_list); 729 530 spin_unlock(&rdma_xprt->sc_rq_dto_lock); 730 531 731 - dprintk("svcrdma: recvfrom: ctxt=%p on xprt=%p, rqstp=%p\n", 732 - ctxt, rdma_xprt, rqstp); 733 532 atomic_inc(&rdma_stat_recv); 734 533 735 534 svc_rdma_build_arg_xdr(rqstp, ctxt); ··· 741 548 if (svc_rdma_is_backchannel_reply(xprt, p)) { 742 549 ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, p, 743 550 &rqstp->rq_arg); 744 - 
svc_rdma_put_context(ctxt, 0); 551 + svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); 745 552 return ret; 746 553 } 747 554 ··· 750 557 goto out_readchunk; 751 558 752 559 complete: 753 - svc_rdma_put_context(ctxt, 0); 754 - dprintk("svcrdma: recvfrom: xprt=%p, rqstp=%p, rq_arg.len=%u\n", 755 - rdma_xprt, rqstp, rqstp->rq_arg.len); 560 + rqstp->rq_xprt_ctxt = ctxt; 756 561 rqstp->rq_prot = IPPROTO_MAX; 757 562 svc_xprt_copy_addrs(rqstp, xprt); 758 563 return rqstp->rq_arg.len; ··· 763 572 764 573 out_err: 765 574 svc_rdma_send_error(rdma_xprt, p, ret); 766 - svc_rdma_put_context(ctxt, 0); 575 + svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); 767 576 return 0; 768 577 769 578 out_postfail: 770 579 if (ret == -EINVAL) 771 580 svc_rdma_send_error(rdma_xprt, p, ret); 772 - svc_rdma_put_context(ctxt, 1); 581 + svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); 773 582 return ret; 774 583 775 584 out_drop: 776 - svc_rdma_put_context(ctxt, 1); 585 + svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); 777 586 return 0; 778 587 }
+67 -66
net/sunrpc/xprtrdma/svc_rdma_rw.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 /* 3 - * Copyright (c) 2016 Oracle. All rights reserved. 3 + * Copyright (c) 2016-2018 Oracle. All rights reserved. 4 4 * 5 5 * Use the core R/W API to move RPC-over-RDMA Read and Write chunks. 6 6 */ 7 + 8 + #include <rdma/rw.h> 7 9 8 10 #include <linux/sunrpc/rpc_rdma.h> 9 11 #include <linux/sunrpc/svc_rdma.h> 10 12 #include <linux/sunrpc/debug.h> 11 13 12 - #include <rdma/rw.h> 14 + #include "xprt_rdma.h" 15 + #include <trace/events/rpcrdma.h> 13 16 14 17 #define RPCDBG_FACILITY RPCDBG_SVCXPRT 15 18 ··· 208 205 struct svc_rdma_write_info *info = 209 206 container_of(cc, struct svc_rdma_write_info, wi_cc); 210 207 208 + trace_svcrdma_wc_write(wc); 209 + 211 210 atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail); 212 211 wake_up(&rdma->sc_send_wait); 213 212 ··· 227 222 /* State for pulling a Read chunk. 228 223 */ 229 224 struct svc_rdma_read_info { 230 - struct svc_rdma_op_ctxt *ri_readctxt; 225 + struct svc_rdma_recv_ctxt *ri_readctxt; 231 226 unsigned int ri_position; 232 227 unsigned int ri_pageno; 233 228 unsigned int ri_pageoff; ··· 271 266 struct svc_rdma_read_info *info = 272 267 container_of(cc, struct svc_rdma_read_info, ri_cc); 273 268 269 + trace_svcrdma_wc_read(wc); 270 + 274 271 atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail); 275 272 wake_up(&rdma->sc_send_wait); 276 273 ··· 282 275 pr_err("svcrdma: read ctx: %s (%u/0x%x)\n", 283 276 ib_wc_status_msg(wc->status), 284 277 wc->status, wc->vendor_err); 285 - svc_rdma_put_context(info->ri_readctxt, 1); 278 + svc_rdma_recv_ctxt_put(rdma, info->ri_readctxt); 286 279 } else { 287 280 spin_lock(&rdma->sc_rq_dto_lock); 288 - list_add_tail(&info->ri_readctxt->list, 281 + list_add_tail(&info->ri_readctxt->rc_list, 289 282 &rdma->sc_read_complete_q); 290 283 spin_unlock(&rdma->sc_rq_dto_lock); 291 284 ··· 330 323 if (atomic_sub_return(cc->cc_sqecount, 331 324 &rdma->sc_sq_avail) > 0) { 332 325 ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr); 326 + 
trace_svcrdma_post_rw(&cc->cc_cqe, 327 + cc->cc_sqecount, ret); 333 328 if (ret) 334 329 break; 335 330 return 0; 336 331 } 337 332 338 - atomic_inc(&rdma_stat_sq_starve); 333 + trace_svcrdma_sq_full(rdma); 339 334 atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail); 340 335 wait_event(rdma->sc_send_wait, 341 336 atomic_read(&rdma->sc_sq_avail) > cc->cc_sqecount); 337 + trace_svcrdma_sq_retry(rdma); 342 338 } while (1); 343 339 344 - pr_err("svcrdma: ib_post_send failed (%d)\n", ret); 345 340 set_bit(XPT_CLOSE, &xprt->xpt_flags); 346 341 347 342 /* If even one was posted, there will be a completion. */ ··· 446 437 if (ret < 0) 447 438 goto out_initerr; 448 439 440 + trace_svcrdma_encode_wseg(seg_handle, write_len, seg_offset); 449 441 list_add(&ctxt->rw_list, &cc->cc_rwctxts); 450 442 cc->cc_sqecount += ret; 451 443 if (write_len == seg_length - info->wi_seg_off) { ··· 472 462 473 463 out_initerr: 474 464 svc_rdma_put_rw_ctxt(rdma, ctxt); 475 - pr_err("svcrdma: failed to map pagelist (%d)\n", ret); 465 + trace_svcrdma_dma_map_rwctx(rdma, ret); 476 466 return -EIO; 477 467 } 478 468 ··· 536 526 ret = svc_rdma_post_chunk_ctxt(&info->wi_cc); 537 527 if (ret < 0) 538 528 goto out_err; 529 + 530 + trace_svcrdma_encode_write(xdr->page_len); 539 531 return xdr->page_len; 540 532 541 533 out_err: ··· 594 582 ret = svc_rdma_post_chunk_ctxt(&info->wi_cc); 595 583 if (ret < 0) 596 584 goto out_err; 585 + 586 + trace_svcrdma_encode_reply(consumed); 597 587 return consumed; 598 588 599 589 out_err: ··· 607 593 struct svc_rqst *rqstp, 608 594 u32 rkey, u32 len, u64 offset) 609 595 { 610 - struct svc_rdma_op_ctxt *head = info->ri_readctxt; 596 + struct svc_rdma_recv_ctxt *head = info->ri_readctxt; 611 597 struct svc_rdma_chunk_ctxt *cc = &info->ri_cc; 612 598 struct svc_rdma_rw_ctxt *ctxt; 613 599 unsigned int sge_no, seg_len; ··· 620 606 goto out_noctx; 621 607 ctxt->rw_nents = sge_no; 622 608 623 - dprintk("svcrdma: reading segment %u@0x%016llx:0x%08x (%u sges)\n", 624 - len, 
offset, rkey, sge_no); 625 - 626 609 sg = ctxt->rw_sg_table.sgl; 627 610 for (sge_no = 0; sge_no < ctxt->rw_nents; sge_no++) { 628 611 seg_len = min_t(unsigned int, len, 629 612 PAGE_SIZE - info->ri_pageoff); 630 613 631 - head->arg.pages[info->ri_pageno] = 614 + head->rc_arg.pages[info->ri_pageno] = 632 615 rqstp->rq_pages[info->ri_pageno]; 633 616 if (!info->ri_pageoff) 634 - head->count++; 617 + head->rc_page_count++; 635 618 636 619 sg_set_page(sg, rqstp->rq_pages[info->ri_pageno], 637 620 seg_len, info->ri_pageoff); ··· 667 656 return -EINVAL; 668 657 669 658 out_initerr: 659 + trace_svcrdma_dma_map_rwctx(cc->cc_rdma, ret); 670 660 svc_rdma_put_rw_ctxt(cc->cc_rdma, ctxt); 671 - pr_err("svcrdma: failed to map pagelist (%d)\n", ret); 672 661 return -EIO; 673 662 } 674 663 ··· 697 686 if (ret < 0) 698 687 break; 699 688 689 + trace_svcrdma_encode_rseg(rs_handle, rs_length, rs_offset); 700 690 info->ri_chunklen += rs_length; 701 691 } 702 692 ··· 705 693 } 706 694 707 695 /* Construct RDMA Reads to pull over a normal Read chunk. The chunk 708 - * data lands in the page list of head->arg.pages. 696 + * data lands in the page list of head->rc_arg.pages. 709 697 * 710 - * Currently NFSD does not look at the head->arg.tail[0] iovec. 698 + * Currently NFSD does not look at the head->rc_arg.tail[0] iovec. 711 699 * Therefore, XDR round-up of the Read chunk and trailing 712 700 * inline content must both be added at the end of the pagelist. 
713 701 */ ··· 715 703 struct svc_rdma_read_info *info, 716 704 __be32 *p) 717 705 { 718 - struct svc_rdma_op_ctxt *head = info->ri_readctxt; 706 + struct svc_rdma_recv_ctxt *head = info->ri_readctxt; 719 707 int ret; 720 - 721 - dprintk("svcrdma: Reading Read chunk at position %u\n", 722 - info->ri_position); 723 - 724 - info->ri_pageno = head->hdr_count; 725 - info->ri_pageoff = 0; 726 708 727 709 ret = svc_rdma_build_read_chunk(rqstp, info, p); 728 710 if (ret < 0) 729 711 goto out; 712 + 713 + trace_svcrdma_encode_read(info->ri_chunklen, info->ri_position); 714 + 715 + head->rc_hdr_count = 0; 730 716 731 717 /* Split the Receive buffer between the head and tail 732 718 * buffers at Read chunk's position. XDR roundup of the 733 719 * chunk is not included in either the pagelist or in 734 720 * the tail. 735 721 */ 736 - head->arg.tail[0].iov_base = 737 - head->arg.head[0].iov_base + info->ri_position; 738 - head->arg.tail[0].iov_len = 739 - head->arg.head[0].iov_len - info->ri_position; 740 - head->arg.head[0].iov_len = info->ri_position; 722 + head->rc_arg.tail[0].iov_base = 723 + head->rc_arg.head[0].iov_base + info->ri_position; 724 + head->rc_arg.tail[0].iov_len = 725 + head->rc_arg.head[0].iov_len - info->ri_position; 726 + head->rc_arg.head[0].iov_len = info->ri_position; 741 727 742 728 /* Read chunk may need XDR roundup (see RFC 8166, s. 3.4.5.2). 743 729 * ··· 748 738 */ 749 739 info->ri_chunklen = XDR_QUADLEN(info->ri_chunklen) << 2; 750 740 751 - head->arg.page_len = info->ri_chunklen; 752 - head->arg.len += info->ri_chunklen; 753 - head->arg.buflen += info->ri_chunklen; 741 + head->rc_arg.page_len = info->ri_chunklen; 742 + head->rc_arg.len += info->ri_chunklen; 743 + head->rc_arg.buflen += info->ri_chunklen; 754 744 755 745 out: 756 746 return ret; ··· 759 749 /* Construct RDMA Reads to pull over a Position Zero Read chunk. 
760 750 * The start of the data lands in the first page just after 761 751 * the Transport header, and the rest lands in the page list of 762 - * head->arg.pages. 752 + * head->rc_arg.pages. 763 753 * 764 754 * Assumptions: 765 755 * - A PZRC has an XDR-aligned length (no implicit round-up). ··· 771 761 struct svc_rdma_read_info *info, 772 762 __be32 *p) 773 763 { 774 - struct svc_rdma_op_ctxt *head = info->ri_readctxt; 764 + struct svc_rdma_recv_ctxt *head = info->ri_readctxt; 775 765 int ret; 776 - 777 - dprintk("svcrdma: Reading Position Zero Read chunk\n"); 778 - 779 - info->ri_pageno = head->hdr_count - 1; 780 - info->ri_pageoff = offset_in_page(head->byte_len); 781 766 782 767 ret = svc_rdma_build_read_chunk(rqstp, info, p); 783 768 if (ret < 0) 784 769 goto out; 785 770 786 - head->arg.len += info->ri_chunklen; 787 - head->arg.buflen += info->ri_chunklen; 771 + trace_svcrdma_encode_pzr(info->ri_chunklen); 788 772 789 - if (head->arg.buflen <= head->sge[0].length) { 790 - /* Transport header and RPC message fit entirely 791 - * in page where head iovec resides. 792 - */ 793 - head->arg.head[0].iov_len = info->ri_chunklen; 794 - } else { 795 - /* Transport header and part of RPC message reside 796 - * in the head iovec's page. 797 - */ 798 - head->arg.head[0].iov_len = 799 - head->sge[0].length - head->byte_len; 800 - head->arg.page_len = 801 - info->ri_chunklen - head->arg.head[0].iov_len; 802 - } 773 + head->rc_arg.len += info->ri_chunklen; 774 + head->rc_arg.buflen += info->ri_chunklen; 775 + 776 + head->rc_hdr_count = 1; 777 + head->rc_arg.head[0].iov_base = page_address(head->rc_pages[0]); 778 + head->rc_arg.head[0].iov_len = min_t(size_t, PAGE_SIZE, 779 + info->ri_chunklen); 780 + 781 + head->rc_arg.page_len = info->ri_chunklen - 782 + head->rc_arg.head[0].iov_len; 803 783 804 784 out: 805 785 return ret; ··· 813 813 * - All Read segments in @p have the same Position value. 
814 814 */ 815 815 int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp, 816 - struct svc_rdma_op_ctxt *head, __be32 *p) 816 + struct svc_rdma_recv_ctxt *head, __be32 *p) 817 817 { 818 818 struct svc_rdma_read_info *info; 819 819 struct page **page; 820 820 int ret; 821 821 822 822 /* The request (with page list) is constructed in 823 - * head->arg. Pages involved with RDMA Read I/O are 823 + * head->rc_arg. Pages involved with RDMA Read I/O are 824 824 * transferred there. 825 825 */ 826 - head->hdr_count = head->count; 827 - head->arg.head[0] = rqstp->rq_arg.head[0]; 828 - head->arg.tail[0] = rqstp->rq_arg.tail[0]; 829 - head->arg.pages = head->pages; 830 - head->arg.page_base = 0; 831 - head->arg.page_len = 0; 832 - head->arg.len = rqstp->rq_arg.len; 833 - head->arg.buflen = rqstp->rq_arg.buflen; 826 + head->rc_arg.head[0] = rqstp->rq_arg.head[0]; 827 + head->rc_arg.tail[0] = rqstp->rq_arg.tail[0]; 828 + head->rc_arg.pages = head->rc_pages; 829 + head->rc_arg.page_base = 0; 830 + head->rc_arg.page_len = 0; 831 + head->rc_arg.len = rqstp->rq_arg.len; 832 + head->rc_arg.buflen = rqstp->rq_arg.buflen; 834 833 835 834 info = svc_rdma_read_info_alloc(rdma); 836 835 if (!info) 837 836 return -ENOMEM; 838 837 info->ri_readctxt = head; 838 + info->ri_pageno = 0; 839 + info->ri_pageoff = 0; 839 840 840 841 info->ri_position = be32_to_cpup(p + 1); 841 842 if (info->ri_position) ··· 857 856 858 857 out: 859 858 /* Read sink pages have been moved from rqstp->rq_pages to 860 - * head->arg.pages. Force svc_recv to refill those slots 859 + * head->rc_arg.pages. Force svc_recv to refill those slots 861 860 * in rq_pages. 862 861 */ 863 862 for (page = rqstp->rq_pages; page < rqstp->rq_respages; page++)
+329 -189
net/sunrpc/xprtrdma/svc_rdma_sendto.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 1 2 /* 2 - * Copyright (c) 2016 Oracle. All rights reserved. 3 + * Copyright (c) 2016-2018 Oracle. All rights reserved. 3 4 * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved. 4 5 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. 5 6 * ··· 75 74 * DMA-unmap the pages under I/O for that Write segment. The Write 76 75 * completion handler does not release any pages. 77 76 * 78 - * When the Send WR is constructed, it also gets its own svc_rdma_op_ctxt. 77 + * When the Send WR is constructed, it also gets its own svc_rdma_send_ctxt. 79 78 * The ownership of all of the Reply's pages are transferred into that 80 79 * ctxt, the Send WR is posted, and sendto returns. 81 80 * 82 - * The svc_rdma_op_ctxt is presented when the Send WR completes. The 81 + * The svc_rdma_send_ctxt is presented when the Send WR completes. The 83 82 * Send completion handler finally releases the Reply's pages. 84 83 * 85 84 * This mechanism also assumes that completions on the transport's Send ··· 99 98 * where two different Write segments send portions of the same page. 
100 99 */ 101 100 102 - #include <linux/sunrpc/debug.h> 103 - #include <linux/sunrpc/rpc_rdma.h> 104 101 #include <linux/spinlock.h> 105 102 #include <asm/unaligned.h> 103 + 106 104 #include <rdma/ib_verbs.h> 107 105 #include <rdma/rdma_cm.h> 106 + 107 + #include <linux/sunrpc/debug.h> 108 + #include <linux/sunrpc/rpc_rdma.h> 108 109 #include <linux/sunrpc/svc_rdma.h> 109 110 111 + #include "xprt_rdma.h" 112 + #include <trace/events/rpcrdma.h> 113 + 110 114 #define RPCDBG_FACILITY RPCDBG_SVCXPRT 115 + 116 + static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc); 117 + 118 + static inline struct svc_rdma_send_ctxt * 119 + svc_rdma_next_send_ctxt(struct list_head *list) 120 + { 121 + return list_first_entry_or_null(list, struct svc_rdma_send_ctxt, 122 + sc_list); 123 + } 124 + 125 + static struct svc_rdma_send_ctxt * 126 + svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma) 127 + { 128 + struct svc_rdma_send_ctxt *ctxt; 129 + dma_addr_t addr; 130 + void *buffer; 131 + size_t size; 132 + int i; 133 + 134 + size = sizeof(*ctxt); 135 + size += rdma->sc_max_send_sges * sizeof(struct ib_sge); 136 + ctxt = kmalloc(size, GFP_KERNEL); 137 + if (!ctxt) 138 + goto fail0; 139 + buffer = kmalloc(rdma->sc_max_req_size, GFP_KERNEL); 140 + if (!buffer) 141 + goto fail1; 142 + addr = ib_dma_map_single(rdma->sc_pd->device, buffer, 143 + rdma->sc_max_req_size, DMA_TO_DEVICE); 144 + if (ib_dma_mapping_error(rdma->sc_pd->device, addr)) 145 + goto fail2; 146 + 147 + ctxt->sc_send_wr.next = NULL; 148 + ctxt->sc_send_wr.wr_cqe = &ctxt->sc_cqe; 149 + ctxt->sc_send_wr.sg_list = ctxt->sc_sges; 150 + ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED; 151 + ctxt->sc_cqe.done = svc_rdma_wc_send; 152 + ctxt->sc_xprt_buf = buffer; 153 + ctxt->sc_sges[0].addr = addr; 154 + 155 + for (i = 0; i < rdma->sc_max_send_sges; i++) 156 + ctxt->sc_sges[i].lkey = rdma->sc_pd->local_dma_lkey; 157 + return ctxt; 158 + 159 + fail2: 160 + kfree(buffer); 161 + fail1: 162 + kfree(ctxt); 163 + fail0: 164 + 
return NULL; 165 + } 166 + 167 + /** 168 + * svc_rdma_send_ctxts_destroy - Release all send_ctxt's for an xprt 169 + * @rdma: svcxprt_rdma being torn down 170 + * 171 + */ 172 + void svc_rdma_send_ctxts_destroy(struct svcxprt_rdma *rdma) 173 + { 174 + struct svc_rdma_send_ctxt *ctxt; 175 + 176 + while ((ctxt = svc_rdma_next_send_ctxt(&rdma->sc_send_ctxts))) { 177 + list_del(&ctxt->sc_list); 178 + ib_dma_unmap_single(rdma->sc_pd->device, 179 + ctxt->sc_sges[0].addr, 180 + rdma->sc_max_req_size, 181 + DMA_TO_DEVICE); 182 + kfree(ctxt->sc_xprt_buf); 183 + kfree(ctxt); 184 + } 185 + } 186 + 187 + /** 188 + * svc_rdma_send_ctxt_get - Get a free send_ctxt 189 + * @rdma: controlling svcxprt_rdma 190 + * 191 + * Returns a ready-to-use send_ctxt, or NULL if none are 192 + * available and a fresh one cannot be allocated. 193 + */ 194 + struct svc_rdma_send_ctxt *svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma) 195 + { 196 + struct svc_rdma_send_ctxt *ctxt; 197 + 198 + spin_lock(&rdma->sc_send_lock); 199 + ctxt = svc_rdma_next_send_ctxt(&rdma->sc_send_ctxts); 200 + if (!ctxt) 201 + goto out_empty; 202 + list_del(&ctxt->sc_list); 203 + spin_unlock(&rdma->sc_send_lock); 204 + 205 + out: 206 + ctxt->sc_send_wr.num_sge = 0; 207 + ctxt->sc_cur_sge_no = 0; 208 + ctxt->sc_page_count = 0; 209 + return ctxt; 210 + 211 + out_empty: 212 + spin_unlock(&rdma->sc_send_lock); 213 + ctxt = svc_rdma_send_ctxt_alloc(rdma); 214 + if (!ctxt) 215 + return NULL; 216 + goto out; 217 + } 218 + 219 + /** 220 + * svc_rdma_send_ctxt_put - Return send_ctxt to free list 221 + * @rdma: controlling svcxprt_rdma 222 + * @ctxt: object to return to the free list 223 + * 224 + * Pages left in sc_pages are DMA unmapped and released. 
225 + */ 226 + void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma, 227 + struct svc_rdma_send_ctxt *ctxt) 228 + { 229 + struct ib_device *device = rdma->sc_cm_id->device; 230 + unsigned int i; 231 + 232 + /* The first SGE contains the transport header, which 233 + * remains mapped until @ctxt is destroyed. 234 + */ 235 + for (i = 1; i < ctxt->sc_send_wr.num_sge; i++) 236 + ib_dma_unmap_page(device, 237 + ctxt->sc_sges[i].addr, 238 + ctxt->sc_sges[i].length, 239 + DMA_TO_DEVICE); 240 + 241 + for (i = 0; i < ctxt->sc_page_count; ++i) 242 + put_page(ctxt->sc_pages[i]); 243 + 244 + spin_lock(&rdma->sc_send_lock); 245 + list_add(&ctxt->sc_list, &rdma->sc_send_ctxts); 246 + spin_unlock(&rdma->sc_send_lock); 247 + } 248 + 249 + /** 250 + * svc_rdma_wc_send - Invoked by RDMA provider for each polled Send WC 251 + * @cq: Completion Queue context 252 + * @wc: Work Completion object 253 + * 254 + * NB: The svc_xprt/svcxprt_rdma is pinned whenever it's possible that 255 + * the Send completion handler could be running. 
256 + */ 257 + static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) 258 + { 259 + struct svcxprt_rdma *rdma = cq->cq_context; 260 + struct ib_cqe *cqe = wc->wr_cqe; 261 + struct svc_rdma_send_ctxt *ctxt; 262 + 263 + trace_svcrdma_wc_send(wc); 264 + 265 + atomic_inc(&rdma->sc_sq_avail); 266 + wake_up(&rdma->sc_send_wait); 267 + 268 + ctxt = container_of(cqe, struct svc_rdma_send_ctxt, sc_cqe); 269 + svc_rdma_send_ctxt_put(rdma, ctxt); 270 + 271 + if (unlikely(wc->status != IB_WC_SUCCESS)) { 272 + set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); 273 + svc_xprt_enqueue(&rdma->sc_xprt); 274 + if (wc->status != IB_WC_WR_FLUSH_ERR) 275 + pr_err("svcrdma: Send: %s (%u/0x%x)\n", 276 + ib_wc_status_msg(wc->status), 277 + wc->status, wc->vendor_err); 278 + } 279 + 280 + svc_xprt_put(&rdma->sc_xprt); 281 + } 282 + 283 + /** 284 + * svc_rdma_send - Post a single Send WR 285 + * @rdma: transport on which to post the WR 286 + * @wr: prepared Send WR to post 287 + * 288 + * Returns zero the Send WR was posted successfully. Otherwise, a 289 + * negative errno is returned. 
290 + */ 291 + int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr) 292 + { 293 + struct ib_send_wr *bad_wr; 294 + int ret; 295 + 296 + might_sleep(); 297 + 298 + /* If the SQ is full, wait until an SQ entry is available */ 299 + while (1) { 300 + if ((atomic_dec_return(&rdma->sc_sq_avail) < 0)) { 301 + atomic_inc(&rdma_stat_sq_starve); 302 + trace_svcrdma_sq_full(rdma); 303 + atomic_inc(&rdma->sc_sq_avail); 304 + wait_event(rdma->sc_send_wait, 305 + atomic_read(&rdma->sc_sq_avail) > 1); 306 + if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags)) 307 + return -ENOTCONN; 308 + trace_svcrdma_sq_retry(rdma); 309 + continue; 310 + } 311 + 312 + svc_xprt_get(&rdma->sc_xprt); 313 + ret = ib_post_send(rdma->sc_qp, wr, &bad_wr); 314 + trace_svcrdma_post_send(wr, ret); 315 + if (ret) { 316 + set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); 317 + svc_xprt_put(&rdma->sc_xprt); 318 + wake_up(&rdma->sc_send_wait); 319 + } 320 + break; 321 + } 322 + return ret; 323 + } 111 324 112 325 static u32 xdr_padsize(u32 len) 113 326 { ··· 511 296 return be32_to_cpup(p); 512 297 } 513 298 514 - /* ib_dma_map_page() is used here because svc_rdma_dma_unmap() 515 - * is used during completion to DMA-unmap this memory, and 516 - * it uses ib_dma_unmap_page() exclusively. 
517 - */ 518 - static int svc_rdma_dma_map_buf(struct svcxprt_rdma *rdma, 519 - struct svc_rdma_op_ctxt *ctxt, 520 - unsigned int sge_no, 521 - unsigned char *base, 522 - unsigned int len) 523 - { 524 - unsigned long offset = (unsigned long)base & ~PAGE_MASK; 525 - struct ib_device *dev = rdma->sc_cm_id->device; 526 - dma_addr_t dma_addr; 527 - 528 - dma_addr = ib_dma_map_page(dev, virt_to_page(base), 529 - offset, len, DMA_TO_DEVICE); 530 - if (ib_dma_mapping_error(dev, dma_addr)) 531 - goto out_maperr; 532 - 533 - ctxt->sge[sge_no].addr = dma_addr; 534 - ctxt->sge[sge_no].length = len; 535 - ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey; 536 - svc_rdma_count_mappings(rdma, ctxt); 537 - return 0; 538 - 539 - out_maperr: 540 - pr_err("svcrdma: failed to map buffer\n"); 541 - return -EIO; 542 - } 543 - 544 299 static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma, 545 - struct svc_rdma_op_ctxt *ctxt, 546 - unsigned int sge_no, 300 + struct svc_rdma_send_ctxt *ctxt, 547 301 struct page *page, 548 - unsigned int offset, 302 + unsigned long offset, 549 303 unsigned int len) 550 304 { 551 305 struct ib_device *dev = rdma->sc_cm_id->device; ··· 524 340 if (ib_dma_mapping_error(dev, dma_addr)) 525 341 goto out_maperr; 526 342 527 - ctxt->sge[sge_no].addr = dma_addr; 528 - ctxt->sge[sge_no].length = len; 529 - ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey; 530 - svc_rdma_count_mappings(rdma, ctxt); 343 + ctxt->sc_sges[ctxt->sc_cur_sge_no].addr = dma_addr; 344 + ctxt->sc_sges[ctxt->sc_cur_sge_no].length = len; 345 + ctxt->sc_send_wr.num_sge++; 531 346 return 0; 532 347 533 348 out_maperr: 534 - pr_err("svcrdma: failed to map page\n"); 349 + trace_svcrdma_dma_map_page(rdma, page); 535 350 return -EIO; 536 351 } 537 352 538 - /** 539 - * svc_rdma_map_reply_hdr - DMA map the transport header buffer 540 - * @rdma: controlling transport 541 - * @ctxt: op_ctxt for the Send WR 542 - * @rdma_resp: buffer containing transport header 543 - * @len: length of 
transport header 544 - * 545 - * Returns: 546 - * %0 if the header is DMA mapped, 547 - * %-EIO if DMA mapping failed. 353 + /* ib_dma_map_page() is used here because svc_rdma_dma_unmap() 354 + * handles DMA-unmap and it uses ib_dma_unmap_page() exclusively. 548 355 */ 549 - int svc_rdma_map_reply_hdr(struct svcxprt_rdma *rdma, 550 - struct svc_rdma_op_ctxt *ctxt, 551 - __be32 *rdma_resp, 552 - unsigned int len) 356 + static int svc_rdma_dma_map_buf(struct svcxprt_rdma *rdma, 357 + struct svc_rdma_send_ctxt *ctxt, 358 + unsigned char *base, 359 + unsigned int len) 553 360 { 554 - ctxt->direction = DMA_TO_DEVICE; 555 - ctxt->pages[0] = virt_to_page(rdma_resp); 556 - ctxt->count = 1; 557 - return svc_rdma_dma_map_page(rdma, ctxt, 0, ctxt->pages[0], 0, len); 361 + return svc_rdma_dma_map_page(rdma, ctxt, virt_to_page(base), 362 + offset_in_page(base), len); 558 363 } 559 364 560 - /* Load the xdr_buf into the ctxt's sge array, and DMA map each 365 + /** 366 + * svc_rdma_sync_reply_hdr - DMA sync the transport header buffer 367 + * @rdma: controlling transport 368 + * @ctxt: send_ctxt for the Send WR 369 + * @len: length of transport header 370 + * 371 + */ 372 + void svc_rdma_sync_reply_hdr(struct svcxprt_rdma *rdma, 373 + struct svc_rdma_send_ctxt *ctxt, 374 + unsigned int len) 375 + { 376 + ctxt->sc_sges[0].length = len; 377 + ctxt->sc_send_wr.num_sge++; 378 + ib_dma_sync_single_for_device(rdma->sc_pd->device, 379 + ctxt->sc_sges[0].addr, len, 380 + DMA_TO_DEVICE); 381 + } 382 + 383 + /* svc_rdma_map_reply_msg - Map the buffer holding RPC message 384 + * @rdma: controlling transport 385 + * @ctxt: send_ctxt for the Send WR 386 + * @xdr: prepared xdr_buf containing RPC message 387 + * @wr_lst: pointer to Call header's Write list, or NULL 388 + * 389 + * Load the xdr_buf into the ctxt's sge array, and DMA map each 561 390 * element as it is added. 562 391 * 563 - * Returns the number of sge elements loaded on success, or 564 - * a negative errno on failure. 
392 + * Returns zero on success, or a negative errno on failure. 565 393 */ 566 - static int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, 567 - struct svc_rdma_op_ctxt *ctxt, 568 - struct xdr_buf *xdr, __be32 *wr_lst) 394 + int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, 395 + struct svc_rdma_send_ctxt *ctxt, 396 + struct xdr_buf *xdr, __be32 *wr_lst) 569 397 { 570 - unsigned int len, sge_no, remaining, page_off; 398 + unsigned int len, remaining; 399 + unsigned long page_off; 571 400 struct page **ppages; 572 401 unsigned char *base; 573 402 u32 xdr_pad; 574 403 int ret; 575 404 576 - sge_no = 1; 577 - 578 - ret = svc_rdma_dma_map_buf(rdma, ctxt, sge_no++, 405 + if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges) 406 + return -EIO; 407 + ret = svc_rdma_dma_map_buf(rdma, ctxt, 579 408 xdr->head[0].iov_base, 580 409 xdr->head[0].iov_len); 581 410 if (ret < 0) ··· 618 421 while (remaining) { 619 422 len = min_t(u32, PAGE_SIZE - page_off, remaining); 620 423 621 - ret = svc_rdma_dma_map_page(rdma, ctxt, sge_no++, 622 - *ppages++, page_off, len); 424 + if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges) 425 + return -EIO; 426 + ret = svc_rdma_dma_map_page(rdma, ctxt, *ppages++, 427 + page_off, len); 623 428 if (ret < 0) 624 429 return ret; 625 430 ··· 633 434 len = xdr->tail[0].iov_len; 634 435 tail: 635 436 if (len) { 636 - ret = svc_rdma_dma_map_buf(rdma, ctxt, sge_no++, base, len); 437 + if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges) 438 + return -EIO; 439 + ret = svc_rdma_dma_map_buf(rdma, ctxt, base, len); 637 440 if (ret < 0) 638 441 return ret; 639 442 } 640 443 641 - return sge_no - 1; 444 + return 0; 642 445 } 643 446 644 447 /* The svc_rqst and all resources it owns are released as soon as ··· 648 447 * so they are released by the Send completion handler. 
649 448 */ 650 449 static void svc_rdma_save_io_pages(struct svc_rqst *rqstp, 651 - struct svc_rdma_op_ctxt *ctxt) 450 + struct svc_rdma_send_ctxt *ctxt) 652 451 { 653 452 int i, pages = rqstp->rq_next_page - rqstp->rq_respages; 654 453 655 - ctxt->count += pages; 454 + ctxt->sc_page_count += pages; 656 455 for (i = 0; i < pages; i++) { 657 - ctxt->pages[i + 1] = rqstp->rq_respages[i]; 456 + ctxt->sc_pages[i] = rqstp->rq_respages[i]; 658 457 rqstp->rq_respages[i] = NULL; 659 458 } 660 459 rqstp->rq_next_page = rqstp->rq_respages + 1; 661 460 } 662 461 663 - /** 664 - * svc_rdma_post_send_wr - Set up and post one Send Work Request 665 - * @rdma: controlling transport 666 - * @ctxt: op_ctxt for transmitting the Send WR 667 - * @num_sge: number of SGEs to send 668 - * @inv_rkey: R_key argument to Send With Invalidate, or zero 669 - * 670 - * Returns: 671 - * %0 if the Send* was posted successfully, 672 - * %-ENOTCONN if the connection was lost or dropped, 673 - * %-EINVAL if there was a problem with the Send we built, 674 - * %-ENOMEM if ib_post_send failed. 675 - */ 676 - int svc_rdma_post_send_wr(struct svcxprt_rdma *rdma, 677 - struct svc_rdma_op_ctxt *ctxt, int num_sge, 678 - u32 inv_rkey) 679 - { 680 - struct ib_send_wr *send_wr = &ctxt->send_wr; 681 - 682 - dprintk("svcrdma: posting Send WR with %u sge(s)\n", num_sge); 683 - 684 - send_wr->next = NULL; 685 - ctxt->cqe.done = svc_rdma_wc_send; 686 - send_wr->wr_cqe = &ctxt->cqe; 687 - send_wr->sg_list = ctxt->sge; 688 - send_wr->num_sge = num_sge; 689 - send_wr->send_flags = IB_SEND_SIGNALED; 690 - if (inv_rkey) { 691 - send_wr->opcode = IB_WR_SEND_WITH_INV; 692 - send_wr->ex.invalidate_rkey = inv_rkey; 693 - } else { 694 - send_wr->opcode = IB_WR_SEND; 695 - } 696 - 697 - return svc_rdma_send(rdma, send_wr); 698 - } 699 - 700 462 /* Prepare the portion of the RPC Reply that will be transmitted 701 463 * via RDMA Send. 
The RPC-over-RDMA transport header is prepared 702 - * in sge[0], and the RPC xdr_buf is prepared in following sges. 464 + * in sc_sges[0], and the RPC xdr_buf is prepared in following sges. 703 465 * 704 466 * Depending on whether a Write list or Reply chunk is present, 705 467 * the server may send all, a portion of, or none of the xdr_buf. 706 - * In the latter case, only the transport header (sge[0]) is 468 + * In the latter case, only the transport header (sc_sges[0]) is 707 469 * transmitted. 708 470 * 709 471 * RDMA Send is the last step of transmitting an RPC reply. Pages ··· 679 515 * - The Reply's transport header will never be larger than a page. 680 516 */ 681 517 static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma, 682 - __be32 *rdma_argp, __be32 *rdma_resp, 518 + struct svc_rdma_send_ctxt *ctxt, 519 + __be32 *rdma_argp, 683 520 struct svc_rqst *rqstp, 684 521 __be32 *wr_lst, __be32 *rp_ch) 685 522 { 686 - struct svc_rdma_op_ctxt *ctxt; 687 - u32 inv_rkey; 688 523 int ret; 689 - 690 - dprintk("svcrdma: sending %s reply: head=%zu, pagelen=%u, tail=%zu\n", 691 - (rp_ch ? 
"RDMA_NOMSG" : "RDMA_MSG"), 692 - rqstp->rq_res.head[0].iov_len, 693 - rqstp->rq_res.page_len, 694 - rqstp->rq_res.tail[0].iov_len); 695 - 696 - ctxt = svc_rdma_get_context(rdma); 697 - 698 - ret = svc_rdma_map_reply_hdr(rdma, ctxt, rdma_resp, 699 - svc_rdma_reply_hdr_len(rdma_resp)); 700 - if (ret < 0) 701 - goto err; 702 524 703 525 if (!rp_ch) { 704 526 ret = svc_rdma_map_reply_msg(rdma, ctxt, 705 527 &rqstp->rq_res, wr_lst); 706 528 if (ret < 0) 707 - goto err; 529 + return ret; 708 530 } 709 531 710 532 svc_rdma_save_io_pages(rqstp, ctxt); 711 533 712 - inv_rkey = 0; 713 - if (rdma->sc_snd_w_inv) 714 - inv_rkey = svc_rdma_get_inv_rkey(rdma_argp, wr_lst, rp_ch); 715 - ret = svc_rdma_post_send_wr(rdma, ctxt, 1 + ret, inv_rkey); 716 - if (ret) 717 - goto err; 718 - 719 - return 0; 720 - 721 - err: 722 - svc_rdma_unmap_dma(ctxt); 723 - svc_rdma_put_context(ctxt, 1); 724 - return ret; 534 + ctxt->sc_send_wr.opcode = IB_WR_SEND; 535 + if (rdma->sc_snd_w_inv) { 536 + ctxt->sc_send_wr.ex.invalidate_rkey = 537 + svc_rdma_get_inv_rkey(rdma_argp, wr_lst, rp_ch); 538 + if (ctxt->sc_send_wr.ex.invalidate_rkey) 539 + ctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV; 540 + } 541 + dprintk("svcrdma: posting Send WR with %u sge(s)\n", 542 + ctxt->sc_send_wr.num_sge); 543 + return svc_rdma_send(rdma, &ctxt->sc_send_wr); 725 544 } 726 545 727 546 /* Given the client-provided Write and Reply chunks, the server was not ··· 715 568 * Remote Invalidation is skipped for simplicity. 716 569 */ 717 570 static int svc_rdma_send_error_msg(struct svcxprt_rdma *rdma, 718 - __be32 *rdma_resp, struct svc_rqst *rqstp) 571 + struct svc_rdma_send_ctxt *ctxt, 572 + struct svc_rqst *rqstp) 719 573 { 720 - struct svc_rdma_op_ctxt *ctxt; 721 574 __be32 *p; 722 575 int ret; 723 576 724 - ctxt = svc_rdma_get_context(rdma); 725 - 726 - /* Replace the original transport header with an 727 - * RDMA_ERROR response. XID etc are preserved. 
728 - */ 729 - p = rdma_resp + 3; 577 + p = ctxt->sc_xprt_buf; 578 + trace_svcrdma_err_chunk(*p); 579 + p += 3; 730 580 *p++ = rdma_error; 731 581 *p = err_chunk; 732 - 733 - ret = svc_rdma_map_reply_hdr(rdma, ctxt, rdma_resp, 20); 734 - if (ret < 0) 735 - goto err; 582 + svc_rdma_sync_reply_hdr(rdma, ctxt, RPCRDMA_HDRLEN_ERR); 736 583 737 584 svc_rdma_save_io_pages(rqstp, ctxt); 738 585 739 - ret = svc_rdma_post_send_wr(rdma, ctxt, 1 + ret, 0); 740 - if (ret) 741 - goto err; 586 + ctxt->sc_send_wr.opcode = IB_WR_SEND; 587 + ret = svc_rdma_send(rdma, &ctxt->sc_send_wr); 588 + if (ret) { 589 + svc_rdma_send_ctxt_put(rdma, ctxt); 590 + return ret; 591 + } 742 592 743 593 return 0; 744 - 745 - err: 746 - pr_err("svcrdma: failed to post Send WR (%d)\n", ret); 747 - svc_rdma_unmap_dma(ctxt); 748 - svc_rdma_put_context(ctxt, 1); 749 - return ret; 750 594 } 751 595 752 596 void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp) ··· 761 623 struct svc_xprt *xprt = rqstp->rq_xprt; 762 624 struct svcxprt_rdma *rdma = 763 625 container_of(xprt, struct svcxprt_rdma, sc_xprt); 626 + struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt; 764 627 __be32 *p, *rdma_argp, *rdma_resp, *wr_lst, *rp_ch; 765 628 struct xdr_buf *xdr = &rqstp->rq_res; 766 - struct page *res_page; 629 + struct svc_rdma_send_ctxt *sctxt; 767 630 int ret; 768 631 769 - /* Find the call's chunk lists to decide how to send the reply. 770 - * Receive places the Call's xprt header at the start of page 0. 771 - */ 772 - rdma_argp = page_address(rqstp->rq_pages[0]); 632 + rdma_argp = rctxt->rc_recv_buf; 773 633 svc_rdma_get_write_arrays(rdma_argp, &wr_lst, &rp_ch); 774 - 775 - dprintk("svcrdma: preparing response for XID 0x%08x\n", 776 - be32_to_cpup(rdma_argp)); 777 634 778 635 /* Create the RDMA response header. xprt->xpt_mutex, 779 636 * acquired in svc_send(), serializes RPC replies. The ··· 777 644 * critical section. 
778 645 */ 779 646 ret = -ENOMEM; 780 - res_page = alloc_page(GFP_KERNEL); 781 - if (!res_page) 647 + sctxt = svc_rdma_send_ctxt_get(rdma); 648 + if (!sctxt) 782 649 goto err0; 783 - rdma_resp = page_address(res_page); 650 + rdma_resp = sctxt->sc_xprt_buf; 784 651 785 652 p = rdma_resp; 786 653 *p++ = *rdma_argp; ··· 807 674 svc_rdma_xdr_encode_reply_chunk(rdma_resp, rp_ch, ret); 808 675 } 809 676 810 - ret = svc_rdma_send_reply_msg(rdma, rdma_argp, rdma_resp, rqstp, 677 + svc_rdma_sync_reply_hdr(rdma, sctxt, svc_rdma_reply_hdr_len(rdma_resp)); 678 + ret = svc_rdma_send_reply_msg(rdma, sctxt, rdma_argp, rqstp, 811 679 wr_lst, rp_ch); 812 680 if (ret < 0) 813 - goto err0; 814 - return 0; 681 + goto err1; 682 + ret = 0; 683 + 684 + out: 685 + rqstp->rq_xprt_ctxt = NULL; 686 + svc_rdma_recv_ctxt_put(rdma, rctxt); 687 + return ret; 815 688 816 689 err2: 817 690 if (ret != -E2BIG && ret != -EINVAL) 818 691 goto err1; 819 692 820 - ret = svc_rdma_send_error_msg(rdma, rdma_resp, rqstp); 693 + ret = svc_rdma_send_error_msg(rdma, sctxt, rqstp); 821 694 if (ret < 0) 822 - goto err0; 823 - return 0; 695 + goto err1; 696 + ret = 0; 697 + goto out; 824 698 825 699 err1: 826 - put_page(res_page); 700 + svc_rdma_send_ctxt_put(rdma, sctxt); 827 701 err0: 828 - pr_err("svcrdma: Could not send reply, err=%d. Closing transport.\n", 829 - ret); 702 + trace_svcrdma_send_failed(rqstp, ret); 830 703 set_bit(XPT_CLOSE, &xprt->xpt_flags); 831 - return -ENOTCONN; 704 + ret = -ENOTCONN; 705 + goto out; 832 706 }
+76 -405
net/sunrpc/xprtrdma/svc_rdma_transport.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 1 2 /* 3 + * Copyright (c) 2015-2018 Oracle. All rights reserved. 2 4 * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved. 3 5 * Copyright (c) 2005-2007 Network Appliance, Inc. All rights reserved. 4 6 * ··· 42 40 * Author: Tom Tucker <tom@opengridcomputing.com> 43 41 */ 44 42 45 - #include <linux/sunrpc/svc_xprt.h> 46 - #include <linux/sunrpc/addr.h> 47 - #include <linux/sunrpc/debug.h> 48 - #include <linux/sunrpc/rpc_rdma.h> 49 43 #include <linux/interrupt.h> 50 44 #include <linux/sched.h> 51 45 #include <linux/slab.h> 52 46 #include <linux/spinlock.h> 53 47 #include <linux/workqueue.h> 48 + #include <linux/export.h> 49 + 54 50 #include <rdma/ib_verbs.h> 55 51 #include <rdma/rdma_cm.h> 56 52 #include <rdma/rw.h> 53 + 54 + #include <linux/sunrpc/addr.h> 55 + #include <linux/sunrpc/debug.h> 56 + #include <linux/sunrpc/rpc_rdma.h> 57 + #include <linux/sunrpc/svc_xprt.h> 57 58 #include <linux/sunrpc/svc_rdma.h> 58 - #include <linux/export.h> 59 + 59 60 #include "xprt_rdma.h" 61 + #include <trace/events/rpcrdma.h> 60 62 61 63 #define RPCDBG_FACILITY RPCDBG_SVCXPRT 62 64 63 - static int svc_rdma_post_recv(struct svcxprt_rdma *xprt); 64 - static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *, int); 65 + static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv, 66 + struct net *net); 65 67 static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, 66 68 struct net *net, 67 69 struct sockaddr *sa, int salen, ··· 129 123 struct svcxprt_rdma *cma_xprt; 130 124 struct svc_xprt *xprt; 131 125 132 - cma_xprt = rdma_create_xprt(serv, 0); 126 + cma_xprt = svc_rdma_create_xprt(serv, net); 133 127 if (!cma_xprt) 134 128 return ERR_PTR(-ENOMEM); 135 129 xprt = &cma_xprt->sc_xprt; ··· 158 152 } 159 153 #endif /* CONFIG_SUNRPC_BACKCHANNEL */ 160 154 161 - static struct svc_rdma_op_ctxt *alloc_ctxt(struct svcxprt_rdma *xprt, 162 - gfp_t flags) 163 - { 164 - struct svc_rdma_op_ctxt 
*ctxt; 165 - 166 - ctxt = kmalloc(sizeof(*ctxt), flags); 167 - if (ctxt) { 168 - ctxt->xprt = xprt; 169 - INIT_LIST_HEAD(&ctxt->list); 170 - } 171 - return ctxt; 172 - } 173 - 174 - static bool svc_rdma_prealloc_ctxts(struct svcxprt_rdma *xprt) 175 - { 176 - unsigned int i; 177 - 178 - /* Each RPC/RDMA credit can consume one Receive and 179 - * one Send WQE at the same time. 180 - */ 181 - i = xprt->sc_sq_depth + xprt->sc_rq_depth; 182 - 183 - while (i--) { 184 - struct svc_rdma_op_ctxt *ctxt; 185 - 186 - ctxt = alloc_ctxt(xprt, GFP_KERNEL); 187 - if (!ctxt) { 188 - dprintk("svcrdma: No memory for RDMA ctxt\n"); 189 - return false; 190 - } 191 - list_add(&ctxt->list, &xprt->sc_ctxts); 192 - } 193 - return true; 194 - } 195 - 196 - struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) 197 - { 198 - struct svc_rdma_op_ctxt *ctxt = NULL; 199 - 200 - spin_lock(&xprt->sc_ctxt_lock); 201 - xprt->sc_ctxt_used++; 202 - if (list_empty(&xprt->sc_ctxts)) 203 - goto out_empty; 204 - 205 - ctxt = list_first_entry(&xprt->sc_ctxts, 206 - struct svc_rdma_op_ctxt, list); 207 - list_del(&ctxt->list); 208 - spin_unlock(&xprt->sc_ctxt_lock); 209 - 210 - out: 211 - ctxt->count = 0; 212 - ctxt->mapped_sges = 0; 213 - return ctxt; 214 - 215 - out_empty: 216 - /* Either pre-allocation missed the mark, or send 217 - * queue accounting is broken. 
218 - */ 219 - spin_unlock(&xprt->sc_ctxt_lock); 220 - 221 - ctxt = alloc_ctxt(xprt, GFP_NOIO); 222 - if (ctxt) 223 - goto out; 224 - 225 - spin_lock(&xprt->sc_ctxt_lock); 226 - xprt->sc_ctxt_used--; 227 - spin_unlock(&xprt->sc_ctxt_lock); 228 - WARN_ONCE(1, "svcrdma: empty RDMA ctxt list?\n"); 229 - return NULL; 230 - } 231 - 232 - void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt) 233 - { 234 - struct svcxprt_rdma *xprt = ctxt->xprt; 235 - struct ib_device *device = xprt->sc_cm_id->device; 236 - unsigned int i; 237 - 238 - for (i = 0; i < ctxt->mapped_sges; i++) 239 - ib_dma_unmap_page(device, 240 - ctxt->sge[i].addr, 241 - ctxt->sge[i].length, 242 - ctxt->direction); 243 - ctxt->mapped_sges = 0; 244 - } 245 - 246 - void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages) 247 - { 248 - struct svcxprt_rdma *xprt = ctxt->xprt; 249 - int i; 250 - 251 - if (free_pages) 252 - for (i = 0; i < ctxt->count; i++) 253 - put_page(ctxt->pages[i]); 254 - 255 - spin_lock(&xprt->sc_ctxt_lock); 256 - xprt->sc_ctxt_used--; 257 - list_add(&ctxt->list, &xprt->sc_ctxts); 258 - spin_unlock(&xprt->sc_ctxt_lock); 259 - } 260 - 261 - static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt) 262 - { 263 - while (!list_empty(&xprt->sc_ctxts)) { 264 - struct svc_rdma_op_ctxt *ctxt; 265 - 266 - ctxt = list_first_entry(&xprt->sc_ctxts, 267 - struct svc_rdma_op_ctxt, list); 268 - list_del(&ctxt->list); 269 - kfree(ctxt); 270 - } 271 - } 272 - 273 155 /* QP event handler */ 274 156 static void qp_event_handler(struct ib_event *event, void *context) 275 157 { 276 158 struct svc_xprt *xprt = context; 277 159 160 + trace_svcrdma_qp_error(event, (struct sockaddr *)&xprt->xpt_remote); 278 161 switch (event->event) { 279 162 /* These are considered benign events */ 280 163 case IB_EVENT_PATH_MIG: 281 164 case IB_EVENT_COMM_EST: 282 165 case IB_EVENT_SQ_DRAINED: 283 166 case IB_EVENT_QP_LAST_WQE_REACHED: 284 - dprintk("svcrdma: QP event %s (%d) received for QP=%p\n", 285 - 
ib_event_msg(event->event), event->event, 286 - event->element.qp); 287 167 break; 168 + 288 169 /* These are considered fatal events */ 289 170 case IB_EVENT_PATH_MIG_ERR: 290 171 case IB_EVENT_QP_FATAL: ··· 179 286 case IB_EVENT_QP_ACCESS_ERR: 180 287 case IB_EVENT_DEVICE_FATAL: 181 288 default: 182 - dprintk("svcrdma: QP ERROR event %s (%d) received for QP=%p, " 183 - "closing transport\n", 184 - ib_event_msg(event->event), event->event, 185 - event->element.qp); 186 289 set_bit(XPT_CLOSE, &xprt->xpt_flags); 187 290 svc_xprt_enqueue(xprt); 188 291 break; 189 292 } 190 293 } 191 294 192 - /** 193 - * svc_rdma_wc_receive - Invoked by RDMA provider for each polled Receive WC 194 - * @cq: completion queue 195 - * @wc: completed WR 196 - * 197 - */ 198 - static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) 199 - { 200 - struct svcxprt_rdma *xprt = cq->cq_context; 201 - struct ib_cqe *cqe = wc->wr_cqe; 202 - struct svc_rdma_op_ctxt *ctxt; 203 - 204 - /* WARNING: Only wc->wr_cqe and wc->status are reliable */ 205 - ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe); 206 - svc_rdma_unmap_dma(ctxt); 207 - 208 - if (wc->status != IB_WC_SUCCESS) 209 - goto flushed; 210 - 211 - /* All wc fields are now known to be valid */ 212 - ctxt->byte_len = wc->byte_len; 213 - spin_lock(&xprt->sc_rq_dto_lock); 214 - list_add_tail(&ctxt->list, &xprt->sc_rq_dto_q); 215 - spin_unlock(&xprt->sc_rq_dto_lock); 216 - 217 - svc_rdma_post_recv(xprt); 218 - 219 - set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); 220 - if (test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags)) 221 - goto out; 222 - goto out_enqueue; 223 - 224 - flushed: 225 - if (wc->status != IB_WC_WR_FLUSH_ERR) 226 - pr_err("svcrdma: Recv: %s (%u/0x%x)\n", 227 - ib_wc_status_msg(wc->status), 228 - wc->status, wc->vendor_err); 229 - set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); 230 - svc_rdma_put_context(ctxt, 1); 231 - 232 - out_enqueue: 233 - svc_xprt_enqueue(&xprt->sc_xprt); 234 - out: 235 - 
svc_xprt_put(&xprt->sc_xprt); 236 - } 237 - 238 - /** 239 - * svc_rdma_wc_send - Invoked by RDMA provider for each polled Send WC 240 - * @cq: completion queue 241 - * @wc: completed WR 242 - * 243 - */ 244 - void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) 245 - { 246 - struct svcxprt_rdma *xprt = cq->cq_context; 247 - struct ib_cqe *cqe = wc->wr_cqe; 248 - struct svc_rdma_op_ctxt *ctxt; 249 - 250 - atomic_inc(&xprt->sc_sq_avail); 251 - wake_up(&xprt->sc_send_wait); 252 - 253 - ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe); 254 - svc_rdma_unmap_dma(ctxt); 255 - svc_rdma_put_context(ctxt, 1); 256 - 257 - if (unlikely(wc->status != IB_WC_SUCCESS)) { 258 - set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); 259 - svc_xprt_enqueue(&xprt->sc_xprt); 260 - if (wc->status != IB_WC_WR_FLUSH_ERR) 261 - pr_err("svcrdma: Send: %s (%u/0x%x)\n", 262 - ib_wc_status_msg(wc->status), 263 - wc->status, wc->vendor_err); 264 - } 265 - 266 - svc_xprt_put(&xprt->sc_xprt); 267 - } 268 - 269 - static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, 270 - int listener) 295 + static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv, 296 + struct net *net) 271 297 { 272 298 struct svcxprt_rdma *cma_xprt = kzalloc(sizeof *cma_xprt, GFP_KERNEL); 273 299 274 - if (!cma_xprt) 300 + if (!cma_xprt) { 301 + dprintk("svcrdma: failed to create new transport\n"); 275 302 return NULL; 276 - svc_xprt_init(&init_net, &svc_rdma_class, &cma_xprt->sc_xprt, serv); 303 + } 304 + svc_xprt_init(net, &svc_rdma_class, &cma_xprt->sc_xprt, serv); 277 305 INIT_LIST_HEAD(&cma_xprt->sc_accept_q); 278 306 INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); 279 307 INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q); 280 - INIT_LIST_HEAD(&cma_xprt->sc_ctxts); 308 + INIT_LIST_HEAD(&cma_xprt->sc_send_ctxts); 309 + INIT_LIST_HEAD(&cma_xprt->sc_recv_ctxts); 281 310 INIT_LIST_HEAD(&cma_xprt->sc_rw_ctxts); 282 311 init_waitqueue_head(&cma_xprt->sc_send_wait); 283 312 284 313 
spin_lock_init(&cma_xprt->sc_lock); 285 314 spin_lock_init(&cma_xprt->sc_rq_dto_lock); 286 - spin_lock_init(&cma_xprt->sc_ctxt_lock); 315 + spin_lock_init(&cma_xprt->sc_send_lock); 316 + spin_lock_init(&cma_xprt->sc_recv_lock); 287 317 spin_lock_init(&cma_xprt->sc_rw_ctxt_lock); 288 318 289 319 /* ··· 217 401 */ 218 402 set_bit(XPT_CONG_CTRL, &cma_xprt->sc_xprt.xpt_flags); 219 403 220 - if (listener) { 221 - strcpy(cma_xprt->sc_xprt.xpt_remotebuf, "listener"); 222 - set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags); 223 - } 224 - 225 404 return cma_xprt; 226 - } 227 - 228 - static int 229 - svc_rdma_post_recv(struct svcxprt_rdma *xprt) 230 - { 231 - struct ib_recv_wr recv_wr, *bad_recv_wr; 232 - struct svc_rdma_op_ctxt *ctxt; 233 - struct page *page; 234 - dma_addr_t pa; 235 - int sge_no; 236 - int buflen; 237 - int ret; 238 - 239 - ctxt = svc_rdma_get_context(xprt); 240 - buflen = 0; 241 - ctxt->direction = DMA_FROM_DEVICE; 242 - ctxt->cqe.done = svc_rdma_wc_receive; 243 - for (sge_no = 0; buflen < xprt->sc_max_req_size; sge_no++) { 244 - if (sge_no >= xprt->sc_max_sge) { 245 - pr_err("svcrdma: Too many sges (%d)\n", sge_no); 246 - goto err_put_ctxt; 247 - } 248 - page = alloc_page(GFP_KERNEL); 249 - if (!page) 250 - goto err_put_ctxt; 251 - ctxt->pages[sge_no] = page; 252 - pa = ib_dma_map_page(xprt->sc_cm_id->device, 253 - page, 0, PAGE_SIZE, 254 - DMA_FROM_DEVICE); 255 - if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa)) 256 - goto err_put_ctxt; 257 - svc_rdma_count_mappings(xprt, ctxt); 258 - ctxt->sge[sge_no].addr = pa; 259 - ctxt->sge[sge_no].length = PAGE_SIZE; 260 - ctxt->sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey; 261 - ctxt->count = sge_no + 1; 262 - buflen += PAGE_SIZE; 263 - } 264 - recv_wr.next = NULL; 265 - recv_wr.sg_list = &ctxt->sge[0]; 266 - recv_wr.num_sge = ctxt->count; 267 - recv_wr.wr_cqe = &ctxt->cqe; 268 - 269 - svc_xprt_get(&xprt->sc_xprt); 270 - ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr); 271 - if (ret) { 272 - 
svc_rdma_unmap_dma(ctxt); 273 - svc_rdma_put_context(ctxt, 1); 274 - svc_xprt_put(&xprt->sc_xprt); 275 - } 276 - return ret; 277 - 278 - err_put_ctxt: 279 - svc_rdma_unmap_dma(ctxt); 280 - svc_rdma_put_context(ctxt, 1); 281 - return -ENOMEM; 282 405 } 283 406 284 407 static void ··· 259 504 struct sockaddr *sa; 260 505 261 506 /* Create a new transport */ 262 - newxprt = rdma_create_xprt(listen_xprt->sc_xprt.xpt_server, 0); 263 - if (!newxprt) { 264 - dprintk("svcrdma: failed to create new transport\n"); 507 + newxprt = svc_rdma_create_xprt(listen_xprt->sc_xprt.xpt_server, 508 + listen_xprt->sc_xprt.xpt_net); 509 + if (!newxprt) 265 510 return; 266 - } 267 511 newxprt->sc_cm_id = new_cma_id; 268 512 new_cma_id->context = newxprt; 269 - dprintk("svcrdma: Creating newxprt=%p, cm_id=%p, listenxprt=%p\n", 270 - newxprt, newxprt->sc_cm_id, listen_xprt); 271 513 svc_rdma_parse_connect_private(newxprt, param); 272 514 273 515 /* Save client advertised inbound read limit for use later in accept. 
*/ ··· 295 543 static int rdma_listen_handler(struct rdma_cm_id *cma_id, 296 544 struct rdma_cm_event *event) 297 545 { 298 - struct svcxprt_rdma *xprt = cma_id->context; 546 + struct sockaddr *sap = (struct sockaddr *)&cma_id->route.addr.src_addr; 299 547 int ret = 0; 548 + 549 + trace_svcrdma_cm_event(event, sap); 300 550 301 551 switch (event->event) { 302 552 case RDMA_CM_EVENT_CONNECT_REQUEST: ··· 307 553 rdma_event_msg(event->event), event->event); 308 554 handle_connect_req(cma_id, &event->param.conn); 309 555 break; 310 - 311 - case RDMA_CM_EVENT_ESTABLISHED: 312 - /* Accept complete */ 313 - dprintk("svcrdma: Connection completed on LISTEN xprt=%p, " 314 - "cm_id=%p\n", xprt, cma_id); 315 - break; 316 - 317 - case RDMA_CM_EVENT_DEVICE_REMOVAL: 318 - dprintk("svcrdma: Device removal xprt=%p, cm_id=%p\n", 319 - xprt, cma_id); 320 - if (xprt) { 321 - set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); 322 - svc_xprt_enqueue(&xprt->sc_xprt); 323 - } 324 - break; 325 - 326 556 default: 557 + /* NB: No device removal upcall for INADDR_ANY listeners */ 327 558 dprintk("svcrdma: Unexpected event on listening endpoint %p, " 328 559 "event = %s (%d)\n", cma_id, 329 560 rdma_event_msg(event->event), event->event); ··· 321 582 static int rdma_cma_handler(struct rdma_cm_id *cma_id, 322 583 struct rdma_cm_event *event) 323 584 { 324 - struct svc_xprt *xprt = cma_id->context; 325 - struct svcxprt_rdma *rdma = 326 - container_of(xprt, struct svcxprt_rdma, sc_xprt); 585 + struct sockaddr *sap = (struct sockaddr *)&cma_id->route.addr.dst_addr; 586 + struct svcxprt_rdma *rdma = cma_id->context; 587 + struct svc_xprt *xprt = &rdma->sc_xprt; 588 + 589 + trace_svcrdma_cm_event(event, sap); 590 + 327 591 switch (event->event) { 328 592 case RDMA_CM_EVENT_ESTABLISHED: 329 593 /* Accept complete */ ··· 339 597 case RDMA_CM_EVENT_DISCONNECTED: 340 598 dprintk("svcrdma: Disconnect on DTO xprt=%p, cm_id=%p\n", 341 599 xprt, cma_id); 342 - if (xprt) { 343 - set_bit(XPT_CLOSE, 
&xprt->xpt_flags); 344 - svc_xprt_enqueue(xprt); 345 - svc_xprt_put(xprt); 346 - } 600 + set_bit(XPT_CLOSE, &xprt->xpt_flags); 601 + svc_xprt_enqueue(xprt); 602 + svc_xprt_put(xprt); 347 603 break; 348 604 case RDMA_CM_EVENT_DEVICE_REMOVAL: 349 605 dprintk("svcrdma: Device removal cma_id=%p, xprt = %p, " 350 606 "event = %s (%d)\n", cma_id, xprt, 351 607 rdma_event_msg(event->event), event->event); 352 - if (xprt) { 353 - set_bit(XPT_CLOSE, &xprt->xpt_flags); 354 - svc_xprt_enqueue(xprt); 355 - svc_xprt_put(xprt); 356 - } 608 + set_bit(XPT_CLOSE, &xprt->xpt_flags); 609 + svc_xprt_enqueue(xprt); 610 + svc_xprt_put(xprt); 357 611 break; 358 612 default: 359 613 dprintk("svcrdma: Unexpected event on DTO endpoint %p, " ··· 372 634 struct svcxprt_rdma *cma_xprt; 373 635 int ret; 374 636 375 - dprintk("svcrdma: Creating RDMA socket\n"); 637 + dprintk("svcrdma: Creating RDMA listener\n"); 376 638 if ((sa->sa_family != AF_INET) && (sa->sa_family != AF_INET6)) { 377 639 dprintk("svcrdma: Address family %d is not supported.\n", sa->sa_family); 378 640 return ERR_PTR(-EAFNOSUPPORT); 379 641 } 380 - cma_xprt = rdma_create_xprt(serv, 1); 642 + cma_xprt = svc_rdma_create_xprt(serv, net); 381 643 if (!cma_xprt) 382 644 return ERR_PTR(-ENOMEM); 645 + set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags); 646 + strcpy(cma_xprt->sc_xprt.xpt_remotebuf, "listener"); 383 647 384 - listen_id = rdma_create_id(&init_net, rdma_listen_handler, cma_xprt, 648 + listen_id = rdma_create_id(net, rdma_listen_handler, cma_xprt, 385 649 RDMA_PS_TCP, IB_QPT_RC); 386 650 if (IS_ERR(listen_id)) { 387 651 ret = PTR_ERR(listen_id); ··· 448 708 struct rdma_conn_param conn_param; 449 709 struct rpcrdma_connect_private pmsg; 450 710 struct ib_qp_init_attr qp_attr; 711 + unsigned int ctxts, rq_depth; 451 712 struct ib_device *dev; 452 713 struct sockaddr *sap; 453 - unsigned int i, ctxts; 454 714 int ret = 0; 455 715 456 716 listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt); ··· 476 736 477 737 
/* Qualify the transport resource defaults with the 478 738 * capabilities of this particular device */ 479 - newxprt->sc_max_sge = min((size_t)dev->attrs.max_sge, 480 - (size_t)RPCSVC_MAXPAGES); 739 + newxprt->sc_max_send_sges = dev->attrs.max_sge; 740 + /* transport hdr, head iovec, one page list entry, tail iovec */ 741 + if (newxprt->sc_max_send_sges < 4) { 742 + pr_err("svcrdma: too few Send SGEs available (%d)\n", 743 + newxprt->sc_max_send_sges); 744 + goto errout; 745 + } 481 746 newxprt->sc_max_req_size = svcrdma_max_req_size; 482 747 newxprt->sc_max_requests = svcrdma_max_requests; 483 748 newxprt->sc_max_bc_requests = svcrdma_max_bc_requests; 484 - newxprt->sc_rq_depth = newxprt->sc_max_requests + 485 - newxprt->sc_max_bc_requests; 486 - if (newxprt->sc_rq_depth > dev->attrs.max_qp_wr) { 749 + rq_depth = newxprt->sc_max_requests + newxprt->sc_max_bc_requests; 750 + if (rq_depth > dev->attrs.max_qp_wr) { 487 751 pr_warn("svcrdma: reducing receive depth to %d\n", 488 752 dev->attrs.max_qp_wr); 489 - newxprt->sc_rq_depth = dev->attrs.max_qp_wr; 490 - newxprt->sc_max_requests = newxprt->sc_rq_depth - 2; 753 + rq_depth = dev->attrs.max_qp_wr; 754 + newxprt->sc_max_requests = rq_depth - 2; 491 755 newxprt->sc_max_bc_requests = 2; 492 756 } 493 757 newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests); 494 758 ctxts = rdma_rw_mr_factor(dev, newxprt->sc_port_num, RPCSVC_MAXPAGES); 495 759 ctxts *= newxprt->sc_max_requests; 496 - newxprt->sc_sq_depth = newxprt->sc_rq_depth + ctxts; 760 + newxprt->sc_sq_depth = rq_depth + ctxts; 497 761 if (newxprt->sc_sq_depth > dev->attrs.max_qp_wr) { 498 762 pr_warn("svcrdma: reducing send depth to %d\n", 499 763 dev->attrs.max_qp_wr); 500 764 newxprt->sc_sq_depth = dev->attrs.max_qp_wr; 501 765 } 502 766 atomic_set(&newxprt->sc_sq_avail, newxprt->sc_sq_depth); 503 - 504 - if (!svc_rdma_prealloc_ctxts(newxprt)) 505 - goto errout; 506 767 507 768 newxprt->sc_pd = ib_alloc_pd(dev, 0); 508 769 if (IS_ERR(newxprt->sc_pd)) 
{ ··· 516 775 dprintk("svcrdma: error creating SQ CQ for connect request\n"); 517 776 goto errout; 518 777 } 519 - newxprt->sc_rq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_rq_depth, 778 + newxprt->sc_rq_cq = ib_alloc_cq(dev, newxprt, rq_depth, 520 779 0, IB_POLL_WORKQUEUE); 521 780 if (IS_ERR(newxprt->sc_rq_cq)) { 522 781 dprintk("svcrdma: error creating RQ CQ for connect request\n"); ··· 529 788 qp_attr.port_num = newxprt->sc_port_num; 530 789 qp_attr.cap.max_rdma_ctxs = ctxts; 531 790 qp_attr.cap.max_send_wr = newxprt->sc_sq_depth - ctxts; 532 - qp_attr.cap.max_recv_wr = newxprt->sc_rq_depth; 533 - qp_attr.cap.max_send_sge = newxprt->sc_max_sge; 534 - qp_attr.cap.max_recv_sge = newxprt->sc_max_sge; 791 + qp_attr.cap.max_recv_wr = rq_depth; 792 + qp_attr.cap.max_send_sge = newxprt->sc_max_send_sges; 793 + qp_attr.cap.max_recv_sge = 1; 535 794 qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 536 795 qp_attr.qp_type = IB_QPT_RC; 537 796 qp_attr.send_cq = newxprt->sc_sq_cq; ··· 556 815 !rdma_ib_or_roce(dev, newxprt->sc_port_num)) 557 816 goto errout; 558 817 559 - /* Post receive buffers */ 560 - for (i = 0; i < newxprt->sc_max_requests; i++) { 561 - ret = svc_rdma_post_recv(newxprt); 562 - if (ret) { 563 - dprintk("svcrdma: failure posting receive buffers\n"); 564 - goto errout; 565 - } 566 - } 818 + if (!svc_rdma_post_recvs(newxprt)) 819 + goto errout; 567 820 568 821 /* Swap out the handler */ 569 822 newxprt->sc_cm_id->event_handler = rdma_cma_handler; ··· 591 856 dprintk(" local address : %pIS:%u\n", sap, rpc_get_port(sap)); 592 857 sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr; 593 858 dprintk(" remote address : %pIS:%u\n", sap, rpc_get_port(sap)); 594 - dprintk(" max_sge : %d\n", newxprt->sc_max_sge); 859 + dprintk(" max_sge : %d\n", newxprt->sc_max_send_sges); 595 860 dprintk(" sq_depth : %d\n", newxprt->sc_sq_depth); 596 861 dprintk(" rdma_rw_ctxs : %d\n", ctxts); 597 862 dprintk(" max_requests : %d\n", newxprt->sc_max_requests); 598 863 dprintk(" 
ord : %d\n", conn_param.initiator_depth); 599 864 865 + trace_svcrdma_xprt_accept(&newxprt->sc_xprt); 600 866 return &newxprt->sc_xprt; 601 867 602 868 errout: 603 869 dprintk("svcrdma: failure accepting new connection rc=%d.\n", ret); 870 + trace_svcrdma_xprt_fail(&newxprt->sc_xprt); 604 871 /* Take a reference in case the DTO handler runs */ 605 872 svc_xprt_get(&newxprt->sc_xprt); 606 873 if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp)) ··· 633 896 { 634 897 struct svcxprt_rdma *rdma = 635 898 container_of(xprt, struct svcxprt_rdma, sc_xprt); 636 - dprintk("svc: svc_rdma_detach(%p)\n", xprt); 637 899 638 900 /* Disconnect and flush posted WQE */ 639 901 rdma_disconnect(rdma->sc_cm_id); ··· 644 908 container_of(work, struct svcxprt_rdma, sc_work); 645 909 struct svc_xprt *xprt = &rdma->sc_xprt; 646 910 647 - dprintk("svcrdma: %s(%p)\n", __func__, rdma); 911 + trace_svcrdma_xprt_free(xprt); 648 912 649 913 if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) 650 914 ib_drain_qp(rdma->sc_qp); ··· 654 918 pr_err("svcrdma: sc_xprt still in use? (%d)\n", 655 919 kref_read(&xprt->xpt_ref)); 656 920 657 - while (!list_empty(&rdma->sc_read_complete_q)) { 658 - struct svc_rdma_op_ctxt *ctxt; 659 - ctxt = list_first_entry(&rdma->sc_read_complete_q, 660 - struct svc_rdma_op_ctxt, list); 661 - list_del(&ctxt->list); 662 - svc_rdma_put_context(ctxt, 1); 663 - } 664 - while (!list_empty(&rdma->sc_rq_dto_q)) { 665 - struct svc_rdma_op_ctxt *ctxt; 666 - ctxt = list_first_entry(&rdma->sc_rq_dto_q, 667 - struct svc_rdma_op_ctxt, list); 668 - list_del(&ctxt->list); 669 - svc_rdma_put_context(ctxt, 1); 670 - } 671 - 672 - /* Warn if we leaked a resource or under-referenced */ 673 - if (rdma->sc_ctxt_used != 0) 674 - pr_err("svcrdma: ctxt still in use? 
(%d)\n", 675 - rdma->sc_ctxt_used); 921 + svc_rdma_flush_recv_queues(rdma); 676 922 677 923 /* Final put of backchannel client transport */ 678 924 if (xprt->xpt_bc_xprt) { ··· 663 945 } 664 946 665 947 svc_rdma_destroy_rw_ctxts(rdma); 666 - svc_rdma_destroy_ctxts(rdma); 948 + svc_rdma_send_ctxts_destroy(rdma); 949 + svc_rdma_recv_ctxts_destroy(rdma); 667 950 668 951 /* Destroy the QP if present (not a listener) */ 669 952 if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) ··· 716 997 717 998 static void svc_rdma_kill_temp_xprt(struct svc_xprt *xprt) 718 999 { 719 - } 720 - 721 - int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) 722 - { 723 - struct ib_send_wr *bad_wr, *n_wr; 724 - int wr_count; 725 - int i; 726 - int ret; 727 - 728 - if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags)) 729 - return -ENOTCONN; 730 - 731 - wr_count = 1; 732 - for (n_wr = wr->next; n_wr; n_wr = n_wr->next) 733 - wr_count++; 734 - 735 - /* If the SQ is full, wait until an SQ entry is available */ 736 - while (1) { 737 - if ((atomic_sub_return(wr_count, &xprt->sc_sq_avail) < 0)) { 738 - atomic_inc(&rdma_stat_sq_starve); 739 - 740 - /* Wait until SQ WR available if SQ still full */ 741 - atomic_add(wr_count, &xprt->sc_sq_avail); 742 - wait_event(xprt->sc_send_wait, 743 - atomic_read(&xprt->sc_sq_avail) > wr_count); 744 - if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags)) 745 - return -ENOTCONN; 746 - continue; 747 - } 748 - /* Take a transport ref for each WR posted */ 749 - for (i = 0; i < wr_count; i++) 750 - svc_xprt_get(&xprt->sc_xprt); 751 - 752 - /* Bump used SQ WR count and post */ 753 - ret = ib_post_send(xprt->sc_qp, wr, &bad_wr); 754 - if (ret) { 755 - set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); 756 - for (i = 0; i < wr_count; i ++) 757 - svc_xprt_put(&xprt->sc_xprt); 758 - dprintk("svcrdma: failed to post SQ WR rc=%d\n", ret); 759 - dprintk(" sc_sq_avail=%d, sc_sq_depth=%d\n", 760 - atomic_read(&xprt->sc_sq_avail), 761 - xprt->sc_sq_depth); 762 - 
wake_up(&xprt->sc_send_wait); 763 - } 764 - break; 765 - } 766 - return ret; 767 1000 }
+4
net/sunrpc/xprtrdma/transport.c
··· 51 51 #include <linux/module.h> 52 52 #include <linux/slab.h> 53 53 #include <linux/seq_file.h> 54 + #include <linux/smp.h> 55 + 54 56 #include <linux/sunrpc/addr.h> 57 + #include <linux/sunrpc/svc_rdma.h> 55 58 56 59 #include "xprt_rdma.h" 60 + #include <trace/events/rpcrdma.h> 57 61 58 62 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 59 63 # define RPCDBG_FACILITY RPCDBG_TRANS
+1
net/sunrpc/xprtrdma/verbs.c
··· 59 59 #include <rdma/ib_cm.h> 60 60 61 61 #include "xprt_rdma.h" 62 + #include <trace/events/rpcrdma.h> 62 63 63 64 /* 64 65 * Globals/Macros
-2
net/sunrpc/xprtrdma/xprt_rdma.h
··· 675 675 extern struct xprt_class xprt_rdma_bc; 676 676 677 677 #endif /* _LINUX_SUNRPC_XPRT_RDMA_H */ 678 - 679 - #include <trace/events/rpcrdma.h>