Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

SUNRPC: Move xpt_mutex into socket xpo_sendto methods

It appears that the RPC/RDMA transport does not need serialization
of calls to its xpo_sendto method. Move the mutex into the socket
methods that still need that serialization.

Tail latencies are unambiguously better with this patch applied.
fio randrw 8KB 70/30 on NFSv3, smaller numbers are better:

clat percentiles (usec):

With xpt_mutex:
r | 99.99th=[ 8848]
w | 99.99th=[ 9634]

Without xpt_mutex:
r | 99.99th=[ 8586]
w | 99.99th=[ 8979]

Serializing the construction of RPC/RDMA transport headers is not
really necessary at this point, because the Linux NFS server
implementation never changes its credit grant on a connection. If
that should change, then svc_rdma_sendto will need to serialize
access to the transport's credit grant fields.

Reported-by: kbuild test robot <lkp@intel.com>
[ cel: fix uninitialized variable warning ]
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>

+64 -36
+6
include/linux/sunrpc/svc_xprt.h
··· 117 117 return 0; 118 118 } 119 119 120 + static inline bool svc_xprt_is_dead(const struct svc_xprt *xprt) 121 + { 122 + return (test_bit(XPT_DEAD, &xprt->xpt_flags) != 0) || 123 + (test_bit(XPT_CLOSE, &xprt->xpt_flags) != 0); 124 + } 125 + 120 126 int svc_reg_xprt_class(struct svc_xprt_class *); 121 127 void svc_unreg_xprt_class(struct svc_xprt_class *); 122 128 void svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *,
+3 -9
net/sunrpc/svc_xprt.c
··· 914 914 xb->page_len + 915 915 xb->tail[0].iov_len; 916 916 trace_svc_sendto(xb); 917 - 918 - /* Grab mutex to serialize outgoing data. */ 919 - mutex_lock(&xprt->xpt_mutex); 920 917 trace_svc_stats_latency(rqstp); 921 - if (test_bit(XPT_DEAD, &xprt->xpt_flags) 922 - || test_bit(XPT_CLOSE, &xprt->xpt_flags)) 923 - len = -ENOTCONN; 924 - else 925 - len = xprt->xpt_ops->xpo_sendto(rqstp); 926 - mutex_unlock(&xprt->xpt_mutex); 918 + 919 + len = xprt->xpt_ops->xpo_sendto(rqstp); 920 + 927 921 trace_svc_send(rqstp, len); 928 922 svc_xprt_release(rqstp); 929 923
+25
net/sunrpc/svcsock.c
··· 506 506 * svc_udp_sendto - Send out a reply on a UDP socket 507 507 * @rqstp: completed svc_rqst 508 508 * 509 + * xpt_mutex ensures @rqstp's whole message is written to the socket 510 + * without interruption. 511 + * 509 512 * Returns the number of bytes sent, or a negative errno. 510 513 */ 511 514 static int svc_udp_sendto(struct svc_rqst *rqstp) ··· 534 531 535 532 svc_set_cmsg_data(rqstp, cmh); 536 533 534 + mutex_lock(&xprt->xpt_mutex); 535 + 536 + if (svc_xprt_is_dead(xprt)) 537 + goto out_notconn; 538 + 537 539 err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent); 538 540 xdr_free_bvec(xdr); 539 541 if (err == -ECONNREFUSED) { ··· 546 538 err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent); 547 539 xdr_free_bvec(xdr); 548 540 } 541 + 542 + mutex_unlock(&xprt->xpt_mutex); 549 543 if (err < 0) 550 544 return err; 551 545 return sent; 546 + 547 + out_notconn: 548 + mutex_unlock(&xprt->xpt_mutex); 549 + return -ENOTCONN; 552 550 } 553 551 554 552 static int svc_udp_has_wspace(struct svc_xprt *xprt) ··· 1077 1063 * svc_tcp_sendto - Send out a reply on a TCP socket 1078 1064 * @rqstp: completed svc_rqst 1079 1065 * 1066 + * xpt_mutex ensures @rqstp's whole message is written to the socket 1067 + * without interruption. 1068 + * 1080 1069 * Returns the number of bytes sent, or a negative errno. 
1081 1070 */ 1082 1071 static int svc_tcp_sendto(struct svc_rqst *rqstp) ··· 1097 1080 1098 1081 svc_release_skb(rqstp); 1099 1082 1083 + mutex_lock(&xprt->xpt_mutex); 1084 + if (svc_xprt_is_dead(xprt)) 1085 + goto out_notconn; 1100 1086 err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, marker, &sent); 1101 1087 xdr_free_bvec(xdr); 1102 1088 if (err < 0 || sent != (xdr->len + sizeof(marker))) 1103 1089 goto out_close; 1090 + mutex_unlock(&xprt->xpt_mutex); 1104 1091 return sent; 1105 1092 1093 + out_notconn: 1094 + mutex_unlock(&xprt->xpt_mutex); 1095 + return -ENOTCONN; 1106 1096 out_close: 1107 1097 pr_notice("rpc-srv/tcp: %s: %s %d when sending %d bytes - shutting down socket\n", 1108 1098 xprt->xpt_server->sv_name, ··· 1117 1093 (err < 0) ? err : sent, xdr->len); 1118 1094 set_bit(XPT_CLOSE, &xprt->xpt_flags); 1119 1095 svc_xprt_enqueue(xprt); 1096 + mutex_unlock(&xprt->xpt_mutex); 1120 1097 return -EAGAIN; 1121 1098 } 1122 1099
+16 -19
net/sunrpc/xprtrdma/svc_rdma_backchannel.c
··· 210 210 return -ENOTCONN; 211 211 } 212 212 213 - /* Send an RPC call on the passive end of a transport 214 - * connection. 213 + /** 214 + * xprt_rdma_bc_send_request - Send a reverse-direction Call 215 + * @rqst: rpc_rqst containing Call message to be sent 216 + * 217 + * Return values: 218 + * %0 if the message was sent successfully 219 + * %-ENOTCONN if the message was not sent 215 220 */ 216 - static int 217 - xprt_rdma_bc_send_request(struct rpc_rqst *rqst) 221 + static int xprt_rdma_bc_send_request(struct rpc_rqst *rqst) 218 222 { 219 223 struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt; 220 - struct svcxprt_rdma *rdma; 224 + struct svcxprt_rdma *rdma = 225 + container_of(sxprt, struct svcxprt_rdma, sc_xprt); 221 226 int ret; 222 227 223 228 dprintk("svcrdma: sending bc call with xid: %08x\n", 224 229 be32_to_cpu(rqst->rq_xid)); 225 230 226 - mutex_lock(&sxprt->xpt_mutex); 231 + if (test_bit(XPT_DEAD, &sxprt->xpt_flags)) 232 + return -ENOTCONN; 227 233 228 - ret = -ENOTCONN; 229 - rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt); 230 - if (!test_bit(XPT_DEAD, &sxprt->xpt_flags)) { 231 - ret = rpcrdma_bc_send_request(rdma, rqst); 232 - if (ret == -ENOTCONN) 233 - svc_close_xprt(sxprt); 234 - } 235 - 236 - mutex_unlock(&sxprt->xpt_mutex); 237 - 238 - if (ret < 0) 239 - return ret; 240 - return 0; 234 + ret = rpcrdma_bc_send_request(rdma, rqst); 235 + if (ret == -ENOTCONN) 236 + svc_close_xprt(sxprt); 237 + return ret; 241 238 } 242 239 243 240 static void
+4 -6
net/sunrpc/xprtrdma/svc_rdma_sendto.c
··· 868 868 __be32 *p; 869 869 int ret; 870 870 871 - /* Create the RDMA response header. xprt->xpt_mutex, 872 - * acquired in svc_send(), serializes RPC replies. The 873 - * code path below that inserts the credit grant value 874 - * into each transport header runs only inside this 875 - * critical section. 876 - */ 871 + ret = -ENOTCONN; 872 + if (svc_xprt_is_dead(xprt)) 873 + goto err0; 874 + 877 875 ret = -ENOMEM; 878 876 sctxt = svc_rdma_send_ctxt_get(rdma); 879 877 if (!sctxt)
+10 -2
net/sunrpc/xprtsock.c
··· 2548 2548 return sent; 2549 2549 } 2550 2550 2551 - /* 2552 - * The send routine. Borrows from svc_send 2551 + /** 2552 + * bc_send_request - Send a backchannel Call on a TCP socket 2553 + * @req: rpc_rqst containing Call message to be sent 2554 + * 2555 + * xpt_mutex ensures @req's whole message is written to the socket 2556 + * without interruption. 2557 + * 2558 + * Return values: 2559 + * %0 if the message was sent successfully 2560 + * %ENOTCONN if the message was not sent 2553 2561 */ 2554 2562 static int bc_send_request(struct rpc_rqst *req) 2555 2563 {