Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

sunrpc: allocate a separate bvec array for socket sends

svc_tcp_sendmsg() calls xdr_buf_to_bvec() with the second slot of
rq_bvec as the start, but still passes the full array length
(rq_maxpages) rather than one less, which could lead to an array
overrun. Also, rq_bvec is always rq_maxpages entries long, which can be
too short in some cases, since the TCP record marker consumes one slot.

Fix both problems by adding a separate bvec array to the svc_sock that
is used specifically for sending. For TCP, make this array one slot
longer than rq_maxpages, to account for the record marker; stream
sockets that are not flagged SVC_SOCK_TEMPORARY get no array at all.
For UDP, allocate only as many slots as are needed
(SUNRPC_MAX_UDP_SENDPAGES), since a UDP reply is limited to 64k of
payload.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: NeilBrown <neil@brown.name>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>

authored by

Jeff Layton and committed by
Chuck Lever
6b3b697d ebd3330d

+51 -7
+3
include/linux/sunrpc/svcsock.h
··· 26 26 void (*sk_odata)(struct sock *); 27 27 void (*sk_owspace)(struct sock *); 28 28 29 + /* For sends (protected by xpt_mutex) */ 30 + struct bio_vec *sk_bvec; 31 + 29 32 /* private TCP part */ 30 33 /* On-the-wire fragment header: */ 31 34 __be32 sk_marker;
+48 -7
net/sunrpc/svcsock.c
··· 68 68 69 69 #define RPCDBG_FACILITY RPCDBG_SVCXPRT 70 70 71 + /* 72 + * For UDP: 73 + * 1 for header page 74 + * enough pages for RPCSVC_MAXPAYLOAD_UDP 75 + * 1 in case payload is not aligned 76 + * 1 for tail page 77 + */ 78 + enum { 79 + SUNRPC_MAX_UDP_SENDPAGES = 1 + RPCSVC_MAXPAYLOAD_UDP / PAGE_SIZE + 1 + 1 80 + }; 81 + 71 82 /* To-do: to avoid tying up an nfsd thread while waiting for a 72 83 * handshake request, the request could instead be deferred. 73 84 */ ··· 751 740 if (svc_xprt_is_dead(xprt)) 752 741 goto out_notconn; 753 742 754 - count = xdr_buf_to_bvec(rqstp->rq_bvec, rqstp->rq_maxpages, xdr); 743 + count = xdr_buf_to_bvec(svsk->sk_bvec, SUNRPC_MAX_UDP_SENDPAGES, xdr); 755 744 756 - iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec, 745 + iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, svsk->sk_bvec, 757 746 count, rqstp->rq_res.len); 758 747 err = sock_sendmsg(svsk->sk_sock, &msg); 759 748 if (err == -ECONNREFUSED) { 760 749 /* ICMP error on earlier request. */ 761 - iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec, 750 + iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, svsk->sk_bvec, 762 751 count, rqstp->rq_res.len); 763 752 err = sock_sendmsg(svsk->sk_sock, &msg); 764 753 } ··· 1247 1236 int ret; 1248 1237 1249 1238 /* The stream record marker is copied into a temporary page 1250 - * fragment buffer so that it can be included in rq_bvec. 1239 + * fragment buffer so that it can be included in sk_bvec. 
1251 1240 */ 1252 1241 buf = page_frag_alloc(&svsk->sk_frag_cache, sizeof(marker), 1253 1242 GFP_KERNEL); 1254 1243 if (!buf) 1255 1244 return -ENOMEM; 1256 1245 memcpy(buf, &marker, sizeof(marker)); 1257 - bvec_set_virt(rqstp->rq_bvec, buf, sizeof(marker)); 1246 + bvec_set_virt(svsk->sk_bvec, buf, sizeof(marker)); 1258 1247 1259 - count = xdr_buf_to_bvec(rqstp->rq_bvec + 1, rqstp->rq_maxpages, 1248 + count = xdr_buf_to_bvec(svsk->sk_bvec + 1, rqstp->rq_maxpages, 1260 1249 &rqstp->rq_res); 1261 1250 1262 - iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec, 1251 + iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, svsk->sk_bvec, 1263 1252 1 + count, sizeof(marker) + rqstp->rq_res.len); 1264 1253 ret = sock_sendmsg(svsk->sk_sock, &msg); 1265 1254 page_frag_free(buf); ··· 1404 1393 spin_unlock_bh(&serv->sv_lock); 1405 1394 } 1406 1395 1396 + static int svc_sock_sendpages(struct svc_serv *serv, struct socket *sock, int flags) 1397 + { 1398 + switch (sock->type) { 1399 + case SOCK_STREAM: 1400 + /* +1 for TCP record marker */ 1401 + if (flags & SVC_SOCK_TEMPORARY) 1402 + return svc_serv_maxpages(serv) + 1; 1403 + return 0; 1404 + case SOCK_DGRAM: 1405 + return SUNRPC_MAX_UDP_SENDPAGES; 1406 + } 1407 + return -EINVAL; 1408 + } 1409 + 1407 1410 /* 1408 1411 * Initialize socket for RPC use and create svc_sock struct 1409 1412 */ ··· 1428 1403 struct svc_sock *svsk; 1429 1404 struct sock *inet; 1430 1405 int pmap_register = !(flags & SVC_SOCK_ANONYMOUS); 1406 + int sendpages; 1431 1407 unsigned long pages; 1408 + 1409 + sendpages = svc_sock_sendpages(serv, sock, flags); 1410 + if (sendpages < 0) 1411 + return ERR_PTR(sendpages); 1432 1412 1433 1413 pages = svc_serv_maxpages(serv); 1434 1414 svsk = kzalloc(struct_size(svsk, sk_pages, pages), GFP_KERNEL); 1435 1415 if (!svsk) 1436 1416 return ERR_PTR(-ENOMEM); 1417 + 1418 + if (sendpages) { 1419 + svsk->sk_bvec = kcalloc(sendpages, sizeof(*svsk->sk_bvec), GFP_KERNEL); 1420 + if (!svsk->sk_bvec) { 1421 + kfree(svsk); 1422 + 
return ERR_PTR(-ENOMEM); 1423 + } 1424 + } 1425 + 1437 1426 svsk->sk_maxpages = pages; 1438 1427 1439 1428 inet = sock->sk; ··· 1459 1420 inet->sk_protocol, 1460 1421 ntohs(inet_sk(inet)->inet_sport)); 1461 1422 if (err < 0) { 1423 + kfree(svsk->sk_bvec); 1462 1424 kfree(svsk); 1463 1425 return ERR_PTR(err); 1464 1426 } ··· 1677 1637 sock_release(sock); 1678 1638 1679 1639 page_frag_cache_drain(&svsk->sk_frag_cache); 1640 + kfree(svsk->sk_bvec); 1680 1641 kfree(svsk); 1681 1642 }