Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'nfsd-5.7-rc-1' of git://git.linux-nfs.org/projects/cel/cel-2.6

Pull nfsd fixes from Chuck Lever:
"The first set of 5.7-rc fixes for NFS server issues.

These were all unresolved at the time the 5.7 window opened, and
needed some additional time to ensure they were correctly addressed.
They are ready now.

At the moment I know of one more urgent issue regarding the NFS
server. A fix has been tested and is under review. I expect to send
one more pull request, containing this fix (which now consists of 3
patches).

Fixes:

- Address several use-after-free and memory leak bugs

- Prevent a backchannel livelock"

* tag 'nfsd-5.7-rc-1' of git://git.linux-nfs.org/projects/cel/cel-2.6:
svcrdma: Fix leak of svc_rdma_recv_ctxt objects
svcrdma: Fix trace point use-after-free race
SUNRPC: Fix backchannel RPC soft lockups
SUNRPC/cache: Fix unsafe traverse caused double-free in cache_purge
nfsd: memory corruption in nfsd4_lock()

+89 -44
+3 -1
fs/nfsd/nfs4callback.c
··· 1312 1312 container_of(work, struct nfsd4_callback, cb_work); 1313 1313 struct nfs4_client *clp = cb->cb_clp; 1314 1314 struct rpc_clnt *clnt; 1315 + int flags; 1315 1316 1316 1317 if (cb->cb_need_restart) { 1317 1318 cb->cb_need_restart = false; ··· 1341 1340 } 1342 1341 1343 1342 cb->cb_msg.rpc_cred = clp->cl_cb_cred; 1344 - rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN, 1343 + flags = clp->cl_minorversion ? RPC_TASK_NOCONNECT : RPC_TASK_SOFTCONN; 1344 + rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | flags, 1345 1345 cb->cb_ops ? &nfsd4_cb_ops : &nfsd4_cb_probe_ops, cb); 1346 1346 } 1347 1347
+2
fs/nfsd/nfs4state.c
··· 267 267 if (!nbl) { 268 268 nbl = kmalloc(sizeof(*nbl), GFP_KERNEL); 269 269 if (nbl) { 270 + INIT_LIST_HEAD(&nbl->nbl_list); 271 + INIT_LIST_HEAD(&nbl->nbl_lru); 270 272 fh_copy_shallow(&nbl->nbl_fh, fh); 271 273 locks_init_lock(&nbl->nbl_lock); 272 274 nfsd4_init_cb(&nbl->nbl_cb, lo->lo_owner.so_client,
+1
include/linux/sunrpc/svc_rdma.h
··· 170 170 extern void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma, 171 171 struct svc_rdma_recv_ctxt *ctxt); 172 172 extern void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma); 173 + extern void svc_rdma_release_rqst(struct svc_rqst *rqstp); 173 174 extern int svc_rdma_recvfrom(struct svc_rqst *); 174 175 175 176 /* svc_rdma_rw.c */
+36 -14
include/trace/events/rpcrdma.h
··· 1695 1695 1696 1696 TRACE_EVENT(svcrdma_post_send, 1697 1697 TP_PROTO( 1698 - const struct ib_send_wr *wr, 1699 - int status 1698 + const struct ib_send_wr *wr 1700 1699 ), 1701 1700 1702 - TP_ARGS(wr, status), 1701 + TP_ARGS(wr), 1703 1702 1704 1703 TP_STRUCT__entry( 1705 1704 __field(const void *, cqe) 1706 1705 __field(unsigned int, num_sge) 1707 1706 __field(u32, inv_rkey) 1708 - __field(int, status) 1709 1707 ), 1710 1708 1711 1709 TP_fast_assign( ··· 1711 1713 __entry->num_sge = wr->num_sge; 1712 1714 __entry->inv_rkey = (wr->opcode == IB_WR_SEND_WITH_INV) ? 1713 1715 wr->ex.invalidate_rkey : 0; 1714 - __entry->status = status; 1715 1716 ), 1716 1717 1717 - TP_printk("cqe=%p num_sge=%u inv_rkey=0x%08x status=%d", 1718 + TP_printk("cqe=%p num_sge=%u inv_rkey=0x%08x", 1718 1719 __entry->cqe, __entry->num_sge, 1719 - __entry->inv_rkey, __entry->status 1720 + __entry->inv_rkey 1720 1721 ) 1721 1722 ); 1722 1723 ··· 1780 1783 TRACE_EVENT(svcrdma_post_rw, 1781 1784 TP_PROTO( 1782 1785 const void *cqe, 1783 - int sqecount, 1784 - int status 1786 + int sqecount 1785 1787 ), 1786 1788 1787 - TP_ARGS(cqe, sqecount, status), 1789 + TP_ARGS(cqe, sqecount), 1788 1790 1789 1791 TP_STRUCT__entry( 1790 1792 __field(const void *, cqe) 1791 1793 __field(int, sqecount) 1792 - __field(int, status) 1793 1794 ), 1794 1795 1795 1796 TP_fast_assign( 1796 1797 __entry->cqe = cqe; 1797 1798 __entry->sqecount = sqecount; 1798 - __entry->status = status; 1799 1799 ), 1800 1800 1801 - TP_printk("cqe=%p sqecount=%d status=%d", 1802 - __entry->cqe, __entry->sqecount, __entry->status 1801 + TP_printk("cqe=%p sqecount=%d", 1802 + __entry->cqe, __entry->sqecount 1803 1803 ) 1804 1804 ); 1805 1805 ··· 1863 1869 1864 1870 DEFINE_SQ_EVENT(full); 1865 1871 DEFINE_SQ_EVENT(retry); 1872 + 1873 + TRACE_EVENT(svcrdma_sq_post_err, 1874 + TP_PROTO( 1875 + const struct svcxprt_rdma *rdma, 1876 + int status 1877 + ), 1878 + 1879 + TP_ARGS(rdma, status), 1880 + 1881 + TP_STRUCT__entry( 1882 + __field(int, avail) 1883 + __field(int, depth) 1884 + __field(int, status) 1885 + __string(addr, rdma->sc_xprt.xpt_remotebuf) 1886 + ), 1887 + 1888 + TP_fast_assign( 1889 + __entry->avail = atomic_read(&rdma->sc_sq_avail); 1890 + __entry->depth = rdma->sc_sq_depth; 1891 + __entry->status = status; 1892 + __assign_str(addr, rdma->sc_xprt.xpt_remotebuf); 1893 + ), 1894 + 1895 + TP_printk("addr=%s sc_sq_avail=%d/%d status=%d", 1896 + __get_str(addr), __entry->avail, __entry->depth, 1897 + __entry->status 1898 + ) 1899 + ); 1866 1900 1867 1901 #endif /* _TRACE_RPCRDMA_H */ 1868 1902
+3 -2
net/sunrpc/cache.c
··· 529 529 { 530 530 struct cache_head *ch = NULL; 531 531 struct hlist_head *head = NULL; 532 - struct hlist_node *tmp = NULL; 533 532 int i = 0; 534 533 535 534 spin_lock(&detail->hash_lock); ··· 540 541 dprintk("RPC: %d entries in %s cache\n", detail->entries, detail->name); 541 542 for (i = 0; i < detail->hash_size; i++) { 542 543 head = &detail->hash_table[i]; 543 - hlist_for_each_entry_safe(ch, tmp, head, cache_list) { 544 + while (!hlist_empty(head)) { 545 + ch = hlist_entry(head->first, struct cache_head, 546 + cache_list); 544 547 sunrpc_begin_cache_remove_entry(ch, detail); 545 548 spin_unlock(&detail->hash_lock); 546 549 sunrpc_end_cache_remove_entry(ch, detail);
+2 -3
net/sunrpc/svc_xprt.c
··· 908 908 if (!xprt) 909 909 goto out; 910 910 911 - /* release the receive skb before sending the reply */ 912 - xprt->xpt_ops->xpo_release_rqst(rqstp); 913 - 914 911 /* calculate over-all length */ 915 912 xb = &rqstp->rq_res; 916 913 xb->len = xb->head[0].iov_len + ··· 1037 1040 1038 1041 dprintk("svc: svc_delete_xprt(%p)\n", xprt); 1039 1042 xprt->xpt_ops->xpo_detach(xprt); 1043 + if (xprt->xpt_bc_xprt) 1044 + xprt->xpt_bc_xprt->ops->close(xprt->xpt_bc_xprt); 1040 1045 1041 1046 spin_lock_bh(&serv->sv_lock); 1042 1047 list_del_init(&xprt->xpt_list);
+4
net/sunrpc/svcsock.c
··· 527 527 unsigned int uninitialized_var(sent); 528 528 int err; 529 529 530 + svc_release_udp_skb(rqstp); 531 + 530 532 svc_set_cmsg_data(rqstp, cmh); 531 533 532 534 err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent); ··· 1077 1075 }; 1078 1076 unsigned int uninitialized_var(sent); 1079 1077 int err; 1078 + 1079 + svc_release_skb(rqstp); 1080 1080 1081 1081 err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, marker, &sent); 1082 1082 xdr_free_bvec(xdr);
+2
net/sunrpc/xprtrdma/svc_rdma_backchannel.c
··· 244 244 xprt_rdma_bc_close(struct rpc_xprt *xprt) 245 245 { 246 246 dprintk("svcrdma: %s: xprt %p\n", __func__, xprt); 247 + 248 + xprt_disconnect_done(xprt); 247 249 xprt->cwnd = RPC_CWNDSHIFT; 248 250 } 249 251
+22
net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
··· 223 223 svc_rdma_recv_ctxt_destroy(rdma, ctxt); 224 224 } 225 225 226 + /** 227 + * svc_rdma_release_rqst - Release transport-specific per-rqst resources 228 + * @rqstp: svc_rqst being released 229 + * 230 + * Ensure that the recv_ctxt is released whether or not a Reply 231 + * was sent. For example, the client could close the connection, 232 + * or svc_process could drop an RPC, before the Reply is sent. 233 + */ 234 + void svc_rdma_release_rqst(struct svc_rqst *rqstp) 235 + { 236 + struct svc_rdma_recv_ctxt *ctxt = rqstp->rq_xprt_ctxt; 237 + struct svc_xprt *xprt = rqstp->rq_xprt; 238 + struct svcxprt_rdma *rdma = 239 + container_of(xprt, struct svcxprt_rdma, sc_xprt); 240 + 241 + rqstp->rq_xprt_ctxt = NULL; 242 + if (ctxt) 243 + svc_rdma_recv_ctxt_put(rdma, ctxt); 244 + } 245 + 226 246 static int __svc_rdma_post_recv(struct svcxprt_rdma *rdma, 227 247 struct svc_rdma_recv_ctxt *ctxt) 228 248 { ··· 839 819 struct svc_rdma_recv_ctxt *ctxt; 840 820 __be32 *p; 841 821 int ret; 822 + 823 + rqstp->rq_xprt_ctxt = NULL; 842 824 843 825 spin_lock(&rdma_xprt->sc_rq_dto_lock); 844 826 ctxt = svc_rdma_next_recv_ctxt(&rdma_xprt->sc_read_complete_q);
+1 -2
net/sunrpc/xprtrdma/svc_rdma_rw.c
··· 323 323 if (atomic_sub_return(cc->cc_sqecount, 324 324 &rdma->sc_sq_avail) > 0) { 325 325 ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr); 326 - trace_svcrdma_post_rw(&cc->cc_cqe, 327 - cc->cc_sqecount, ret); 328 326 if (ret) 329 327 break; 330 328 return 0; ··· 335 337 trace_svcrdma_sq_retry(rdma); 336 338 } while (1); 337 339 340 + trace_svcrdma_sq_post_err(rdma, ret); 338 341 set_bit(XPT_CLOSE, &xprt->xpt_flags); 339 342 340 343 /* If even one was posted, there will be a completion. */
+12 -17
net/sunrpc/xprtrdma/svc_rdma_sendto.c
··· 322 322 } 323 323 324 324 svc_xprt_get(&rdma->sc_xprt); 325 + trace_svcrdma_post_send(wr); 325 326 ret = ib_post_send(rdma->sc_qp, wr, NULL); 326 - trace_svcrdma_post_send(wr, ret); 327 - if (ret) { 328 - set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); 329 - svc_xprt_put(&rdma->sc_xprt); 330 - wake_up(&rdma->sc_send_wait); 331 - } 332 - break; 327 + if (ret) 328 + break; 329 + return 0; 333 330 } 331 + 332 + trace_svcrdma_sq_post_err(rdma, ret); 333 + set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); 334 + svc_xprt_put(&rdma->sc_xprt); 335 + wake_up(&rdma->sc_send_wait); 334 336 return ret; 335 337 } 336 338 ··· 926 924 ret = svc_rdma_send_reply_msg(rdma, sctxt, rctxt, rqstp); 927 925 if (ret < 0) 928 926 goto err1; 929 - ret = 0; 930 - 931 - out: 932 - rqstp->rq_xprt_ctxt = NULL; 933 - svc_rdma_recv_ctxt_put(rdma, rctxt); 934 - return ret; 927 + return 0; 935 928 936 929 err2: 937 930 if (ret != -E2BIG && ret != -EINVAL) ··· 935 938 ret = svc_rdma_send_error_msg(rdma, sctxt, rqstp); 936 939 if (ret < 0) 937 940 goto err1; 938 - ret = 0; 939 - goto out; 941 + return 0; 940 942 941 943 err1: 942 944 svc_rdma_send_ctxt_put(rdma, sctxt); 943 945 err0: 944 946 trace_svcrdma_send_failed(rqstp, ret); 945 947 set_bit(XPT_CLOSE, &xprt->xpt_flags); 946 - ret = -ENOTCONN; 947 - goto out; 948 + return -ENOTCONN; 948 949 } 949 950 950 951 /**
-5
net/sunrpc/xprtrdma/svc_rdma_transport.c
··· 71 71 struct sockaddr *sa, int salen, 72 72 int flags); 73 73 static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt); 74 - static void svc_rdma_release_rqst(struct svc_rqst *); 75 74 static void svc_rdma_detach(struct svc_xprt *xprt); 76 75 static void svc_rdma_free(struct svc_xprt *xprt); 77 76 static int svc_rdma_has_wspace(struct svc_xprt *xprt); ··· 549 550 /* This call to put will destroy the transport */ 550 551 svc_xprt_put(&newxprt->sc_xprt); 551 552 return NULL; 552 - } 553 - 554 - static void svc_rdma_release_rqst(struct svc_rqst *rqstp) 555 - { 556 553 } 557 554 558 555 /*
+1
net/sunrpc/xprtsock.c
··· 2584 2584 2585 2585 static void bc_close(struct rpc_xprt *xprt) 2586 2586 { 2587 + xprt_disconnect_done(xprt); 2587 2588 } 2588 2589 2589 2590 /*