Merge tag 'nfs-for-5.7-4' of git://git.linux-nfs.org/projects/trondmy/linux-nfs

Pull NFS client bugfixes from Trond Myklebust:
"Highlights include:

Stable fixes:
- Fix handling of backchannel binding in BIND_CONN_TO_SESSION

Bugfixes:
- Fix a credential use-after-free issue in pnfs_roc()
- Fix potential posix_acl refcnt leak in nfs3_set_acl
- Defer slow parts of rpc_free_client() to a workqueue
- Fix an Oopsable race in __nfs_list_for_each_server()
- Fix trace point use-after-free race
- Fix a regression in which the RDMA client no longer responds to server
disconnect requests
- Fix return values of xdr_stream_encode_item_{present, absent}
- _pnfs_return_layout() must always wait for layoutreturn completion

Cleanups:
- Remove unreachable error conditions"

* tag 'nfs-for-5.7-4' of git://git.linux-nfs.org/projects/trondmy/linux-nfs:
NFS: Fix a race in __nfs_list_for_each_server()
NFSv4.1: fix handling of backchannel binding in BIND_CONN_TO_SESSION
SUNRPC: defer slow parts of rpc_free_client() to a workqueue.
NFSv4: Remove unreachable error condition due to rpc_run_task()
SUNRPC: Remove unreachable error condition
xprtrdma: Fix use of xdr_stream_encode_item_{present, absent}
xprtrdma: Fix trace point use-after-free race
xprtrdma: Restore wake-up-all to rpcrdma_cm_event_handler()
nfs: Fix potential posix_acl refcnt leak in nfs3_set_acl
NFS/pnfs: Fix a credential use-after-free issue in pnfs_roc()
NFS/pnfs: Ensure that _pnfs_return_layout() waits for layoutreturn completion

Changed files
+79 -36
fs
include
linux
sunrpc
trace
events
net
sunrpc
+15 -7
fs/nfs/nfs3acl.c
··· 253 253 254 254 int nfs3_set_acl(struct inode *inode, struct posix_acl *acl, int type) 255 255 { 256 - struct posix_acl *alloc = NULL, *dfacl = NULL; 256 + struct posix_acl *orig = acl, *dfacl = NULL, *alloc; 257 257 int status; 258 258 259 259 if (S_ISDIR(inode->i_mode)) { 260 260 switch(type) { 261 261 case ACL_TYPE_ACCESS: 262 - alloc = dfacl = get_acl(inode, ACL_TYPE_DEFAULT); 262 + alloc = get_acl(inode, ACL_TYPE_DEFAULT); 263 263 if (IS_ERR(alloc)) 264 264 goto fail; 265 + dfacl = alloc; 265 266 break; 266 267 267 268 case ACL_TYPE_DEFAULT: 268 - dfacl = acl; 269 - alloc = acl = get_acl(inode, ACL_TYPE_ACCESS); 269 + alloc = get_acl(inode, ACL_TYPE_ACCESS); 270 270 if (IS_ERR(alloc)) 271 271 goto fail; 272 + dfacl = acl; 273 + acl = alloc; 272 274 break; 273 275 } 274 276 } 275 277 276 278 if (acl == NULL) { 277 - alloc = acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); 279 + alloc = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); 278 280 if (IS_ERR(alloc)) 279 281 goto fail; 282 + acl = alloc; 280 283 } 281 284 status = __nfs3_proc_setacls(inode, acl, dfacl); 282 - posix_acl_release(alloc); 285 + out: 286 + if (acl != orig) 287 + posix_acl_release(acl); 288 + if (dfacl != orig) 289 + posix_acl_release(dfacl); 283 290 return status; 284 291 285 292 fail: 286 - return PTR_ERR(alloc); 293 + status = PTR_ERR(alloc); 294 + goto out; 287 295 } 288 296 289 297 const struct xattr_handler *nfs3_xattr_handlers[] = {
+9 -2
fs/nfs/nfs4proc.c
··· 7891 7891 nfs4_bind_one_conn_to_session_done(struct rpc_task *task, void *calldata) 7892 7892 { 7893 7893 struct nfs41_bind_conn_to_session_args *args = task->tk_msg.rpc_argp; 7894 + struct nfs41_bind_conn_to_session_res *res = task->tk_msg.rpc_resp; 7894 7895 struct nfs_client *clp = args->client; 7895 7896 7896 7897 switch (task->tk_status) { ··· 7899 7898 case -NFS4ERR_DEADSESSION: 7900 7899 nfs4_schedule_session_recovery(clp->cl_session, 7901 7900 task->tk_status); 7901 + } 7902 + if (args->dir == NFS4_CDFC4_FORE_OR_BOTH && 7903 + res->dir != NFS4_CDFS4_BOTH) { 7904 + rpc_task_close_connection(task); 7905 + if (args->retries++ < MAX_BIND_CONN_TO_SESSION_RETRIES) 7906 + rpc_restart_call(task); 7902 7907 } 7903 7908 } 7904 7909 ··· 7928 7921 struct nfs41_bind_conn_to_session_args args = { 7929 7922 .client = clp, 7930 7923 .dir = NFS4_CDFC4_FORE_OR_BOTH, 7924 + .retries = 0, 7931 7925 }; 7932 7926 struct nfs41_bind_conn_to_session_res res; 7933 7927 struct rpc_message msg = { ··· 9199 9191 nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0, 0); 9200 9192 9201 9193 task = rpc_run_task(&task_setup_data); 9202 - if (IS_ERR(task)) 9203 - return ERR_CAST(task); 9194 + 9204 9195 status = rpc_wait_for_completion_task(task); 9205 9196 if (status != 0) 9206 9197 goto out;
+5 -6
fs/nfs/pnfs.c
··· 1332 1332 !valid_layout) { 1333 1333 spin_unlock(&ino->i_lock); 1334 1334 dprintk("NFS: %s no layout segments to return\n", __func__); 1335 - goto out_put_layout_hdr; 1335 + goto out_wait_layoutreturn; 1336 1336 } 1337 1337 1338 1338 send = pnfs_prepare_layoutreturn(lo, &stateid, &cred, NULL); 1339 1339 spin_unlock(&ino->i_lock); 1340 1340 if (send) 1341 1341 status = pnfs_send_layoutreturn(lo, &stateid, &cred, IOMODE_ANY, true); 1342 + out_wait_layoutreturn: 1343 + wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN, TASK_UNINTERRUPTIBLE); 1342 1344 out_put_layout_hdr: 1343 1345 pnfs_free_lseg_list(&tmp_list); 1344 1346 pnfs_put_layout_hdr(lo); ··· 1458 1456 /* lo ref dropped in pnfs_roc_release() */ 1459 1457 layoutreturn = pnfs_prepare_layoutreturn(lo, &stateid, &lc_cred, &iomode); 1460 1458 /* If the creds don't match, we can't compound the layoutreturn */ 1461 - if (!layoutreturn) 1459 + if (!layoutreturn || cred_fscmp(cred, lc_cred) != 0) 1462 1460 goto out_noroc; 1463 - if (cred_fscmp(cred, lc_cred) != 0) 1464 - goto out_noroc_put_cred; 1465 1461 1466 1462 roc = layoutreturn; 1467 1463 pnfs_init_layoutreturn_args(args, lo, &stateid, iomode); 1468 1464 res->lrs_present = 0; 1469 1465 layoutreturn = false; 1470 - 1471 - out_noroc_put_cred: 1472 1466 put_cred(lc_cred); 1467 + 1473 1468 out_noroc: 1474 1469 spin_unlock(&ino->i_lock); 1475 1470 rcu_read_unlock();
+1 -1
fs/nfs/super.c
··· 185 185 186 186 rcu_read_lock(); 187 187 list_for_each_entry_rcu(server, head, client_link) { 188 - if (!nfs_sb_active(server->super)) 188 + if (!(server->super && nfs_sb_active(server->super))) 189 189 continue; 190 190 rcu_read_unlock(); 191 191 if (last)
+2
include/linux/nfs_xdr.h
··· 1317 1317 struct nfstime4 date; 1318 1318 }; 1319 1319 1320 + #define MAX_BIND_CONN_TO_SESSION_RETRIES 3 1320 1321 struct nfs41_bind_conn_to_session_args { 1321 1322 struct nfs_client *client; 1322 1323 struct nfs4_sessionid sessionid; 1323 1324 u32 dir; 1324 1325 bool use_conn_in_rdma_mode; 1326 + int retries; 1325 1327 }; 1326 1328 1327 1329 struct nfs41_bind_conn_to_session_res {
+12 -1
include/linux/sunrpc/clnt.h
··· 71 71 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 72 72 struct dentry *cl_debugfs; /* debugfs directory */ 73 73 #endif 74 - struct rpc_xprt_iter cl_xpi; 74 + /* cl_work is only needed after cl_xpi is no longer used, 75 + * and that are of similar size 76 + */ 77 + union { 78 + struct rpc_xprt_iter cl_xpi; 79 + struct work_struct cl_work; 80 + }; 75 81 const struct cred *cl_cred; 76 82 }; 77 83 ··· 242 236 (task->tk_msg.rpc_proc->p_decode != NULL); 243 237 } 244 238 239 + static inline void rpc_task_close_connection(struct rpc_task *task) 240 + { 241 + if (task->tk_xprt) 242 + xprt_force_disconnect(task->tk_xprt); 243 + } 245 244 #endif /* _LINUX_SUNRPC_CLNT_H */
+4 -8
include/trace/events/rpcrdma.h
··· 692 692 693 693 TRACE_EVENT(xprtrdma_post_send, 694 694 TP_PROTO( 695 - const struct rpcrdma_req *req, 696 - int status 695 + const struct rpcrdma_req *req 697 696 ), 698 697 699 - TP_ARGS(req, status), 698 + TP_ARGS(req), 700 699 701 700 TP_STRUCT__entry( 702 701 __field(const void *, req) ··· 704 705 __field(unsigned int, client_id) 705 706 __field(int, num_sge) 706 707 __field(int, signaled) 707 - __field(int, status) 708 708 ), 709 709 710 710 TP_fast_assign( ··· 716 718 __entry->sc = req->rl_sendctx; 717 719 __entry->num_sge = req->rl_wr.num_sge; 718 720 __entry->signaled = req->rl_wr.send_flags & IB_SEND_SIGNALED; 719 - __entry->status = status; 720 721 ), 721 722 722 - TP_printk("task:%u@%u req=%p sc=%p (%d SGE%s) %sstatus=%d", 723 + TP_printk("task:%u@%u req=%p sc=%p (%d SGE%s) %s", 723 724 __entry->task_id, __entry->client_id, 724 725 __entry->req, __entry->sc, __entry->num_sge, 725 726 (__entry->num_sge == 1 ? "" : "s"), 726 - (__entry->signaled ? "signaled " : ""), 727 - __entry->status 727 + (__entry->signaled ? "signaled" : "") 728 728 ) 729 729 ); 730 730
+18 -6
net/sunrpc/clnt.c
··· 880 880 /* 881 881 * Free an RPC client 882 882 */ 883 + static void rpc_free_client_work(struct work_struct *work) 884 + { 885 + struct rpc_clnt *clnt = container_of(work, struct rpc_clnt, cl_work); 886 + 887 + /* These might block on processes that might allocate memory, 888 + * so they cannot be called in rpciod, so they are handled separately 889 + * here. 890 + */ 891 + rpc_clnt_debugfs_unregister(clnt); 892 + rpc_clnt_remove_pipedir(clnt); 893 + 894 + kfree(clnt); 895 + rpciod_down(); 896 + } 883 897 static struct rpc_clnt * 884 898 rpc_free_client(struct rpc_clnt *clnt) 885 899 { ··· 904 890 rcu_dereference(clnt->cl_xprt)->servername); 905 891 if (clnt->cl_parent != clnt) 906 892 parent = clnt->cl_parent; 907 - rpc_clnt_debugfs_unregister(clnt); 908 - rpc_clnt_remove_pipedir(clnt); 909 893 rpc_unregister_client(clnt); 910 894 rpc_free_iostats(clnt->cl_metrics); 911 895 clnt->cl_metrics = NULL; 912 896 xprt_put(rcu_dereference_raw(clnt->cl_xprt)); 913 897 xprt_iter_destroy(&clnt->cl_xpi); 914 - rpciod_down(); 915 898 put_cred(clnt->cl_cred); 916 899 rpc_free_clid(clnt); 917 - kfree(clnt); 900 + 901 + INIT_WORK(&clnt->cl_work, rpc_free_client_work); 902 + schedule_work(&clnt->cl_work); 918 903 return parent; 919 904 } 920 905 ··· 2821 2808 task = rpc_call_null_helper(clnt, xprt, NULL, 2822 2809 RPC_TASK_SOFT|RPC_TASK_SOFTCONN|RPC_TASK_ASYNC|RPC_TASK_NULLCREDS, 2823 2810 &rpc_cb_add_xprt_call_ops, data); 2824 - if (IS_ERR(task)) 2825 - return PTR_ERR(task); 2811 + 2826 2812 rpc_put_task(task); 2827 2813 success: 2828 2814 return 1;
+11 -4
net/sunrpc/xprtrdma/rpc_rdma.c
··· 388 388 } while (nsegs); 389 389 390 390 done: 391 - return xdr_stream_encode_item_absent(xdr); 391 + if (xdr_stream_encode_item_absent(xdr) < 0) 392 + return -EMSGSIZE; 393 + return 0; 392 394 } 393 395 394 396 /* Register and XDR encode the Write list. Supports encoding a list ··· 456 454 *segcount = cpu_to_be32(nchunks); 457 455 458 456 done: 459 - return xdr_stream_encode_item_absent(xdr); 457 + if (xdr_stream_encode_item_absent(xdr) < 0) 458 + return -EMSGSIZE; 459 + return 0; 460 460 } 461 461 462 462 /* Register and XDR encode the Reply chunk. Supports encoding an array ··· 484 480 int nsegs, nchunks; 485 481 __be32 *segcount; 486 482 487 - if (wtype != rpcrdma_replych) 488 - return xdr_stream_encode_item_absent(xdr); 483 + if (wtype != rpcrdma_replych) { 484 + if (xdr_stream_encode_item_absent(xdr) < 0) 485 + return -EMSGSIZE; 486 + return 0; 487 + } 489 488 490 489 seg = req->rl_segments; 491 490 nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg);
+2 -1
net/sunrpc/xprtrdma/verbs.c
··· 289 289 case RDMA_CM_EVENT_DISCONNECTED: 290 290 ep->re_connect_status = -ECONNABORTED; 291 291 disconnected: 292 + xprt_force_disconnect(xprt); 292 293 return rpcrdma_ep_destroy(ep); 293 294 default: 294 295 break; ··· 1356 1355 --ep->re_send_count; 1357 1356 } 1358 1357 1358 + trace_xprtrdma_post_send(req); 1359 1359 rc = frwr_send(r_xprt, req); 1360 - trace_xprtrdma_post_send(req, rc); 1361 1360 if (rc) 1362 1361 return -ENOTCONN; 1363 1362 return 0;