Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

NFSv4.1: Don't loop forever in nfs4_proc_create_session

If a server for some reason keeps sending NFS4ERR_DELAY errors, we can end
up looping forever inside nfs4_proc_create_session, and so the usual
mechanisms for detecting if the nfs_client is dead don't work.

Fix this by ensuring that we loop inside the nfs4_state_manager thread
instead.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

+40 -53
+1
fs/nfs/nfs4_fs.h
···
  47   47   	NFS4CLNT_LAYOUTRECALL,
  48   48   	NFS4CLNT_SESSION_RESET,
  49   49   	NFS4CLNT_RECALL_SLOT,
       50 + 	NFS4CLNT_LEASE_CONFIRM,
  50   51   };
  51   52
  52   53   enum nfs4_session_state {
+7 -38
fs/nfs/nfs4proc.c
···
3754 3754   		status = rpc_call_sync(clp->cl_rpcclient, &msg, 0);
3755 3755   		if (status != -NFS4ERR_CLID_INUSE)
3756 3756   			break;
3757      - 		if (signalled())
     3757 + 		if (loop != 0) {
     3758 + 			++clp->cl_id_uniquifier;
3758 3759   			break;
3759      - 		if (loop++ & 1)
3760      - 			ssleep(clp->cl_lease_time / HZ + 1);
3761      - 		else
3762      - 			if (++clp->cl_id_uniquifier == 0)
3763      - 				break;
     3760 + 		}
     3761 + 		++loop;
     3762 + 		ssleep(clp->cl_lease_time / HZ + 1);
3764 3763   	}
3765 3764   	return status;
3766 3765   }
3767 3766
3768      - static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp,
     3767 + int nfs4_proc_setclientid_confirm(struct nfs_client *clp,
3769 3768   		struct nfs4_setclientid_res *arg,
3770 3769   		struct rpc_cred *cred)
3771 3770   {
···
3787 3788   		spin_unlock(&clp->cl_lock);
3788 3789   	}
3789 3790   	return status;
3790      - }
3791      -
3792      - int nfs4_proc_setclientid_confirm(struct nfs_client *clp,
3793      - 		struct nfs4_setclientid_res *arg,
3794      - 		struct rpc_cred *cred)
3795      - {
3796      - 	long timeout = 0;
3797      - 	int err;
3798      - 	do {
3799      - 		err = _nfs4_proc_setclientid_confirm(clp, arg, cred);
3800      - 		switch (err) {
3801      - 		case 0:
3802      - 			return err;
3803      - 		case -NFS4ERR_RESOURCE:
3804      - 			/* The IBM lawyers misread another document! */
3805      - 		case -NFS4ERR_DELAY:
3806      - 			err = nfs4_delay(clp->cl_rpcclient, &timeout);
3807      - 		}
3808      - 	} while (err == 0);
3809      - 	return err;
3810 3791   }
3811 3792
3812 3793   struct nfs4_delegreturndata {
···
5201 5222   	int status;
5202 5223   	unsigned *ptr;
5203 5224   	struct nfs4_session *session = clp->cl_session;
5204      - 	long timeout = 0;
5205      - 	int err;
5206 5225
5207 5226   	dprintk("--> %s clp=%p session=%p\n", __func__, clp, session);
5208 5227
5209      - 	do {
5210      - 		status = _nfs4_proc_create_session(clp);
5211      - 		if (status == -NFS4ERR_DELAY) {
5212      - 			err = nfs4_delay(clp->cl_rpcclient, &timeout);
5213      - 			if (err)
5214      - 				status = err;
5215      - 		}
5216      - 	} while (status == -NFS4ERR_DELAY);
5217      -
     5228 + 	status = _nfs4_proc_create_session(clp);
5218 5229   	if (status)
5219 5230   		goto out;
5220 5231
+31 -15
fs/nfs/nfs4state.c
···
  64   64
  65   65   int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
  66   66   {
  67      - 	struct nfs4_setclientid_res clid;
       67 + 	struct nfs4_setclientid_res clid = {
       68 + 		.clientid = clp->cl_clientid,
       69 + 		.confirm = clp->cl_confirm,
       70 + 	};
  68   71   	unsigned short port;
  69   72   	int status;
  70   73
       74 + 	if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state))
       75 + 		goto do_confirm;
  71   76   	port = nfs_callback_tcpport;
  72   77   	if (clp->cl_addr.ss_family == AF_INET6)
  73   78   		port = nfs_callback_tcpport6;
···
  80   75   	status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid);
  81   76   	if (status != 0)
  82   77   		goto out;
       78 + 	clp->cl_clientid = clid.clientid;
       79 + 	clp->cl_confirm = clid.confirm;
       80 + 	set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
       81 + do_confirm:
  83   82   	status = nfs4_proc_setclientid_confirm(clp, &clid, cred);
  84   83   	if (status != 0)
  85   84   		goto out;
  86      - 	clp->cl_clientid = clid.clientid;
       85 + 	clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
  87   86   	nfs4_schedule_state_renewal(clp);
  88   87   out:
  89   88   	return status;
···
 239  230   {
 240  231   	int status;
 241  232
      233 + 	if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state))
      234 + 		goto do_confirm;
 242  235   	nfs4_begin_drain_session(clp);
 243  236   	status = nfs4_proc_exchange_id(clp, cred);
 244  237   	if (status != 0)
 245  238   		goto out;
      239 + 	set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
      240 + do_confirm:
 246  241   	status = nfs4_proc_create_session(clp);
 247  242   	if (status != 0)
 248  243   		goto out;
      244 + 	clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
 249  245   	nfs41_setup_state_renewal(clp);
 250  246   	nfs_mark_client_ready(clp, NFS_CS_READY);
 251  247   out:
···
1598 1584    */
1599 1585   static void nfs4_set_lease_expired(struct nfs_client *clp, int status)
1600 1586   {
1601      - 	if (nfs4_has_session(clp)) {
1602      - 		switch (status) {
1603      - 		case -NFS4ERR_DELAY:
1604      - 		case -NFS4ERR_CLID_INUSE:
1605      - 		case -EAGAIN:
1606      - 			break;
     1587 + 	switch (status) {
     1588 + 	case -NFS4ERR_CLID_INUSE:
     1589 + 	case -NFS4ERR_STALE_CLIENTID:
     1590 + 		clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
     1591 + 		break;
     1592 + 	case -NFS4ERR_DELAY:
     1593 + 	case -EAGAIN:
     1594 + 		ssleep(1);
     1595 + 		break;
1607 1596
1608      - 		case -EKEYEXPIRED:
1609      - 			nfs4_warn_keyexpired(clp->cl_hostname);
1610      - 		case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
1611      - 					 * in nfs4_exchange_id */
1612      - 		default:
1613      - 			return;
1614      - 		}
     1597 + 	case -EKEYEXPIRED:
     1598 + 		nfs4_warn_keyexpired(clp->cl_hostname);
     1599 + 	case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
     1600 + 				 * in nfs4_exchange_id */
     1601 + 	default:
     1602 + 		return;
1615 1603   	}
1616 1604   	set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1617 1605   }
+1
include/linux/nfs_fs_sb.h
···
  47   47
  48   48   #ifdef CONFIG_NFS_V4
  49   49   	u64 cl_clientid; /* constant */
       50 + 	nfs4_verifier cl_confirm; /* Clientid verifier */
  50   51   	unsigned long cl_state;
  51   52
  52   53   	spinlock_t cl_lock;