Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

nfsd: serialize state seqid morphing operations

Andrew was seeing a race occur when an OPEN and OPEN_DOWNGRADE were
running in parallel. The server would receive the OPEN_DOWNGRADE first
and check its seqid, but then an OPEN would race in and bump it. The
OPEN_DOWNGRADE would then complete and bump the seqid again. The result
was that the OPEN_DOWNGRADE would be applied after the OPEN, even though
it should have been rejected since the seqid changed.

The only recourse we have here I think is to serialize operations that
bump the seqid in a stateid, particularly when we're given a seqid in
the call. To address this, we add a new rw_semaphore to the
nfs4_ol_stateid struct. We do a down_write prior to checking the seqid
after looking up the stateid to ensure that nothing else is going to
bump it while we're operating on it.

In the case of OPEN, we do a down_read, as the call doesn't contain a
seqid. Those can run in parallel -- we just need to serialize them when
there is a concurrent OPEN_DOWNGRADE or CLOSE.

LOCK and LOCKU however always take the write lock as there is no
opportunity for parallelizing those.

Reported-and-Tested-by: Andrew W Elble <aweits@rit.edu>
Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
Cc: stable@vger.kernel.org
Signed-off-by: J. Bruce Fields <bfields@redhat.com>

authored by

Jeff Layton and committed by
J. Bruce Fields
35a92fe8 3be7f328

+38 -14
+28 -5
fs/nfsd/nfs4state.c
··· 3360 3360 stp->st_access_bmap = 0; 3361 3361 stp->st_deny_bmap = 0; 3362 3362 stp->st_openstp = NULL; 3363 + init_rwsem(&stp->st_rwsem); 3363 3364 spin_lock(&oo->oo_owner.so_client->cl_lock); 3364 3365 list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids); 3365 3366 spin_lock(&fp->fi_lock); ··· 4188 4187 */ 4189 4188 if (stp) { 4190 4189 /* Stateid was found, this is an OPEN upgrade */ 4190 + down_read(&stp->st_rwsem); 4191 4191 status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); 4192 - if (status) 4192 + if (status) { 4193 + up_read(&stp->st_rwsem); 4193 4194 goto out; 4195 + } 4194 4196 } else { 4195 4197 stp = open->op_stp; 4196 4198 open->op_stp = NULL; 4197 4199 init_open_stateid(stp, fp, open); 4200 + down_read(&stp->st_rwsem); 4198 4201 status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open); 4199 4202 if (status) { 4203 + up_read(&stp->st_rwsem); 4200 4204 release_open_stateid(stp); 4201 4205 goto out; 4202 4206 } ··· 4213 4207 } 4214 4208 update_stateid(&stp->st_stid.sc_stateid); 4215 4209 memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); 4210 + up_read(&stp->st_rwsem); 4216 4211 4217 4212 if (nfsd4_has_session(&resp->cstate)) { 4218 4213 if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) { ··· 4826 4819 * revoked delegations are kept only for free_stateid. 4827 4820 */ 4828 4821 return nfserr_bad_stateid; 4822 + down_write(&stp->st_rwsem); 4829 4823 status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate)); 4830 - if (status) 4831 - return status; 4832 - return nfs4_check_fh(current_fh, &stp->st_stid); 4824 + if (status == nfs_ok) 4825 + status = nfs4_check_fh(current_fh, &stp->st_stid); 4826 + if (status != nfs_ok) 4827 + up_write(&stp->st_rwsem); 4828 + return status; 4833 4829 } 4834 4830 4835 4831 /* ··· 4879 4869 return status; 4880 4870 oo = openowner(stp->st_stateowner); 4881 4871 if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) { 4872 + up_write(&stp->st_rwsem); 4882 4873 nfs4_put_stid(&stp->st_stid); 4883 4874 return nfserr_bad_stateid; 4884 4875 } ··· 4910 4899 goto out; 4911 4900 oo = openowner(stp->st_stateowner); 4912 4901 status = nfserr_bad_stateid; 4913 - if (oo->oo_flags & NFS4_OO_CONFIRMED) 4902 + if (oo->oo_flags & NFS4_OO_CONFIRMED) { 4903 + up_write(&stp->st_rwsem); 4914 4904 goto put_stateid; 4905 + } 4915 4906 oo->oo_flags |= NFS4_OO_CONFIRMED; 4916 4907 update_stateid(&stp->st_stid.sc_stateid); 4917 4908 memcpy(&oc->oc_resp_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); 4909 + up_write(&stp->st_rwsem); 4918 4910 dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n", 4919 4911 __func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid)); 4920 4912 ··· 4996 4982 memcpy(&od->od_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); 4997 4983 status = nfs_ok; 4998 4984 put_stateid: 4985 + up_write(&stp->st_rwsem); 4999 4986 nfs4_put_stid(&stp->st_stid); 5000 4987 out: 5001 4988 nfsd4_bump_seqid(cstate, status); ··· 5050 5035 goto out; 5051 5036 update_stateid(&stp->st_stid.sc_stateid); 5052 5037 memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); 5038 + up_write(&stp->st_rwsem); 5053 5039 5054 5040 nfsd4_close_open_stateid(stp); 5055 5041 ··· 5276 5260 stp->st_access_bmap = 0; 5277 5261 stp->st_deny_bmap = open_stp->st_deny_bmap; 5278 5262 stp->st_openstp = open_stp; 5263 + init_rwsem(&stp->st_rwsem); 5279 5264 list_add(&stp->st_locks, &open_stp->st_locks); 5280 5265 list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); 5281 5266 spin_lock(&fp->fi_lock); ··· 5445 5428 &open_stp, nn); 5446 5429 if (status) 5447 5430 goto out; 5431 + up_write(&open_stp->st_rwsem); 5448 5432 open_sop = openowner(open_stp->st_stateowner); 5449 5433 status = nfserr_bad_stateid; 5450 5434 if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid, ··· 5453 5435 goto out; 5454 5436 status = lookup_or_create_lock_state(cstate, open_stp, lock, 5455 5437 &lock_stp, &new); 5438 + if (status == nfs_ok) 5439 + down_write(&lock_stp->st_rwsem); 5456 5440 } else { 5457 5441 status = nfs4_preprocess_seqid_op(cstate, 5458 5442 lock->lk_old_lock_seqid, ··· 5559 5539 cstate->replay_owner != &lock_sop->lo_owner && 5560 5540 seqid_mutating_err(ntohl(status))) 5561 5541 lock_sop->lo_owner.so_seqid++; 5542 + 5543 + up_write(&lock_stp->st_rwsem); 5562 5544 5563 5545 /* 5564 5546 * If this is a new, never-before-used stateid, and we are ··· 5731 5709 fput: 5732 5710 fput(filp); 5733 5711 put_stateid: 5712 + up_write(&stp->st_rwsem); 5734 5713 nfs4_put_stid(&stp->st_stid); 5735 5714 out: 5736 5715 nfsd4_bump_seqid(cstate, status);
+10 -9
fs/nfsd/state.h
··· 534 534 * Better suggestions welcome. 535 535 */ 536 536 struct nfs4_ol_stateid { 537 - struct nfs4_stid st_stid; /* must be first field */ 538 - struct list_head st_perfile; 539 - struct list_head st_perstateowner; 540 - struct list_head st_locks; 541 - struct nfs4_stateowner * st_stateowner; 542 - struct nfs4_clnt_odstate * st_clnt_odstate; 543 - unsigned char st_access_bmap; 544 - unsigned char st_deny_bmap; 545 - struct nfs4_ol_stateid * st_openstp; 537 + struct nfs4_stid st_stid; 538 + struct list_head st_perfile; 539 + struct list_head st_perstateowner; 540 + struct list_head st_locks; 541 + struct nfs4_stateowner *st_stateowner; 542 + struct nfs4_clnt_odstate *st_clnt_odstate; 543 + unsigned char st_access_bmap; 544 + unsigned char st_deny_bmap; 545 + struct nfs4_ol_stateid *st_openstp; 546 + struct rw_semaphore st_rwsem; 546 547 }; 547 548 548 549 static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s)