Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

NFS: Fix a race when updating an existing write

After nfs_lock_and_join_requests() tests for whether the request is
still attached to the mapping, nothing prevents a call to
nfs_inode_remove_request() from succeeding until we actually lock the
page group.
The reason is that whoever called nfs_inode_remove_request() doesn't
necessarily have a lock on the page group head.

So in order to avoid races, let's take the page group lock earlier in
nfs_lock_and_join_requests(), and hold it across the removal of the
request in nfs_inode_remove_request().

Reported-by: Jeff Layton <jlayton@kernel.org>
Tested-by: Joe Quanaim <jdq@meta.com>
Tested-by: Andrew Steffen <aksteffen@meta.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Fixes: bd37d6fce184 ("NFSv4: Convert nfs_lock_and_join_requests() to use nfs_page_find_head_request()")
Cc: stable@vger.kernel.org
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>

Total: +16 -23 (3 files changed)

fs/nfs/pagelist.c (+5 -4)
@@ -253,13 +253,14 @@
 	nfs_page_clear_headlock(req);
 }
 
-/*
- * nfs_page_group_sync_on_bit_locked
+/**
+ * nfs_page_group_sync_on_bit_locked - Test if all requests have @bit set
+ * @req: request in page group
+ * @bit: PG_* bit that is used to sync page group
  *
  * must be called with page group lock held
  */
-static bool
-nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit)
+bool nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit)
 {
 	struct nfs_page *head = req->wb_head;
 	struct nfs_page *tmp;
fs/nfs/write.c (+10 -19)
@@ -153,20 +153,10 @@
 	}
 }
 
-static int
-nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode)
+static void nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode)
 {
-	int ret;
-
-	if (!test_bit(PG_REMOVE, &req->wb_flags))
-		return 0;
-	ret = nfs_page_group_lock(req);
-	if (ret)
-		return ret;
 	if (test_and_clear_bit(PG_REMOVE, &req->wb_flags))
 		nfs_page_set_inode_ref(req, inode);
-	nfs_page_group_unlock(req);
-	return 0;
 }
 
 /**
@@ -575,19 +585,18 @@
 		}
 	}
 
+	ret = nfs_page_group_lock(head);
+	if (ret < 0)
+		goto out_unlock;
+
 	/* Ensure that nobody removed the request before we locked it */
 	if (head != folio->private) {
+		nfs_page_group_unlock(head);
 		nfs_unlock_and_release_request(head);
 		goto retry;
 	}
 
-	ret = nfs_cancel_remove_inode(head, inode);
-	if (ret < 0)
-		goto out_unlock;
-
-	ret = nfs_page_group_lock(head);
-	if (ret < 0)
-		goto out_unlock;
+	nfs_cancel_remove_inode(head, inode);
 
 	/* lock each request in the page group */
 	for (subreq = head->wb_this_page;
@@ -775,7 +786,8 @@
 {
 	struct nfs_inode *nfsi = NFS_I(nfs_page_to_inode(req));
 
-	if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) {
+	nfs_page_group_lock(req);
+	if (nfs_page_group_sync_on_bit_locked(req, PG_REMOVE)) {
 		struct folio *folio = nfs_page_to_folio(req->wb_head);
 		struct address_space *mapping = folio->mapping;
 
@@ -788,6 +798,7 @@
 		}
 		spin_unlock(&mapping->i_private_lock);
 	}
+	nfs_page_group_unlock(req);
 
 	if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) {
 		atomic_long_dec(&nfsi->nrequests);
include/linux/nfs_page.h (+1 -0)
@@ -160,6 +160,7 @@
 extern int nfs_page_group_lock(struct nfs_page *);
 extern void nfs_page_group_unlock(struct nfs_page *);
 extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int);
+extern bool nfs_page_group_sync_on_bit_locked(struct nfs_page *, unsigned int);
 extern int nfs_page_set_headlock(struct nfs_page *req);
 extern void nfs_page_clear_headlock(struct nfs_page *req);
 extern bool nfs_async_iocounter_wait(struct rpc_task *, struct nfs_lock_context *);