Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

nfsd: provide locking for v4_end_grace

Writing to v4_end_grace can race with server shutdown and result in
memory being accessed after it was freed - reclaim_str_hashtbl in
particular.

We cannot hold nfsd_mutex across the nfsd4_end_grace() call because
nfsd_mutex is held while client_tracking_ops->init() is called, and
init() can wait for an upcall to nfsdcltrack, which can in turn write
to v4_end_grace, resulting in a deadlock.

nfsd4_end_grace() is also called by the laundromat work queue and this
doesn't require locking as server shutdown will stop the work and wait
for it before freeing anything that nfsd4_end_grace() might access.

However, we must be sure that writing to v4_end_grace doesn't restart
the work item after shutdown has already waited for it. For this we
add a new flag protected with nn->client_lock. It is set only while it
is safe to make client tracking calls, and v4_end_grace only schedules
work while the flag is set with the spinlock held.

So this patch adds an nfsd_net field "client_tracking_active" which is
set as described. Another field, "grace_end_forced", is set when
v4_end_grace is written. After this is set, and provided
client_tracking_active is set, the laundromat is scheduled.
This "grace_end_forced" field bypasses other checks for whether the
grace period has finished.

This resolves a race which can result in use-after-free.

Reported-by: Li Lingfeng <lilingfeng3@huawei.com>
Closes: https://lore.kernel.org/linux-nfs/20250623030015.2353515-1-neil@brown.name/T/#t
Fixes: 7f5ef2e900d9 ("nfsd: add a v4_end_grace file to /proc/fs/nfsd")
Cc: stable@vger.kernel.org
Signed-off-by: NeilBrown <neil@brown.name>
Tested-by: Li Lingfeng <lilingfeng3@huawei.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>

authored by

NeilBrown and committed by
Chuck Lever
2857bd59 e901c7fc

+44 -5
+2
fs/nfsd/netns.h
··· 66 66 67 67 struct lock_manager nfsd4_manager; 68 68 bool grace_ended; 69 + bool grace_end_forced; 70 + bool client_tracking_active; 69 71 time64_t boot_time; 70 72 71 73 struct dentry *nfsd_client_dir;
+40 -2
fs/nfsd/nfs4state.c
··· 84 84 /* forward declarations */ 85 85 static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner); 86 86 static void nfs4_free_ol_stateid(struct nfs4_stid *stid); 87 - void nfsd4_end_grace(struct nfsd_net *nn); 87 + static void nfsd4_end_grace(struct nfsd_net *nn); 88 88 static void _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps); 89 89 static void nfsd4_file_hash_remove(struct nfs4_file *fi); 90 90 static void deleg_reaper(struct nfsd_net *nn); ··· 6570 6570 return nfs_ok; 6571 6571 } 6572 6572 6573 - void 6573 + static void 6574 6574 nfsd4_end_grace(struct nfsd_net *nn) 6575 6575 { 6576 6576 /* do nothing if grace period already ended */ ··· 6603 6603 */ 6604 6604 } 6605 6605 6606 + /** 6607 + * nfsd4_force_end_grace - forcibly end the NFSv4 grace period 6608 + * @nn: network namespace for the server instance to be updated 6609 + * 6610 + * Forces bypass of normal grace period completion, then schedules 6611 + * the laundromat to end the grace period immediately. Does not wait 6612 + * for the grace period to fully terminate before returning. 6613 + * 6614 + * Return values: 6615 + * %true: Grace termination schedule 6616 + * %false: No action was taken 6617 + */ 6618 + bool nfsd4_force_end_grace(struct nfsd_net *nn) 6619 + { 6620 + if (!nn->client_tracking_ops) 6621 + return false; 6622 + spin_lock(&nn->client_lock); 6623 + if (nn->grace_ended || !nn->client_tracking_active) { 6624 + spin_unlock(&nn->client_lock); 6625 + return false; 6626 + } 6627 + WRITE_ONCE(nn->grace_end_forced, true); 6628 + mod_delayed_work(laundry_wq, &nn->laundromat_work, 0); 6629 + spin_unlock(&nn->client_lock); 6630 + return true; 6631 + } 6632 + 6606 6633 /* 6607 6634 * If we've waited a lease period but there are still clients trying to 6608 6635 * reclaim, wait a little longer to give them a chance to finish. 
··· 6639 6612 time64_t double_grace_period_end = nn->boot_time + 6640 6613 2 * nn->nfsd4_lease; 6641 6614 6615 + if (READ_ONCE(nn->grace_end_forced)) 6616 + return false; 6642 6617 if (nn->track_reclaim_completes && 6643 6618 atomic_read(&nn->nr_reclaim_complete) == 6644 6619 nn->reclaim_str_hashtbl_size) ··· 8960 8931 nn->unconf_name_tree = RB_ROOT; 8961 8932 nn->boot_time = ktime_get_real_seconds(); 8962 8933 nn->grace_ended = false; 8934 + nn->grace_end_forced = false; 8935 + nn->client_tracking_active = false; 8963 8936 nn->nfsd4_manager.block_opens = true; 8964 8937 INIT_LIST_HEAD(&nn->nfsd4_manager.list); 8965 8938 INIT_LIST_HEAD(&nn->client_lru); ··· 9042 9011 return ret; 9043 9012 locks_start_grace(net, &nn->nfsd4_manager); 9044 9013 nfsd4_client_tracking_init(net); 9014 + /* safe for laundromat to run now */ 9015 + spin_lock(&nn->client_lock); 9016 + nn->client_tracking_active = true; 9017 + spin_unlock(&nn->client_lock); 9045 9018 if (nn->track_reclaim_completes && nn->reclaim_str_hashtbl_size == 0) 9046 9019 goto skip_grace; 9047 9020 printk(KERN_INFO "NFSD: starting %lld-second grace period (net %x)\n", ··· 9094 9059 9095 9060 shrinker_free(nn->nfsd_client_shrinker); 9096 9061 cancel_work_sync(&nn->nfsd_shrinker_work); 9062 + spin_lock(&nn->client_lock); 9063 + nn->client_tracking_active = false; 9064 + spin_unlock(&nn->client_lock); 9097 9065 cancel_delayed_work_sync(&nn->laundromat_work); 9098 9066 locks_end_grace(&nn->nfsd4_manager); 9099 9067
+1 -2
fs/nfsd/nfsctl.c
··· 1082 1082 case 'Y': 1083 1083 case 'y': 1084 1084 case '1': 1085 - if (!nn->nfsd_serv) 1085 + if (!nfsd4_force_end_grace(nn)) 1086 1086 return -EBUSY; 1087 1087 trace_nfsd_end_grace(netns(file)); 1088 - nfsd4_end_grace(nn); 1089 1088 break; 1090 1089 default: 1091 1090 return -EINVAL;
+1 -1
fs/nfsd/state.h
··· 849 849 #endif 850 850 851 851 /* grace period management */ 852 - void nfsd4_end_grace(struct nfsd_net *nn); 852 + bool nfsd4_force_end_grace(struct nfsd_net *nn); 853 853 854 854 /* nfs4recover operations */ 855 855 extern int nfsd4_client_tracking_init(struct net *net);