Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

NFSD: add delegation reaper to react to low memory condition

The delegation reaper is called by the nfsd memory shrinker's
'count' callback. It scans the client list and sends the
courtesy CB_RECALL_ANY request to the clients that hold delegations.

To avoid flooding the clients with CB_RECALL_ANY requests, the
delegation reaper sends only one CB_RECALL_ANY request to each
client every 5 seconds.

Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
[ cel: moved definition of RCA4_TYPE_MASK_RDATA_DLG ]
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>

authored by

Dai Ngo and committed by
Chuck Lever
44df6f43 3959066b

+102 -4
+84 -4
fs/nfsd/nfs4state.c
··· 2144 2144 kfree(clp->cl_nii_domain.data); 2145 2145 kfree(clp->cl_nii_name.data); 2146 2146 idr_destroy(&clp->cl_stateids); 2147 + kfree(clp->cl_ra); 2147 2148 kmem_cache_free(client_slab, clp); 2148 2149 } 2149 2150 ··· 2872 2871 [3] = {""}, 2873 2872 }; 2874 2873 2874 + static int 2875 + nfsd4_cb_recall_any_done(struct nfsd4_callback *cb, 2876 + struct rpc_task *task) 2877 + { 2878 + switch (task->tk_status) { 2879 + case -NFS4ERR_DELAY: 2880 + rpc_delay(task, 2 * HZ); 2881 + return 0; 2882 + default: 2883 + return 1; 2884 + } 2885 + } 2886 + 2887 + static void 2888 + nfsd4_cb_recall_any_release(struct nfsd4_callback *cb) 2889 + { 2890 + struct nfs4_client *clp = cb->cb_clp; 2891 + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); 2892 + 2893 + spin_lock(&nn->client_lock); 2894 + clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags); 2895 + put_client_renew_locked(clp); 2896 + spin_unlock(&nn->client_lock); 2897 + } 2898 + 2899 + static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = { 2900 + .done = nfsd4_cb_recall_any_done, 2901 + .release = nfsd4_cb_recall_any_release, 2902 + }; 2903 + 2875 2904 static struct nfs4_client *create_client(struct xdr_netobj name, 2876 2905 struct svc_rqst *rqstp, nfs4_verifier *verf) 2877 2906 { ··· 2939 2908 free_client(clp); 2940 2909 return NULL; 2941 2910 } 2911 + clp->cl_ra = kzalloc(sizeof(*clp->cl_ra), GFP_KERNEL); 2912 + if (!clp->cl_ra) { 2913 + free_client(clp); 2914 + return NULL; 2915 + } 2916 + clp->cl_ra_time = 0; 2917 + nfsd4_init_cb(&clp->cl_ra->ra_cb, clp, &nfsd4_cb_recall_any_ops, 2918 + NFSPROC4_CLNT_CB_RECALL_ANY); 2942 2919 return clp; 2943 2920 } 2944 2921 ··· 4402 4363 static unsigned long 4403 4364 nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc) 4404 4365 { 4405 - int cnt; 4366 + int count; 4406 4367 struct nfsd_net *nn = container_of(shrink, 4407 4368 struct nfsd_net, nfsd_client_shrinker); 4408 4369 4409 - cnt = atomic_read(&nn->nfsd_courtesy_clients); 
4410 - if (cnt > 0) 4370 + count = atomic_read(&nn->nfsd_courtesy_clients); 4371 + if (!count) 4372 + count = atomic_long_read(&num_delegations); 4373 + if (count) 4411 4374 mod_delayed_work(laundry_wq, &nn->nfsd_shrinker_work, 0); 4412 - return (unsigned long)cnt; 4375 + return (unsigned long)count; 4413 4376 } 4414 4377 4415 4378 static unsigned long ··· 6201 6160 } 6202 6161 6203 6162 static void 6163 + deleg_reaper(struct nfsd_net *nn) 6164 + { 6165 + struct list_head *pos, *next; 6166 + struct nfs4_client *clp; 6167 + struct list_head cblist; 6168 + 6169 + INIT_LIST_HEAD(&cblist); 6170 + spin_lock(&nn->client_lock); 6171 + list_for_each_safe(pos, next, &nn->client_lru) { 6172 + clp = list_entry(pos, struct nfs4_client, cl_lru); 6173 + if (clp->cl_state != NFSD4_ACTIVE || 6174 + list_empty(&clp->cl_delegations) || 6175 + atomic_read(&clp->cl_delegs_in_recall) || 6176 + test_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags) || 6177 + (ktime_get_boottime_seconds() - 6178 + clp->cl_ra_time < 5)) { 6179 + continue; 6180 + } 6181 + list_add(&clp->cl_ra_cblist, &cblist); 6182 + 6183 + /* release in nfsd4_cb_recall_any_release */ 6184 + atomic_inc(&clp->cl_rpc_users); 6185 + set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags); 6186 + clp->cl_ra_time = ktime_get_boottime_seconds(); 6187 + } 6188 + spin_unlock(&nn->client_lock); 6189 + 6190 + while (!list_empty(&cblist)) { 6191 + clp = list_first_entry(&cblist, struct nfs4_client, 6192 + cl_ra_cblist); 6193 + list_del_init(&clp->cl_ra_cblist); 6194 + clp->cl_ra->ra_keep = 0; 6195 + clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG); 6196 + nfsd4_run_cb(&clp->cl_ra->ra_cb); 6197 + } 6198 + } 6199 + 6200 + static void 6204 6201 nfsd4_state_shrinker_worker(struct work_struct *work) 6205 6202 { 6206 6203 struct delayed_work *dwork = to_delayed_work(work); ··· 6246 6167 nfsd_shrinker_work); 6247 6168 6248 6169 courtesy_client_reaper(nn); 6170 + deleg_reaper(nn); 6249 6171 } 6250 6172 6251 6173 static inline __be32 
nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
+5
fs/nfsd/state.h
··· 368 368 #define NFSD4_CLIENT_UPCALL_LOCK (5) /* upcall serialization */ 369 369 #define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \ 370 370 1 << NFSD4_CLIENT_CB_KILL) 371 + #define NFSD4_CLIENT_CB_RECALL_ANY (6) 371 372 unsigned long cl_flags; 372 373 const struct cred *cl_cb_cred; 373 374 struct rpc_clnt *cl_cb_client; ··· 412 411 413 412 unsigned int cl_state; 414 413 atomic_t cl_delegs_in_recall; 414 + 415 + struct nfsd4_cb_recall_any *cl_ra; 416 + time64_t cl_ra_time; 417 + struct list_head cl_ra_cblist; 415 418 }; 416 419 417 420 /* struct nfs4_client_reset
+13
include/linux/nfs4.h
··· 732 732 SETXATTR4_CREATE = 1, 733 733 SETXATTR4_REPLACE = 2, 734 734 }; 735 + 736 + enum { 737 + RCA4_TYPE_MASK_RDATA_DLG = 0, 738 + RCA4_TYPE_MASK_WDATA_DLG = 1, 739 + RCA4_TYPE_MASK_DIR_DLG = 2, 740 + RCA4_TYPE_MASK_FILE_LAYOUT = 3, 741 + RCA4_TYPE_MASK_BLK_LAYOUT = 4, 742 + RCA4_TYPE_MASK_OBJ_LAYOUT_MIN = 8, 743 + RCA4_TYPE_MASK_OBJ_LAYOUT_MAX = 9, 744 + RCA4_TYPE_MASK_OTHER_LAYOUT_MIN = 12, 745 + RCA4_TYPE_MASK_OTHER_LAYOUT_MAX = 15, 746 + }; 747 + 735 748 #endif