Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

NFSD: delay unmounting the source's export after an inter-server copy completes.

Currently the source's export is mounted and unmounted on every
inter-server copy operation. This patch is an enhancement to delay
the unmount of the source export for a certain period of time to
eliminate the mount and unmount overhead on subsequent copy operations.

After a copy operation completes, a work entry is added to the
delayed unmount list with an expiration time. This list is serviced
by the laundromat thread to unmount the export of the expired entries.
Each time the export is used again, its expiration time is
extended and the entry is re-inserted at the tail of the list.

The unmount task and the mount operation of the copy request are
synced to make sure the export is not unmounted while it's being
used.

Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>

authored by

Dai Ngo and committed by
J. Bruce Fields
f4e44b39 eac0b17a

+229 -4
+6
fs/nfsd/netns.h
··· 176 176 unsigned int longest_chain_cachesize; 177 177 178 178 struct shrinker nfsd_reply_cache_shrinker; 179 + 180 + /* tracking server-to-server copy mounts */ 181 + spinlock_t nfsd_ssc_lock; 182 + struct list_head nfsd_ssc_mount_list; 183 + wait_queue_head_t nfsd_ssc_waitq; 184 + 179 185 /* utsname taken from the process that starts the server */ 180 186 char nfsd_name[UNX_MAXNODENAME+1]; 181 187 };
+131 -4
fs/nfsd/nfs4proc.c
··· 55 55 MODULE_PARM_DESC(inter_copy_offload_enable, 56 56 "Enable inter server to server copy offload. Default: false"); 57 57 58 + #ifdef CONFIG_NFSD_V4_2_INTER_SSC 59 + static int nfsd4_ssc_umount_timeout = 900000; /* default to 15 mins */ 60 + module_param(nfsd4_ssc_umount_timeout, int, 0644); 61 + MODULE_PARM_DESC(nfsd4_ssc_umount_timeout, 62 + "idle msecs before unmount export from source server"); 63 + #endif 64 + 58 65 #ifdef CONFIG_NFSD_V4_SECURITY_LABEL 59 66 #include <linux/security.h> 60 67 ··· 1173 1166 #define NFSD42_INTERSSC_MOUNTOPS "vers=4.2,addr=%s,sec=sys" 1174 1167 1175 1168 /* 1169 + * setup a work entry in the ssc delayed unmount list. 1170 + */ 1171 + static int nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr, 1172 + struct nfsd4_ssc_umount_item **retwork, struct vfsmount **ss_mnt) 1173 + { 1174 + struct nfsd4_ssc_umount_item *ni = 0; 1175 + struct nfsd4_ssc_umount_item *work = NULL; 1176 + struct nfsd4_ssc_umount_item *tmp; 1177 + DEFINE_WAIT(wait); 1178 + 1179 + *ss_mnt = NULL; 1180 + *retwork = NULL; 1181 + work = kzalloc(sizeof(*work), GFP_KERNEL); 1182 + try_again: 1183 + spin_lock(&nn->nfsd_ssc_lock); 1184 + list_for_each_entry_safe(ni, tmp, &nn->nfsd_ssc_mount_list, nsui_list) { 1185 + if (strncmp(ni->nsui_ipaddr, ipaddr, sizeof(ni->nsui_ipaddr))) 1186 + continue; 1187 + /* found a match */ 1188 + if (ni->nsui_busy) { 1189 + /* wait - and try again */ 1190 + prepare_to_wait(&nn->nfsd_ssc_waitq, &wait, 1191 + TASK_INTERRUPTIBLE); 1192 + spin_unlock(&nn->nfsd_ssc_lock); 1193 + 1194 + /* allow 20secs for mount/unmount for now - revisit */ 1195 + if (signal_pending(current) || 1196 + (schedule_timeout(20*HZ) == 0)) { 1197 + kfree(work); 1198 + return nfserr_eagain; 1199 + } 1200 + finish_wait(&nn->nfsd_ssc_waitq, &wait); 1201 + goto try_again; 1202 + } 1203 + *ss_mnt = ni->nsui_vfsmount; 1204 + refcount_inc(&ni->nsui_refcnt); 1205 + spin_unlock(&nn->nfsd_ssc_lock); 1206 + kfree(work); 1207 + 1208 + /* return vfsmount in ss_mnt */ 
1209 + return 0; 1210 + } 1211 + if (work) { 1212 + strncpy(work->nsui_ipaddr, ipaddr, sizeof(work->nsui_ipaddr)); 1213 + refcount_set(&work->nsui_refcnt, 2); 1214 + work->nsui_busy = true; 1215 + list_add_tail(&work->nsui_list, &nn->nfsd_ssc_mount_list); 1216 + *retwork = work; 1217 + } 1218 + spin_unlock(&nn->nfsd_ssc_lock); 1219 + return 0; 1220 + } 1221 + 1222 + static void nfsd4_ssc_update_dul_work(struct nfsd_net *nn, 1223 + struct nfsd4_ssc_umount_item *work, struct vfsmount *ss_mnt) 1224 + { 1225 + /* set nsui_vfsmount, clear busy flag and wakeup waiters */ 1226 + spin_lock(&nn->nfsd_ssc_lock); 1227 + work->nsui_vfsmount = ss_mnt; 1228 + work->nsui_busy = false; 1229 + wake_up_all(&nn->nfsd_ssc_waitq); 1230 + spin_unlock(&nn->nfsd_ssc_lock); 1231 + } 1232 + 1233 + static void nfsd4_ssc_cancel_dul_work(struct nfsd_net *nn, 1234 + struct nfsd4_ssc_umount_item *work) 1235 + { 1236 + spin_lock(&nn->nfsd_ssc_lock); 1237 + list_del(&work->nsui_list); 1238 + wake_up_all(&nn->nfsd_ssc_waitq); 1239 + spin_unlock(&nn->nfsd_ssc_lock); 1240 + kfree(work); 1241 + } 1242 + 1243 + /* 1176 1244 * Support one copy source server for now. 
1177 1245 */ 1178 1246 static __be32 ··· 1263 1181 char *ipaddr, *dev_name, *raw_data; 1264 1182 int len, raw_len; 1265 1183 __be32 status = nfserr_inval; 1184 + struct nfsd4_ssc_umount_item *work = NULL; 1185 + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); 1266 1186 1267 1187 naddr = &nss->u.nl4_addr; 1268 1188 tmp_addrlen = rpc_uaddr2sockaddr(SVC_NET(rqstp), naddr->addr, ··· 1313 1229 goto out_free_rawdata; 1314 1230 snprintf(dev_name, len + 5, "%s%s%s:/", startsep, ipaddr, endsep); 1315 1231 1232 + status = nfsd4_ssc_setup_dul(nn, ipaddr, &work, &ss_mnt); 1233 + if (status) 1234 + goto out_free_devname; 1235 + if (ss_mnt) 1236 + goto out_done; 1237 + 1316 1238 /* Use an 'internal' mount: SB_KERNMOUNT -> MNT_INTERNAL */ 1317 1239 ss_mnt = vfs_kern_mount(type, SB_KERNMOUNT, dev_name, raw_data); 1318 1240 module_put(type->owner); 1319 - if (IS_ERR(ss_mnt)) 1241 + if (IS_ERR(ss_mnt)) { 1242 + if (work) 1243 + nfsd4_ssc_cancel_dul_work(nn, work); 1320 1244 goto out_free_devname; 1321 - 1245 + } 1246 + if (work) 1247 + nfsd4_ssc_update_dul_work(nn, work, ss_mnt); 1248 + out_done: 1322 1249 status = 0; 1323 1250 *mount = ss_mnt; 1324 1251 ··· 1396 1301 nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src, 1397 1302 struct nfsd_file *dst) 1398 1303 { 1304 + bool found = false; 1305 + long timeout; 1306 + struct nfsd4_ssc_umount_item *tmp; 1307 + struct nfsd4_ssc_umount_item *ni = 0; 1308 + struct nfsd_net *nn = net_generic(dst->nf_net, nfsd_net_id); 1309 + 1399 1310 nfs42_ssc_close(src->nf_file); 1400 - fput(src->nf_file); 1401 1311 nfsd_file_put(dst); 1402 - mntput(ss_mnt); 1312 + fput(src->nf_file); 1313 + 1314 + if (!nn) { 1315 + mntput(ss_mnt); 1316 + return; 1317 + } 1318 + spin_lock(&nn->nfsd_ssc_lock); 1319 + timeout = msecs_to_jiffies(nfsd4_ssc_umount_timeout); 1320 + list_for_each_entry_safe(ni, tmp, &nn->nfsd_ssc_mount_list, nsui_list) { 1321 + if (ni->nsui_vfsmount->mnt_sb == ss_mnt->mnt_sb) { 1322 + 
list_del(&ni->nsui_list); 1323 + /* 1324 + * vfsmount can be shared by multiple exports, 1325 + * decrement refcnt. If the count drops to 1 it 1326 + * will be unmounted when nsui_expire expires. 1327 + */ 1328 + refcount_dec(&ni->nsui_refcnt); 1329 + ni->nsui_expire = jiffies + timeout; 1330 + list_add_tail(&ni->nsui_list, &nn->nfsd_ssc_mount_list); 1331 + found = true; 1332 + break; 1333 + } 1334 + } 1335 + spin_unlock(&nn->nfsd_ssc_lock); 1336 + if (!found) { 1337 + mntput(ss_mnt); 1338 + return; 1339 + } 1403 1340 } 1404 1341 1405 1342 #else /* CONFIG_NFSD_V4_2_INTER_SSC */
+71
fs/nfsd/nfs4state.c
··· 44 44 #include <linux/jhash.h> 45 45 #include <linux/string_helpers.h> 46 46 #include <linux/fsnotify.h> 47 + #include <linux/nfs_ssc.h> 47 48 #include "xdr4.h" 48 49 #include "xdr4cb.h" 49 50 #include "vfs.h" ··· 5481 5480 return false; 5482 5481 } 5483 5482 5483 + #ifdef CONFIG_NFSD_V4_2_INTER_SSC 5484 + void nfsd4_ssc_init_umount_work(struct nfsd_net *nn) 5485 + { 5486 + spin_lock_init(&nn->nfsd_ssc_lock); 5487 + INIT_LIST_HEAD(&nn->nfsd_ssc_mount_list); 5488 + init_waitqueue_head(&nn->nfsd_ssc_waitq); 5489 + } 5490 + EXPORT_SYMBOL_GPL(nfsd4_ssc_init_umount_work); 5491 + 5492 + /* 5493 + * This is called when nfsd is being shutdown, after all inter_ssc 5494 + * cleanup were done, to destroy the ssc delayed unmount list. 5495 + */ 5496 + static void nfsd4_ssc_shutdown_umount(struct nfsd_net *nn) 5497 + { 5498 + struct nfsd4_ssc_umount_item *ni = 0; 5499 + struct nfsd4_ssc_umount_item *tmp; 5500 + 5501 + spin_lock(&nn->nfsd_ssc_lock); 5502 + list_for_each_entry_safe(ni, tmp, &nn->nfsd_ssc_mount_list, nsui_list) { 5503 + list_del(&ni->nsui_list); 5504 + spin_unlock(&nn->nfsd_ssc_lock); 5505 + mntput(ni->nsui_vfsmount); 5506 + kfree(ni); 5507 + spin_lock(&nn->nfsd_ssc_lock); 5508 + } 5509 + spin_unlock(&nn->nfsd_ssc_lock); 5510 + } 5511 + 5512 + static void nfsd4_ssc_expire_umount(struct nfsd_net *nn) 5513 + { 5514 + bool do_wakeup = false; 5515 + struct nfsd4_ssc_umount_item *ni = 0; 5516 + struct nfsd4_ssc_umount_item *tmp; 5517 + 5518 + spin_lock(&nn->nfsd_ssc_lock); 5519 + list_for_each_entry_safe(ni, tmp, &nn->nfsd_ssc_mount_list, nsui_list) { 5520 + if (time_after(jiffies, ni->nsui_expire)) { 5521 + if (refcount_read(&ni->nsui_refcnt) > 1) 5522 + continue; 5523 + 5524 + /* mark being unmount */ 5525 + ni->nsui_busy = true; 5526 + spin_unlock(&nn->nfsd_ssc_lock); 5527 + mntput(ni->nsui_vfsmount); 5528 + spin_lock(&nn->nfsd_ssc_lock); 5529 + 5530 + /* waiters need to start from begin of list */ 5531 + list_del(&ni->nsui_list); 5532 + kfree(ni); 5533 + 5534 + 
/* wakeup ssc_connect waiters */ 5535 + do_wakeup = true; 5536 + continue; 5537 + } 5538 + break; 5539 + } 5540 + if (do_wakeup) 5541 + wake_up_all(&nn->nfsd_ssc_waitq); 5542 + spin_unlock(&nn->nfsd_ssc_lock); 5543 + } 5544 + #endif 5545 + 5484 5546 static time64_t 5485 5547 nfs4_laundromat(struct nfsd_net *nn) 5486 5548 { ··· 5653 5589 list_del_init(&nbl->nbl_lru); 5654 5590 free_blocked_lock(nbl); 5655 5591 } 5592 + #ifdef CONFIG_NFSD_V4_2_INTER_SSC 5593 + /* service the server-to-server copy delayed unmount list */ 5594 + nfsd4_ssc_expire_umount(nn); 5595 + #endif 5656 5596 out: 5657 5597 return max_t(time64_t, lt.new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT); 5658 5598 } ··· 7574 7506 7575 7507 nfsd4_client_tracking_exit(net); 7576 7508 nfs4_state_destroy_net(net); 7509 + #ifdef CONFIG_NFSD_V4_2_INTER_SSC 7510 + nfsd4_ssc_shutdown_umount(nn); 7511 + #endif 7577 7512 } 7578 7513 7579 7514 void
+4
fs/nfsd/nfsd.h
··· 484 484 extern int nfsd4_is_junction(struct dentry *dentry); 485 485 extern int register_cld_notifier(void); 486 486 extern void unregister_cld_notifier(void); 487 + #ifdef CONFIG_NFSD_V4_2_INTER_SSC 488 + extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn); 489 + #endif 490 + 487 491 #else /* CONFIG_NFSD_V4 */ 488 492 static inline int nfsd4_is_junction(struct dentry *dentry) 489 493 {
+3
fs/nfsd/nfssvc.c
··· 403 403 if (ret) 404 404 goto out_filecache; 405 405 406 + #ifdef CONFIG_NFSD_V4_2_INTER_SSC 407 + nfsd4_ssc_init_umount_work(nn); 408 + #endif 406 409 nn->nfsd_net_up = true; 407 410 return 0; 408 411
+14
include/linux/nfs_ssc.h
··· 8 8 */ 9 9 10 10 #include <linux/nfs_fs.h> 11 + #include <linux/sunrpc/svc.h> 11 12 12 13 extern struct nfs_ssc_client_ops_tbl nfs_ssc_client_tbl; 13 14 ··· 53 52 if (nfs_ssc_client_tbl.ssc_nfs4_ops) 54 53 (*nfs_ssc_client_tbl.ssc_nfs4_ops->sco_close)(filep); 55 54 } 55 + 56 + struct nfsd4_ssc_umount_item { 57 + struct list_head nsui_list; 58 + bool nsui_busy; 59 + /* 60 + * nsui_refcnt inited to 2, 1 on list and 1 for consumer. Entry 61 + * is removed when refcnt drops to 1 and nsui_expire expires. 62 + */ 63 + refcount_t nsui_refcnt; 64 + unsigned long nsui_expire; 65 + struct vfsmount *nsui_vfsmount; 66 + char nsui_ipaddr[RPC_MAX_ADDRBUFLEN]; 67 + }; 56 68 #endif 57 69 58 70 /*