Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge patch series "Fixup NLM and kNFSD file lock callbacks"

Benjamin Coddington <bcodding@redhat.com> says:

Last year both GFS2 and OCFS2 had some work done to make their locking more
robust when exported over NFS. Unfortunately, part of that work caused both
NLM (for NFSv3 exports) and kNFSD (for NFSv4.1+ exports) to no longer send
lock notifications to clients.

This in itself is not a huge problem because most NFS clients will still
poll the server in order to acquire a conflicted lock, but now that I've
noticed it I can't help but try to fix it because there are big advantages
for setups that might depend on timely lock notifications, and we've
supported that as a feature for a long time.

It's important for NLM and kNFSD that they do not block their kernel threads
inside filesystem's file_lock implementations because that can produce
deadlocks. We used to make sure of this by only trusting that
posix_lock_file() can correctly handle blocking lock calls asynchronously,
so the lock managers would only set up their file_lock requests for async
callbacks if the filesystem did not define its own lock() file operation.

However, when GFS2 and OCFS2 grew the capability to correctly
handle blocking lock requests asynchronously, they started signalling this
behavior with EXPORT_OP_ASYNC_LOCK, and the check for also trusting
posix_lock_file() was inadvertently dropped, so now most filesystems no
longer produce lock notifications when exported over NFS.

I tried to fix this by simply including the old check for lock(), but the
resulting include mess and layering violations were more than I could accept.
There's a much cleaner way presented here using an fop_flag, which while
potentially flag-greedy, greatly simplifies the problem and paves the
way for future uses by both filesystems and lock managers alike.

* patches from https://lore.kernel.org/r/cover.1726083391.git.bcodding@redhat.com:
exportfs: Remove EXPORT_OP_ASYNC_LOCK
NLM/NFSD: Fix lock notifications for async-capable filesystems
gfs2/ocfs2: set FOP_ASYNC_LOCK
fs: Introduce FOP_ASYNC_LOCK
NFS: trace: show TIMEDOUT instead of 0x6e
nfsd: use system_unbound_wq for nfsd_file_gc_worker()
nfsd: count nfsd_file allocations
nfsd: fix refcount leak when file is unhashed after being found
nfsd: remove unneeded EEXIST error check in nfsd_do_file_acquire
nfsd: add list_head nf_gc to struct nfsd_file

Link: https://lore.kernel.org/r/cover.1726083391.git.bcodding@redhat.com
Signed-off-by: Christian Brauner <brauner@kernel.org>

+18 -41
-7
Documentation/filesystems/nfs/exporting.rst
··· 238 238 all of an inode's dirty data on last close. Exports that behave this 239 239 way should set EXPORT_OP_FLUSH_ON_CLOSE so that NFSD knows to skip 240 240 waiting for writeback when closing such files. 241 - 242 - EXPORT_OP_ASYNC_LOCK - Indicates a capable filesystem to do async lock 243 - requests from lockd. Only set EXPORT_OP_ASYNC_LOCK if the filesystem has 244 - it's own ->lock() functionality as core posix_lock_file() implementation 245 - has no async lock request handling yet. For more information about how to 246 - indicate an async lock request from a ->lock() file_operations struct, see 247 - fs/locks.c and comment for the function vfs_lock_file().
-1
fs/gfs2/export.c
··· 190 190 .fh_to_parent = gfs2_fh_to_parent, 191 191 .get_name = gfs2_get_name, 192 192 .get_parent = gfs2_get_parent, 193 - .flags = EXPORT_OP_ASYNC_LOCK, 194 193 }; 195 194
+2
fs/gfs2/file.c
··· 1586 1586 .splice_write = gfs2_file_splice_write, 1587 1587 .setlease = simple_nosetlease, 1588 1588 .fallocate = gfs2_fallocate, 1589 + .fop_flags = FOP_ASYNC_LOCK, 1589 1590 }; 1590 1591 1591 1592 const struct file_operations gfs2_dir_fops = { ··· 1599 1598 .lock = gfs2_lock, 1600 1599 .flock = gfs2_flock, 1601 1600 .llseek = default_llseek, 1601 + .fop_flags = FOP_ASYNC_LOCK, 1602 1602 }; 1603 1603 1604 1604 #endif /* CONFIG_GFS2_FS_LOCKING_DLM */
+3 -4
fs/lockd/svclock.c
··· 30 30 #include <linux/sunrpc/svc_xprt.h> 31 31 #include <linux/lockd/nlm.h> 32 32 #include <linux/lockd/lockd.h> 33 - #include <linux/exportfs.h> 34 33 35 34 #define NLMDBG_FACILITY NLMDBG_SVCLOCK 36 35 ··· 480 481 struct nlm_host *host, struct nlm_lock *lock, int wait, 481 482 struct nlm_cookie *cookie, int reclaim) 482 483 { 483 - struct inode *inode = nlmsvc_file_inode(file); 484 + struct inode *inode __maybe_unused = nlmsvc_file_inode(file); 484 485 struct nlm_block *block = NULL; 485 486 int error; 486 487 int mode; ··· 495 496 (long long)lock->fl.fl_end, 496 497 wait); 497 498 498 - if (!exportfs_lock_op_is_async(inode->i_sb->s_export_op)) { 499 + if (!locks_can_async_lock(nlmsvc_file_file(file)->f_op)) { 499 500 async_block = wait; 500 501 wait = 0; 501 502 } ··· 549 550 * requests on the underlaying ->lock() implementation but 550 551 * only one nlm_block to being granted by lm_grant(). 551 552 */ 552 - if (exportfs_lock_op_is_async(inode->i_sb->s_export_op) && 553 + if (locks_can_async_lock(nlmsvc_file_file(file)->f_op) && 553 554 !list_empty(&block->b_list)) { 554 555 spin_unlock(&nlm_blocked_lock); 555 556 ret = nlm_lck_blocked;
+4 -15
fs/nfsd/nfs4state.c
··· 7968 7968 fp = lock_stp->st_stid.sc_file; 7969 7969 switch (lock->lk_type) { 7970 7970 case NFS4_READW_LT: 7971 - if (nfsd4_has_session(cstate) || 7972 - exportfs_lock_op_is_async(sb->s_export_op)) 7973 - flags |= FL_SLEEP; 7974 7971 fallthrough; 7975 7972 case NFS4_READ_LT: 7976 7973 spin_lock(&fp->fi_lock); ··· 7978 7981 type = F_RDLCK; 7979 7982 break; 7980 7983 case NFS4_WRITEW_LT: 7981 - if (nfsd4_has_session(cstate) || 7982 - exportfs_lock_op_is_async(sb->s_export_op)) 7983 - flags |= FL_SLEEP; 7984 7984 fallthrough; 7985 7985 case NFS4_WRITE_LT: 7986 7986 spin_lock(&fp->fi_lock); ··· 7997 8003 goto out; 7998 8004 } 7999 8005 8000 - /* 8001 - * Most filesystems with their own ->lock operations will block 8002 - * the nfsd thread waiting to acquire the lock. That leads to 8003 - * deadlocks (we don't want every nfsd thread tied up waiting 8004 - * for file locks), so don't attempt blocking lock notifications 8005 - * on those filesystems: 8006 - */ 8007 - if (!exportfs_lock_op_is_async(sb->s_export_op)) 8008 - flags &= ~FL_SLEEP; 8006 + if (lock->lk_type & (NFS4_READW_LT | NFS4_WRITEW_LT) && 8007 + nfsd4_has_session(cstate) && 8008 + locks_can_async_lock(nf->nf_file->f_op)) 8009 + flags |= FL_SLEEP; 8009 8010 8010 8011 nbl = find_or_allocate_block(lock_sop, &fp->fi_fhandle, nn); 8011 8012 if (!nbl) {
-1
fs/ocfs2/export.c
··· 280 280 .fh_to_dentry = ocfs2_fh_to_dentry, 281 281 .fh_to_parent = ocfs2_fh_to_parent, 282 282 .get_parent = ocfs2_get_parent, 283 - .flags = EXPORT_OP_ASYNC_LOCK, 284 283 };
+2
fs/ocfs2/file.c
··· 2801 2801 .splice_write = iter_file_splice_write, 2802 2802 .fallocate = ocfs2_fallocate, 2803 2803 .remap_file_range = ocfs2_remap_file_range, 2804 + .fop_flags = FOP_ASYNC_LOCK, 2804 2805 }; 2805 2806 2806 2807 WRAP_DIR_ITER(ocfs2_readdir) // FIXME! ··· 2818 2817 #endif 2819 2818 .lock = ocfs2_lock, 2820 2819 .flock = ocfs2_flock, 2820 + .fop_flags = FOP_ASYNC_LOCK, 2821 2821 }; 2822 2822 2823 2823 /*
-13
include/linux/exportfs.h
··· 250 250 unsigned long flags; 251 251 }; 252 252 253 - /** 254 - * exportfs_lock_op_is_async() - export op supports async lock operation 255 - * @export_ops: the nfs export operations to check 256 - * 257 - * Returns true if the nfs export_operations structure has 258 - * EXPORT_OP_ASYNC_LOCK in their flags set 259 - */ 260 - static inline bool 261 - exportfs_lock_op_is_async(const struct export_operations *export_ops) 262 - { 263 - return export_ops->flags & EXPORT_OP_ASYNC_LOCK; 264 - } 265 - 266 253 extern int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid, 267 254 int *max_len, struct inode *parent, 268 255 int flags);
+5
include/linux/filelock.h
··· 180 180 wake_up(&fl->c.flc_wait); 181 181 } 182 182 183 + static inline bool locks_can_async_lock(const struct file_operations *fops) 184 + { 185 + return !fops->lock || fops->fop_flags & FOP_ASYNC_LOCK; 186 + } 187 + 183 188 /* fs/locks.c */ 184 189 void locks_free_lock_context(struct inode *inode); 185 190 void locks_free_lock(struct file_lock *fl);
+2
include/linux/fs.h
··· 2116 2116 #define FOP_HUGE_PAGES ((__force fop_flags_t)(1 << 4)) 2117 2117 /* Treat loff_t as unsigned (e.g., /dev/mem) */ 2118 2118 #define FOP_UNSIGNED_OFFSET ((__force fop_flags_t)(1 << 5)) 2119 + /* Supports asynchronous lock callbacks */ 2120 + #define FOP_ASYNC_LOCK ((__force fop_flags_t)(1 << 6)) 2119 2121 2120 2122 /* Wrap a directory iterator that needs exclusive inode access */ 2121 2123 int wrap_directory_iterator(struct file *, struct dir_context *,