Merge patch series "Fixup NLM and kNFSD file lock callbacks"

Benjamin Coddington <bcodding@redhat.com> says:

Last year both GFS2 and OCFS2 had some work done to make their locking more
robust when exported over NFS.  Unfortunately, part of that work caused both
NLM (for NFS v3 exports) and kNFSD (for NFSv4.1+ exports) to no longer send
lock notifications to clients.

This in itself is not a huge problem because most NFS clients will still
poll the server in order to acquire a conflicted lock, but now that I've
noticed it I can't help but try to fix it because there are big advantages
for setups that might depend on timely lock notifications, and we've
supported that as a feature for a long time.

Its important for NLM and kNFSD that they do not block their kernel threads
inside filesystem's file_lock implementations because that can produce
deadlocks.  We used to make sure of this by only trusting that
posix_lock_file() can correctly handle blocking lock calls asynchronously,
so the lock managers would only setup their file_lock requests for async
callbacks if the filesystem did not define its own lock() file operation.

However, when GFS2 and OCFS2 grew the capability to correctly
handle blocking lock requests asynchronously, they started signalling this
behavior with EXPORT_OP_ASYNC_LOCK, and the check for also trusting
posix_lock_file() was inadvertently dropped, so now most filesystems no
longer produce lock notifications when exported over NFS.

I tried to fix this by simply including the old check for lock(), but the
resulting include mess and layering violations was more than I could accept.
There's a much cleaner way presented here using an fop_flag, which while
potentially flag-greedy, greatly simplifies the problem and grooms the
way for future uses by both filesystems and lock managers alike.

* patches from https://lore.kernel.org/r/cover.1726083391.git.bcodding@redhat.com:
  exportfs: Remove EXPORT_OP_ASYNC_LOCK
  NLM/NFSD: Fix lock notifications for async-capable filesystems
  gfs2/ocfs2: set FOP_ASYNC_LOCK
  fs: Introduce FOP_ASYNC_LOCK
  NFS: trace: show TIMEDOUT instead of 0x6e
  nfsd: use system_unbound_wq for nfsd_file_gc_worker()
  nfsd: count nfsd_file allocations
  nfsd: fix refcount leak when file is unhashed after being found
  nfsd: remove unneeded EEXIST error check in nfsd_do_file_acquire
  nfsd: add list_head nf_gc to struct nfsd_file

Link: https://lore.kernel.org/r/cover.1726083391.git.bcodding@redhat.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
This commit is contained in:
Christian Brauner 2024-09-12 14:39:13 +02:00
commit 09ee2a670d
No known key found for this signature in database
GPG Key ID: 91C61BC06578DCA2
10 changed files with 18 additions and 41 deletions

View File

@ -238,10 +238,3 @@ following flags are defined:
all of an inode's dirty data on last close. Exports that behave this
way should set EXPORT_OP_FLUSH_ON_CLOSE so that NFSD knows to skip
waiting for writeback when closing such files.
EXPORT_OP_ASYNC_LOCK - Indicates a capable filesystem to do async lock
requests from lockd. Only set EXPORT_OP_ASYNC_LOCK if the filesystem has
it's own ->lock() functionality as core posix_lock_file() implementation
has no async lock request handling yet. For more information about how to
indicate an async lock request from a ->lock() file_operations struct, see
fs/locks.c and comment for the function vfs_lock_file().

View File

@ -190,6 +190,5 @@ const struct export_operations gfs2_export_ops = {
.fh_to_parent = gfs2_fh_to_parent,
.get_name = gfs2_get_name,
.get_parent = gfs2_get_parent,
.flags = EXPORT_OP_ASYNC_LOCK,
};

View File

@ -1586,6 +1586,7 @@ const struct file_operations gfs2_file_fops = {
.splice_write = gfs2_file_splice_write,
.setlease = simple_nosetlease,
.fallocate = gfs2_fallocate,
.fop_flags = FOP_ASYNC_LOCK,
};
const struct file_operations gfs2_dir_fops = {
@ -1598,6 +1599,7 @@ const struct file_operations gfs2_dir_fops = {
.lock = gfs2_lock,
.flock = gfs2_flock,
.llseek = default_llseek,
.fop_flags = FOP_ASYNC_LOCK,
};
#endif /* CONFIG_GFS2_FS_LOCKING_DLM */

View File

@ -30,7 +30,6 @@
#include <linux/sunrpc/svc_xprt.h>
#include <linux/lockd/nlm.h>
#include <linux/lockd/lockd.h>
#include <linux/exportfs.h>
#define NLMDBG_FACILITY NLMDBG_SVCLOCK
@ -481,7 +480,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
struct nlm_host *host, struct nlm_lock *lock, int wait,
struct nlm_cookie *cookie, int reclaim)
{
struct inode *inode = nlmsvc_file_inode(file);
struct inode *inode __maybe_unused = nlmsvc_file_inode(file);
struct nlm_block *block = NULL;
int error;
int mode;
@ -496,7 +495,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
(long long)lock->fl.fl_end,
wait);
if (!exportfs_lock_op_is_async(inode->i_sb->s_export_op)) {
if (!locks_can_async_lock(nlmsvc_file_file(file)->f_op)) {
async_block = wait;
wait = 0;
}
@ -550,7 +549,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
* requests on the underlaying ->lock() implementation but
* only one nlm_block to being granted by lm_grant().
*/
if (exportfs_lock_op_is_async(inode->i_sb->s_export_op) &&
if (locks_can_async_lock(nlmsvc_file_file(file)->f_op) &&
!list_empty(&block->b_list)) {
spin_unlock(&nlm_blocked_lock);
ret = nlm_lck_blocked;

View File

@ -7968,9 +7968,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
fp = lock_stp->st_stid.sc_file;
switch (lock->lk_type) {
case NFS4_READW_LT:
if (nfsd4_has_session(cstate) ||
exportfs_lock_op_is_async(sb->s_export_op))
flags |= FL_SLEEP;
fallthrough;
case NFS4_READ_LT:
spin_lock(&fp->fi_lock);
@ -7981,9 +7978,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
type = F_RDLCK;
break;
case NFS4_WRITEW_LT:
if (nfsd4_has_session(cstate) ||
exportfs_lock_op_is_async(sb->s_export_op))
flags |= FL_SLEEP;
fallthrough;
case NFS4_WRITE_LT:
spin_lock(&fp->fi_lock);
@ -8003,15 +7997,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
}
/*
* Most filesystems with their own ->lock operations will block
* the nfsd thread waiting to acquire the lock. That leads to
* deadlocks (we don't want every nfsd thread tied up waiting
* for file locks), so don't attempt blocking lock notifications
* on those filesystems:
*/
if (!exportfs_lock_op_is_async(sb->s_export_op))
flags &= ~FL_SLEEP;
if (lock->lk_type & (NFS4_READW_LT | NFS4_WRITEW_LT) &&
nfsd4_has_session(cstate) &&
locks_can_async_lock(nf->nf_file->f_op))
flags |= FL_SLEEP;
nbl = find_or_allocate_block(lock_sop, &fp->fi_fhandle, nn);
if (!nbl) {

View File

@ -280,5 +280,4 @@ const struct export_operations ocfs2_export_ops = {
.fh_to_dentry = ocfs2_fh_to_dentry,
.fh_to_parent = ocfs2_fh_to_parent,
.get_parent = ocfs2_get_parent,
.flags = EXPORT_OP_ASYNC_LOCK,
};

View File

@ -2801,6 +2801,7 @@ const struct file_operations ocfs2_fops = {
.splice_write = iter_file_splice_write,
.fallocate = ocfs2_fallocate,
.remap_file_range = ocfs2_remap_file_range,
.fop_flags = FOP_ASYNC_LOCK,
};
WRAP_DIR_ITER(ocfs2_readdir) // FIXME!
@ -2817,6 +2818,7 @@ const struct file_operations ocfs2_dops = {
#endif
.lock = ocfs2_lock,
.flock = ocfs2_flock,
.fop_flags = FOP_ASYNC_LOCK,
};
/*

View File

@ -250,19 +250,6 @@ struct export_operations {
unsigned long flags;
};
/**
* exportfs_lock_op_is_async() - export op supports async lock operation
* @export_ops: the nfs export operations to check
*
* Returns true if the nfs export_operations structure has
* EXPORT_OP_ASYNC_LOCK in their flags set
*/
static inline bool
exportfs_lock_op_is_async(const struct export_operations *export_ops)
{
return export_ops->flags & EXPORT_OP_ASYNC_LOCK;
}
extern int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid,
int *max_len, struct inode *parent,
int flags);

View File

@ -180,6 +180,11 @@ static inline void locks_wake_up(struct file_lock *fl)
wake_up(&fl->c.flc_wait);
}
static inline bool locks_can_async_lock(const struct file_operations *fops)
{
return !fops->lock || fops->fop_flags & FOP_ASYNC_LOCK;
}
/* fs/locks.c */
void locks_free_lock_context(struct inode *inode);
void locks_free_lock(struct file_lock *fl);

View File

@ -2116,6 +2116,8 @@ struct file_operations {
#define FOP_HUGE_PAGES ((__force fop_flags_t)(1 << 4))
/* Treat loff_t as unsigned (e.g., /dev/mem) */
#define FOP_UNSIGNED_OFFSET ((__force fop_flags_t)(1 << 5))
/* Supports asynchronous lock callbacks */
#define FOP_ASYNC_LOCK ((__force fop_flags_t)(1 << 6))
/* Wrap a directory iterator that needs exclusive inode access */
int wrap_directory_iterator(struct file *, struct dir_context *,