NFS: swap IO handling is slightly different from O_DIRECT IO

1/ Taking the i_rwsem for swap IO triggers lockdep warnings regarding
   possible deadlocks with "fs_reclaim".  These deadlocks could, I believe,
   eventuate if a buffered read on the swapfile was attempted.

   We don't need coherence with the page cache for a swap file, and
   buffered writes are forbidden anyway.  There is no other need for
   i_rwsem during direct IO, so never take it for swap_rw().

2/ generic_write_checks() explicitly forbids writes to swap, and
   performs checks that are not needed for swap.  So bypass it
   for swap_rw().

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
This commit is contained in:
NeilBrown 2022-03-07 10:41:44 +11:00 committed by Trond Myklebust
parent 4dc73c6791
commit 64158668ac
3 changed files with 34 additions and 20 deletions

View File

@ -173,8 +173,8 @@ ssize_t nfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE); VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE);
if (iov_iter_rw(iter) == READ) if (iov_iter_rw(iter) == READ)
return nfs_file_direct_read(iocb, iter); return nfs_file_direct_read(iocb, iter, true);
return nfs_file_direct_write(iocb, iter); return nfs_file_direct_write(iocb, iter, true);
} }
static void nfs_direct_release_pages(struct page **pages, unsigned int npages) static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
@ -425,6 +425,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
* nfs_file_direct_read - file direct read operation for NFS files * nfs_file_direct_read - file direct read operation for NFS files
* @iocb: target I/O control block * @iocb: target I/O control block
* @iter: vector of user buffers into which to read data * @iter: vector of user buffers into which to read data
* @swap: flag indicating this is swap IO, not O_DIRECT IO
* *
* We use this function for direct reads instead of calling * We use this function for direct reads instead of calling
* generic_file_aio_read() in order to avoid gfar's check to see if * generic_file_aio_read() in order to avoid gfar's check to see if
@ -440,7 +441,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
* client must read the updated atime from the server back into its * client must read the updated atime from the server back into its
* cache. * cache.
*/ */
ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter) ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
bool swap)
{ {
struct file *file = iocb->ki_filp; struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping; struct address_space *mapping = file->f_mapping;
@ -482,12 +484,14 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter)
if (iter_is_iovec(iter)) if (iter_is_iovec(iter))
dreq->flags = NFS_ODIRECT_SHOULD_DIRTY; dreq->flags = NFS_ODIRECT_SHOULD_DIRTY;
nfs_start_io_direct(inode); if (!swap)
nfs_start_io_direct(inode);
NFS_I(inode)->read_io += count; NFS_I(inode)->read_io += count;
requested = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos); requested = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos);
nfs_end_io_direct(inode); if (!swap)
nfs_end_io_direct(inode);
if (requested > 0) { if (requested > 0) {
result = nfs_direct_wait(dreq); result = nfs_direct_wait(dreq);
@ -876,6 +880,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
* nfs_file_direct_write - file direct write operation for NFS files * nfs_file_direct_write - file direct write operation for NFS files
* @iocb: target I/O control block * @iocb: target I/O control block
* @iter: vector of user buffers from which to write data * @iter: vector of user buffers from which to write data
* @swap: flag indicating this is swap IO, not O_DIRECT IO
* *
* We use this function for direct writes instead of calling * We use this function for direct writes instead of calling
* generic_file_aio_write() in order to avoid taking the inode * generic_file_aio_write() in order to avoid taking the inode
@ -892,7 +897,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
* Note that O_APPEND is not supported for NFS direct writes, as there * Note that O_APPEND is not supported for NFS direct writes, as there
* is no atomic O_APPEND write facility in the NFS protocol. * is no atomic O_APPEND write facility in the NFS protocol.
*/ */
ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
bool swap)
{ {
ssize_t result, requested; ssize_t result, requested;
size_t count; size_t count;
@ -906,7 +912,11 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n", dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n",
file, iov_iter_count(iter), (long long) iocb->ki_pos); file, iov_iter_count(iter), (long long) iocb->ki_pos);
result = generic_write_checks(iocb, iter); if (swap)
/* bypass generic checks */
result = iov_iter_count(iter);
else
result = generic_write_checks(iocb, iter);
if (result <= 0) if (result <= 0)
return result; return result;
count = result; count = result;
@ -937,17 +947,21 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
dreq->iocb = iocb; dreq->iocb = iocb;
pnfs_init_ds_commit_info_ops(&dreq->ds_cinfo, inode); pnfs_init_ds_commit_info_ops(&dreq->ds_cinfo, inode);
nfs_start_io_direct(inode); if (swap) {
requested = nfs_direct_write_schedule_iovec(dreq, iter, pos);
} else {
nfs_start_io_direct(inode);
requested = nfs_direct_write_schedule_iovec(dreq, iter, pos); requested = nfs_direct_write_schedule_iovec(dreq, iter, pos);
if (mapping->nrpages) { if (mapping->nrpages) {
invalidate_inode_pages2_range(mapping, invalidate_inode_pages2_range(mapping,
pos >> PAGE_SHIFT, end); pos >> PAGE_SHIFT, end);
}
nfs_end_io_direct(inode);
} }
nfs_end_io_direct(inode);
if (requested > 0) { if (requested > 0) {
result = nfs_direct_wait(dreq); result = nfs_direct_wait(dreq);
if (result > 0) { if (result > 0) {

View File

@ -157,7 +157,7 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
ssize_t result; ssize_t result;
if (iocb->ki_flags & IOCB_DIRECT) if (iocb->ki_flags & IOCB_DIRECT)
return nfs_file_direct_read(iocb, to); return nfs_file_direct_read(iocb, to, false);
dprintk("NFS: read(%pD2, %zu@%lu)\n", dprintk("NFS: read(%pD2, %zu@%lu)\n",
iocb->ki_filp, iocb->ki_filp,
@ -623,7 +623,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
return result; return result;
if (iocb->ki_flags & IOCB_DIRECT) if (iocb->ki_flags & IOCB_DIRECT)
return nfs_file_direct_write(iocb, from); return nfs_file_direct_write(iocb, from, false);
dprintk("NFS: write(%pD2, %zu@%Ld)\n", dprintk("NFS: write(%pD2, %zu@%Ld)\n",
file, iov_iter_count(from), (long long) iocb->ki_pos); file, iov_iter_count(from), (long long) iocb->ki_pos);

View File

@ -508,10 +508,10 @@ static inline const struct cred *nfs_file_cred(struct file *file)
* linux/fs/nfs/direct.c * linux/fs/nfs/direct.c
*/ */
extern ssize_t nfs_direct_IO(struct kiocb *, struct iov_iter *); extern ssize_t nfs_direct_IO(struct kiocb *, struct iov_iter *);
extern ssize_t nfs_file_direct_read(struct kiocb *iocb, ssize_t nfs_file_direct_read(struct kiocb *iocb,
struct iov_iter *iter); struct iov_iter *iter, bool swap);
extern ssize_t nfs_file_direct_write(struct kiocb *iocb, ssize_t nfs_file_direct_write(struct kiocb *iocb,
struct iov_iter *iter); struct iov_iter *iter, bool swap);
/* /*
* linux/fs/nfs/dir.c * linux/fs/nfs/dir.c