for-6.5/splice-2023-06-23

-----BEGIN PGP SIGNATURE-----
 
 iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmSV8QgQHGF4Ym9lQGtl
 cm5lbC5kawAKCRD301j7KXHgpupIEADKEZvpxDyaxHjYZFFeoSJRkh+AEJHe0Xtr
 J5vUL8t8zmAV3F7i8XaoAEcR0dC0VQcoTc8fAOty71+5hsc7gvtyyNjqU/YWRVqK
 Xr+VJuSJ+OGx3MzpRWEkepagfPyqP5cyyCOK6gqIgqzc3IwqkR/3QHVRc6oR8YbY
 AQd7tqm2fQXK9WDHEy5hcaQeqb9uKZjQQoZejpPPerpJM+9RMgKxpCGtnLLIUhr/
 sgl7KyLIQPBmveO2vfOR+dmsJBqsLqneqkXDKMAIfpeVEEkHHAlCH4E5Ne1XUS+s
 ie4If+reuyn1Ktt5Ry1t7w2wr8cX1fcay3K28tgwjE2Bvremc5YnYgb3pyUDW38f
 tXXkpg/eTXd/Pn0Crpagoa9zJ927tt5JXIO1/PagPEP1XOqUuthshDFsrVqfqbs+
 36gqX2JWB4NJTg9B9KBHA3+iVCJyZLjUqOqws7hOJOvhQytZVm/IwkGBg1Slhe1a
 J5WemBlqX8lTgXz0nM7cOhPYTZeKe6hazCcb5VwxTUTj9SGyYtsMfqqTwRJO9kiF
 j1VzbOAgExDYe+GvfqOFPh9VqZho66+DyOD/Xtca4eH7oYyHSmP66o8nhRyPBPZA
 maBxQhUkPQn4/V/0fL2TwIdWYKsbj8bUyINKPZ2L35YfeICiaYIctTwNJxtRmItB
 M3VxWD3GZQ==
 =KhW4
 -----END PGP SIGNATURE-----

Merge tag 'for-6.5/splice-2023-06-23' of git://git.kernel.dk/linux

Pull splice updates from Jens Axboe:
 "This kills off ITER_PIPE to avoid a race between truncate,
  iov_iter_revert() on the pipe and an as-yet incomplete DMA to a bio
  with unpinned/unref'ed pages from an O_DIRECT splice read. This causes
  memory corruption.

  Instead, we either use (a) filemap_splice_read(), which invokes the
  buffered file reading code and splices from the pagecache into the
  pipe; (b) copy_splice_read(), which bulk-allocates a buffer, reads
  into it and then pushes the filled pages into the pipe; or (c) handle
  it in filesystem-specific code.

  Summary:

   - Rename direct_splice_read() to copy_splice_read()

   - Simplify the calculations for the number of pages to be reclaimed
     in copy_splice_read()

   - Turn do_splice_to() into a helper, vfs_splice_read(), so that it
     can be used by overlayfs and coda to perform the checks on the
     lower fs

   - Make vfs_splice_read() jump to copy_splice_read() to handle
     direct-I/O and DAX

   - Provide shmem with its own splice_read to handle non-existent pages
     in the pagecache. We don't want a ->read_folio() as we don't want
     to populate holes, but filemap_get_pages() requires it

   - Provide overlayfs with its own splice_read to call down to a lower
     layer as overlayfs doesn't provide ->read_folio()

   - Provide coda with its own splice_read to call down to a lower layer
     as coda doesn't provide ->read_folio()

   - Direct ->splice_read to copy_splice_read() in tty, procfs, kernfs
     and random files as they just copy to the output buffer and don't
     splice pages

   - Provide wrappers for afs, ceph, ecryptfs, ext4, f2fs, nfs, ntfs3,
     ocfs2, orangefs, xfs and zonefs to do locking and/or revalidation

   - Make cifs use filemap_splice_read()

   - Replace pointers to generic_file_splice_read() with pointers to
     filemap_splice_read() as DIO and DAX are handled in the caller;
     filesystems can still provide their own alternate ->splice_read()
     op

   - Remove generic_file_splice_read()

   - Remove ITER_PIPE and its paraphernalia as generic_file_splice_read
     was the only user"

* tag 'for-6.5/splice-2023-06-23' of git://git.kernel.dk/linux: (31 commits)
  splice: kdoc for filemap_splice_read() and copy_splice_read()
  iov_iter: Kill ITER_PIPE
  splice: Remove generic_file_splice_read()
  splice: Use filemap_splice_read() instead of generic_file_splice_read()
  cifs: Use filemap_splice_read()
  trace: Convert trace/seq to use copy_splice_read()
  zonefs: Provide a splice-read wrapper
  xfs: Provide a splice-read wrapper
  orangefs: Provide a splice-read wrapper
  ocfs2: Provide a splice-read wrapper
  ntfs3: Provide a splice-read wrapper
  nfs: Provide a splice-read wrapper
  f2fs: Provide a splice-read wrapper
  ext4: Provide a splice-read wrapper
  ecryptfs: Provide a splice-read wrapper
  ceph: Provide a splice-read wrapper
  afs: Provide a splice-read wrapper
  9p: Add splice_read wrapper
  net: Make sock_splice_read() use copy_splice_read() by default
  tty, proc, kernfs, random: Use copy_splice_read()
  ...
This commit is contained in:
Linus Torvalds 2023-06-26 11:52:12 -07:00
commit 3eccc0c886
68 changed files with 694 additions and 621 deletions

View File

@ -701,7 +701,7 @@ const struct file_operations def_blk_fops = {
#ifdef CONFIG_COMPAT
.compat_ioctl = compat_blkdev_ioctl,
#endif
.splice_read = generic_file_splice_read,
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = blkdev_fallocate,
};

View File

@ -1546,7 +1546,7 @@ const struct file_operations random_fops = {
.compat_ioctl = compat_ptr_ioctl,
.fasync = random_fasync,
.llseek = noop_llseek,
.splice_read = generic_file_splice_read,
.splice_read = copy_splice_read,
.splice_write = iter_file_splice_write,
};
@ -1557,7 +1557,7 @@ const struct file_operations urandom_fops = {
.compat_ioctl = compat_ptr_ioctl,
.fasync = random_fasync,
.llseek = noop_llseek,
.splice_read = generic_file_splice_read,
.splice_read = copy_splice_read,
.splice_write = iter_file_splice_write,
};

View File

@ -466,7 +466,7 @@ static const struct file_operations tty_fops = {
.llseek = no_llseek,
.read_iter = tty_read,
.write_iter = tty_write,
.splice_read = generic_file_splice_read,
.splice_read = copy_splice_read,
.splice_write = iter_file_splice_write,
.poll = tty_poll,
.unlocked_ioctl = tty_ioctl,
@ -481,7 +481,7 @@ static const struct file_operations console_fops = {
.llseek = no_llseek,
.read_iter = tty_read,
.write_iter = redirected_tty_write,
.splice_read = generic_file_splice_read,
.splice_read = copy_splice_read,
.splice_write = iter_file_splice_write,
.poll = tty_poll,
.unlocked_ioctl = tty_ioctl,

View File

@ -374,6 +374,28 @@ v9fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
return ret;
}
/*
 * v9fs_file_splice_read - splice data from a 9p file into a pipe
 * @in: The 9p file being read
 * @ppos: Pointer to the file position, handed through to the splice helper
 * @pipe: The destination pipe
 * @len: Upper bound on the amount of data to splice
 * @flags: SPLICE_F_* flags
 *
 * When the P9L_DIRECT bit is set in the fid's mode the read is served by
 * copy_splice_read(); otherwise it is served by filemap_splice_read().
 */
static ssize_t v9fs_file_splice_read(struct file *in, loff_t *ppos,
				     struct pipe_inode_info *pipe,
				     size_t len, unsigned int flags)
{
	struct p9_fid *p9fid = in->private_data;

	p9_debug(P9_DEBUG_VFS, "fid %d count %zu offset %lld\n",
		 p9fid->fid, len, *ppos);

	if (!(p9fid->mode & P9L_DIRECT))
		return filemap_splice_read(in, ppos, pipe, len, flags);

	return copy_splice_read(in, ppos, pipe, len, flags);
}
/**
* v9fs_file_write_iter - write to a file
* @iocb: The operation parameters
@ -569,7 +591,7 @@ const struct file_operations v9fs_file_operations = {
.release = v9fs_dir_release,
.lock = v9fs_file_lock,
.mmap = generic_file_readonly_mmap,
.splice_read = generic_file_splice_read,
.splice_read = v9fs_file_splice_read,
.splice_write = iter_file_splice_write,
.fsync = v9fs_file_fsync,
};
@ -583,7 +605,7 @@ const struct file_operations v9fs_file_operations_dotl = {
.lock = v9fs_file_lock_dotl,
.flock = v9fs_file_flock_dotl,
.mmap = v9fs_file_mmap,
.splice_read = generic_file_splice_read,
.splice_read = v9fs_file_splice_read,
.splice_write = iter_file_splice_write,
.fsync = v9fs_file_fsync_dotl,
};

View File

@ -28,7 +28,7 @@ const struct file_operations adfs_file_operations = {
.mmap = generic_file_mmap,
.fsync = generic_file_fsync,
.write_iter = generic_file_write_iter,
.splice_read = generic_file_splice_read,
.splice_read = filemap_splice_read,
};
const struct inode_operations adfs_file_inode_operations = {

View File

@ -1001,7 +1001,7 @@ const struct file_operations affs_file_operations = {
.open = affs_file_open,
.release = affs_file_release,
.fsync = affs_file_fsync,
.splice_read = generic_file_splice_read,
.splice_read = filemap_splice_read,
};
const struct inode_operations affs_file_inode_operations = {

View File

@ -25,6 +25,9 @@ static void afs_invalidate_folio(struct folio *folio, size_t offset,
static bool afs_release_folio(struct folio *folio, gfp_t gfp_flags);
static ssize_t afs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter);
static ssize_t afs_file_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe,
size_t len, unsigned int flags);
static void afs_vm_open(struct vm_area_struct *area);
static void afs_vm_close(struct vm_area_struct *area);
static vm_fault_t afs_vm_map_pages(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff);
@ -36,7 +39,7 @@ const struct file_operations afs_file_operations = {
.read_iter = afs_file_read_iter,
.write_iter = afs_file_write,
.mmap = afs_file_mmap,
.splice_read = generic_file_splice_read,
.splice_read = afs_file_splice_read,
.splice_write = iter_file_splice_write,
.fsync = afs_fsync,
.lock = afs_lock,
@ -587,3 +590,18 @@ static ssize_t afs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
return generic_file_read_iter(iocb, iter);
}
/*
 * Splice-read from an AFS file.
 *
 * The vnode is first validated with the key held in the file's private
 * data.  A negative result from afs_validate() is returned unchanged;
 * otherwise the request is passed on to filemap_splice_read().
 */
static ssize_t afs_file_splice_read(struct file *in, loff_t *ppos,
				    struct pipe_inode_info *pipe,
				    size_t len, unsigned int flags)
{
	struct afs_file *af = in->private_data;
	struct afs_vnode *vnode = AFS_FS_I(file_inode(in));
	int err = afs_validate(vnode, af->key);

	if (err < 0)
		return err;

	return filemap_splice_read(in, ppos, pipe, len, flags);
}

View File

@ -27,7 +27,7 @@ const struct file_operations bfs_file_operations = {
.read_iter = generic_file_read_iter,
.write_iter = generic_file_write_iter,
.mmap = generic_file_mmap,
.splice_read = generic_file_splice_read,
.splice_read = filemap_splice_read,
};
static int bfs_move_block(unsigned long from, unsigned long to,

View File

@ -3817,7 +3817,7 @@ static ssize_t btrfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
const struct file_operations btrfs_file_operations = {
.llseek = btrfs_file_llseek,
.read_iter = btrfs_file_read_iter,
.splice_read = generic_file_splice_read,
.splice_read = filemap_splice_read,
.write_iter = btrfs_file_write_iter,
.splice_write = iter_file_splice_write,
.mmap = btrfs_file_mmap,

View File

@ -1745,6 +1745,69 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
return ret;
}
/*
* Wrap filemap_splice_read with checks for cap bits on the inode.
* Atomically grab references, so that those bits are not released
* back to the MDS mid-read.
*
* Returns -ESTALE if the inode has been shut down, a negative value from
* ceph_get_caps() if the caps could not be obtained, or otherwise whatever
* copy_splice_read() / filemap_splice_read() returns.
*/
static ssize_t ceph_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe,
size_t len, unsigned int flags)
{
struct ceph_file_info *fi = in->private_data;
struct inode *inode = file_inode(in);
struct ceph_inode_info *ci = ceph_inode(inode);
ssize_t ret;
int want = 0, got = 0;
CEPH_DEFINE_RW_CONTEXT(rw_ctx, 0);
dout("splice_read %p %llx.%llx %llu~%zu trying to get caps on %p\n",
inode, ceph_vinop(inode), *ppos, len, inode);
if (ceph_inode_is_shutdown(inode))
return -ESTALE;
/*
* Inline data or a CEPH_F_SYNC file: hand the whole request to
* copy_splice_read() without taking any cap references here.
*/
if (ceph_has_inline_data(ci) ||
(fi->flags & CEPH_F_SYNC))
return copy_splice_read(in, ppos, pipe, len, flags);
/* Everything from here to out_end runs between start/end_io_read. */
ceph_start_io_read(inode);
/* Always ask for FILE_CACHE; add LAZYIO when the file mode is lazy. */
want = CEPH_CAP_FILE_CACHE;
if (fi->fmode & CEPH_FILE_MODE_LAZY)
want |= CEPH_CAP_FILE_LAZYIO;
/* FILE_RD is passed as the needed cap; "got" is filled in by the callee. */
ret = ceph_get_caps(in, CEPH_CAP_FILE_RD, want, -1, &got);
if (ret < 0)
goto out_end;
/*
* Neither FILE_CACHE nor FILE_LAZYIO is present in "got": drop the refs,
* leave the read-I/O section and fall back to copy_splice_read().
*/
if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) == 0) {
dout("splice_read/sync %p %llx.%llx %llu~%zu got cap refs on %s\n",
inode, ceph_vinop(inode), *ppos, len,
ceph_cap_string(got));
ceph_put_cap_refs(ci, got);
ceph_end_io_read(inode);
return copy_splice_read(in, ppos, pipe, len, flags);
}
dout("splice_read %p %llx.%llx %llu~%zu got cap refs on %s\n",
inode, ceph_vinop(inode), *ppos, len, ceph_cap_string(got));
/*
* Record the held caps in an rw context attached to the file info for
* the duration of the filemap_splice_read() call only.
*/
rw_ctx.caps = got;
ceph_add_rw_context(fi, &rw_ctx);
ret = filemap_splice_read(in, ppos, pipe, len, flags);
ceph_del_rw_context(fi, &rw_ctx);
dout("splice_read %p %llx.%llx dropping cap refs on %s = %zd\n",
inode, ceph_vinop(inode), ceph_cap_string(got), ret);
ceph_put_cap_refs(ci, got);
out_end:
/* Reached on ceph_get_caps() failure (no refs to put) and on completion. */
ceph_end_io_read(inode);
return ret;
}
/*
* Take cap references to avoid releasing caps to MDS mid-write.
*
@ -2593,7 +2656,7 @@ const struct file_operations ceph_file_fops = {
.lock = ceph_lock,
.setlease = simple_nosetlease,
.flock = ceph_flock,
.splice_read = generic_file_splice_read,
.splice_read = ceph_splice_read,
.splice_write = iter_file_splice_write,
.unlocked_ioctl = ceph_ioctl,
.compat_ioctl = compat_ptr_ioctl,

View File

@ -23,6 +23,7 @@
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/uio.h>
#include <linux/splice.h>
#include <linux/coda.h>
#include "coda_psdev.h"
@ -94,6 +95,32 @@ coda_file_write_iter(struct kiocb *iocb, struct iov_iter *to)
return ret;
}
/*
 * Splice-read for coda files.
 *
 * The read itself is forwarded to the container file (cfi->cfi_container)
 * through vfs_splice_read().  It is bracketed by two venus_access_intent()
 * calls, CODA_ACCESS_TYPE_READ before and CODA_ACCESS_TYPE_READ_FINISH
 * after, both using the file position sampled on entry.  The READ_FINISH
 * call is issued even when the initial READ intent fails, and its return
 * value is not examined.
 */
static ssize_t
coda_file_splice_read(struct file *coda_file, loff_t *ppos,
		      struct pipe_inode_info *pipe,
		      size_t len, unsigned int flags)
{
	struct coda_file_info *cfi = coda_ftoc(coda_file);
	struct inode *host_inode = file_inode(coda_file);
	struct file *container = cfi->cfi_container;
	loff_t start_pos = *ppos;
	ssize_t ret;

	ret = venus_access_intent(host_inode->i_sb, coda_i2f(host_inode),
				  &cfi->cfi_access_intent,
				  len, start_pos, CODA_ACCESS_TYPE_READ);
	if (!ret)
		ret = vfs_splice_read(container, ppos, pipe, len, flags);

	venus_access_intent(host_inode->i_sb, coda_i2f(host_inode),
			    &cfi->cfi_access_intent,
			    len, start_pos, CODA_ACCESS_TYPE_READ_FINISH);
	return ret;
}
static void
coda_vm_open(struct vm_area_struct *vma)
{
@ -302,5 +329,5 @@ const struct file_operations coda_file_operations = {
.open = coda_open,
.release = coda_release,
.fsync = coda_fsync,
.splice_read = generic_file_splice_read,
.splice_read = coda_file_splice_read,
};

View File

@ -473,7 +473,7 @@ static unsigned int cramfs_physmem_mmap_capabilities(struct file *file)
static const struct file_operations cramfs_physmem_fops = {
.llseek = generic_file_llseek,
.read_iter = generic_file_read_iter,
.splice_read = generic_file_splice_read,
.splice_read = filemap_splice_read,
.mmap = cramfs_physmem_mmap,
#ifndef CONFIG_MMU
.get_unmapped_area = cramfs_physmem_get_unmapped_area,

View File

@ -44,6 +44,31 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb,
return rc;
}
/*
 * ecryptfs_splice_read_update_atime
 *
 * filemap_splice_read updates the atime of upper layer inode. But, it
 * doesn't give us a chance to update the atime of the lower layer inode. This
 * function is a wrapper to filemap_splice_read. It updates the atime of the
 * lower level inode if filemap_splice_read returns without any errors
 * (rc >= 0). This is to be used only for file reads. The function to be used
 * for directory reads is ecryptfs_read.
 */
static ssize_t ecryptfs_splice_read_update_atime(struct file *in, loff_t *ppos,
						 struct pipe_inode_info *pipe,
						 size_t len, unsigned int flags)
{
	ssize_t rc = filemap_splice_read(in, ppos, pipe, len, flags);

	if (rc < 0)
		return rc;

	/* Success: propagate the access time to the lower path as well. */
	touch_atime(ecryptfs_dentry_to_lower_path(in->f_path.dentry));
	return rc;
}
struct ecryptfs_getdents_callback {
struct dir_context ctx;
struct dir_context *caller;
@ -414,5 +439,5 @@ const struct file_operations ecryptfs_main_fops = {
.release = ecryptfs_release,
.fsync = ecryptfs_fsync,
.fasync = ecryptfs_fasync,
.splice_read = generic_file_splice_read,
.splice_read = ecryptfs_splice_read_update_atime,
};

View File

@ -448,5 +448,5 @@ const struct file_operations erofs_file_fops = {
.llseek = generic_file_llseek,
.read_iter = erofs_file_read_iter,
.mmap = erofs_file_mmap,
.splice_read = generic_file_splice_read,
.splice_read = filemap_splice_read,
};

View File

@ -389,7 +389,7 @@ const struct file_operations exfat_file_operations = {
#endif
.mmap = generic_file_mmap,
.fsync = exfat_file_fsync,
.splice_read = generic_file_splice_read,
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
};

View File

@ -192,7 +192,7 @@ const struct file_operations ext2_file_operations = {
.release = ext2_release_file,
.fsync = ext2_fsync,
.get_unmapped_area = thp_get_unmapped_area,
.splice_read = generic_file_splice_read,
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
};

View File

@ -147,6 +147,17 @@ static ssize_t ext4_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
return generic_file_read_iter(iocb, to);
}
/*
 * Splice-read for ext4 files: returns -EIO when ext4_forced_shutdown()
 * reports true for the file's superblock, and otherwise defers to
 * filemap_splice_read().
 */
static ssize_t ext4_file_splice_read(struct file *in, loff_t *ppos,
				     struct pipe_inode_info *pipe,
				     size_t len, unsigned int flags)
{
	if (unlikely(ext4_forced_shutdown(EXT4_SB(file_inode(in)->i_sb))))
		return -EIO;

	return filemap_splice_read(in, ppos, pipe, len, flags);
}
/*
* Called when an inode is released. Note that this is different
* from ext4_file_open: open gets called at every open, but release
@ -957,7 +968,7 @@ const struct file_operations ext4_file_operations = {
.release = ext4_release_file,
.fsync = ext4_sync_file,
.get_unmapped_area = thp_get_unmapped_area,
.splice_read = generic_file_splice_read,
.splice_read = ext4_file_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = ext4_fallocate,
};

View File

@ -4367,22 +4367,23 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
return ret;
}
static void f2fs_trace_rw_file_path(struct kiocb *iocb, size_t count, int rw)
static void f2fs_trace_rw_file_path(struct file *file, loff_t pos, size_t count,
int rw)
{
struct inode *inode = file_inode(iocb->ki_filp);
struct inode *inode = file_inode(file);
char *buf, *path;
buf = f2fs_getname(F2FS_I_SB(inode));
if (!buf)
return;
path = dentry_path_raw(file_dentry(iocb->ki_filp), buf, PATH_MAX);
path = dentry_path_raw(file_dentry(file), buf, PATH_MAX);
if (IS_ERR(path))
goto free_buf;
if (rw == WRITE)
trace_f2fs_datawrite_start(inode, iocb->ki_pos, count,
trace_f2fs_datawrite_start(inode, pos, count,
current->pid, path, current->comm);
else
trace_f2fs_dataread_start(inode, iocb->ki_pos, count,
trace_f2fs_dataread_start(inode, pos, count,
current->pid, path, current->comm);
free_buf:
f2fs_putname(buf);
@ -4398,7 +4399,8 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
return -EOPNOTSUPP;
if (trace_f2fs_dataread_start_enabled())
f2fs_trace_rw_file_path(iocb, iov_iter_count(to), READ);
f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos,
iov_iter_count(to), READ);
if (f2fs_should_use_dio(inode, iocb, to)) {
ret = f2fs_dio_read_iter(iocb, to);
@ -4413,6 +4415,30 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
return ret;
}
/*
 * Splice-read for f2fs files.
 *
 * Returns -EOPNOTSUPP when f2fs_is_compress_backend_ready() is false for the
 * inode.  Otherwise the data is spliced with filemap_splice_read(); a
 * positive byte count is accounted as APP_BUFFERED_READ_IO.  The dataread
 * start/end tracepoints are emitted when enabled, both reporting the file
 * position sampled on entry rather than the updated *ppos.
 */
static ssize_t f2fs_file_splice_read(struct file *in, loff_t *ppos,
				     struct pipe_inode_info *pipe,
				     size_t len, unsigned int flags)
{
	struct inode *inode = file_inode(in);
	const loff_t start = *ppos;
	ssize_t nread;

	if (!f2fs_is_compress_backend_ready(inode))
		return -EOPNOTSUPP;

	if (trace_f2fs_dataread_start_enabled())
		f2fs_trace_rw_file_path(in, start, len, READ);

	nread = filemap_splice_read(in, ppos, pipe, len, flags);
	if (nread > 0)
		f2fs_update_iostat(F2FS_I_SB(inode), inode,
				   APP_BUFFERED_READ_IO, nread);

	if (trace_f2fs_dataread_end_enabled())
		trace_f2fs_dataread_end(inode, start, nread);

	return nread;
}
static ssize_t f2fs_write_checks(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
@ -4714,7 +4740,8 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
ret = preallocated;
} else {
if (trace_f2fs_datawrite_start_enabled())
f2fs_trace_rw_file_path(iocb, orig_count, WRITE);
f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos,
orig_count, WRITE);
/* Do the actual write. */
ret = dio ?
@ -4919,7 +4946,7 @@ const struct file_operations f2fs_file_operations = {
#ifdef CONFIG_COMPAT
.compat_ioctl = f2fs_compat_ioctl,
#endif
.splice_read = generic_file_splice_read,
.splice_read = f2fs_file_splice_read,
.splice_write = iter_file_splice_write,
.fadvise = f2fs_file_fadvise,
};

View File

@ -209,7 +209,7 @@ const struct file_operations fat_file_operations = {
.unlocked_ioctl = fat_generic_ioctl,
.compat_ioctl = compat_ptr_ioctl,
.fsync = fat_file_fsync,
.splice_read = generic_file_splice_read,
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = fat_fallocate,
};

View File

@ -3252,7 +3252,7 @@ static const struct file_operations fuse_file_operations = {
.lock = fuse_file_lock,
.get_unmapped_area = thp_get_unmapped_area,
.flock = fuse_file_flock,
.splice_read = generic_file_splice_read,
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
.unlocked_ioctl = fuse_file_ioctl,
.compat_ioctl = fuse_file_compat_ioctl,

View File

@ -1579,7 +1579,7 @@ const struct file_operations gfs2_file_fops = {
.fsync = gfs2_fsync,
.lock = gfs2_lock,
.flock = gfs2_flock,
.splice_read = generic_file_splice_read,
.splice_read = filemap_splice_read,
.splice_write = gfs2_file_splice_write,
.setlease = simple_nosetlease,
.fallocate = gfs2_fallocate,
@ -1610,7 +1610,7 @@ const struct file_operations gfs2_file_fops_nolock = {
.open = gfs2_open,
.release = gfs2_release,
.fsync = gfs2_fsync,
.splice_read = generic_file_splice_read,
.splice_read = filemap_splice_read,
.splice_write = gfs2_file_splice_write,
.setlease = generic_setlease,
.fallocate = gfs2_fallocate,

View File

@ -694,7 +694,7 @@ static const struct file_operations hfs_file_operations = {
.read_iter = generic_file_read_iter,
.write_iter = generic_file_write_iter,
.mmap = generic_file_mmap,
.splice_read = generic_file_splice_read,
.splice_read = filemap_splice_read,
.fsync = hfs_file_fsync,
.open = hfs_file_open,
.release = hfs_file_release,

View File

@ -372,7 +372,7 @@ static const struct file_operations hfsplus_file_operations = {
.read_iter = generic_file_read_iter,
.write_iter = generic_file_write_iter,
.mmap = generic_file_mmap,
.splice_read = generic_file_splice_read,
.splice_read = filemap_splice_read,
.fsync = hfsplus_file_fsync,
.open = hfsplus_file_open,
.release = hfsplus_file_release,

View File

@ -381,7 +381,7 @@ static int hostfs_fsync(struct file *file, loff_t start, loff_t end,
static const struct file_operations hostfs_file_fops = {
.llseek = generic_file_llseek,
.splice_read = generic_file_splice_read,
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
.read_iter = generic_file_read_iter,
.write_iter = generic_file_write_iter,

View File

@ -259,7 +259,7 @@ const struct file_operations hpfs_file_ops =
.mmap = generic_file_mmap,
.release = hpfs_file_release,
.fsync = hpfs_file_fsync,
.splice_read = generic_file_splice_read,
.splice_read = filemap_splice_read,
.unlocked_ioctl = hpfs_ioctl,
.compat_ioctl = compat_ptr_ioctl,
};

View File

@ -56,7 +56,7 @@ const struct file_operations jffs2_file_operations =
.unlocked_ioctl=jffs2_ioctl,
.mmap = generic_file_readonly_mmap,
.fsync = jffs2_fsync,
.splice_read = generic_file_splice_read,
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
};

View File

@ -144,7 +144,7 @@ const struct file_operations jfs_file_operations = {
.read_iter = generic_file_read_iter,
.write_iter = generic_file_write_iter,
.mmap = generic_file_mmap,
.splice_read = generic_file_splice_read,
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
.fsync = jfs_fsync,
.release = jfs_release,

View File

@ -1011,7 +1011,7 @@ const struct file_operations kernfs_file_fops = {
.release = kernfs_fop_release,
.poll = kernfs_fop_poll,
.fsync = noop_fsync,
.splice_read = generic_file_splice_read,
.splice_read = copy_splice_read,
.splice_write = iter_file_splice_write,
};

View File

@ -19,7 +19,7 @@ const struct file_operations minix_file_operations = {
.write_iter = generic_file_write_iter,
.mmap = generic_file_mmap,
.fsync = generic_file_fsync,
.splice_read = generic_file_splice_read,
.splice_read = filemap_splice_read,
};
static int minix_setattr(struct mnt_idmap *idmap,

View File

@ -178,6 +178,27 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
}
EXPORT_SYMBOL_GPL(nfs_file_read);
/*
 * Splice-read for NFS files.
 *
 * Between nfs_start_io_read() and nfs_end_io_read() the mapping is
 * revalidated; a non-zero result from nfs_revalidate_mapping() is returned
 * without reading.  Otherwise filemap_splice_read() does the work and a
 * positive byte count is added to the NFSIOS_NORMALREADBYTES statistic.
 */
ssize_t
nfs_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe,
		     size_t len, unsigned int flags)
{
	struct inode *inode = file_inode(in);
	ssize_t result;

	dprintk("NFS: splice_read(%pD2, %zu@%llu)\n", in, len, *ppos);

	nfs_start_io_read(inode);

	result = nfs_revalidate_mapping(inode, in->f_mapping);
	if (result)
		goto out;

	result = filemap_splice_read(in, ppos, pipe, len, flags);
	if (result > 0)
		nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result);
out:
	nfs_end_io_read(inode);
	return result;
}
EXPORT_SYMBOL_GPL(nfs_file_splice_read);
int
nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
{
@ -879,7 +900,7 @@ const struct file_operations nfs_file_operations = {
.fsync = nfs_file_fsync,
.lock = nfs_lock,
.flock = nfs_flock,
.splice_read = generic_file_splice_read,
.splice_read = nfs_file_splice_read,
.splice_write = iter_file_splice_write,
.check_flags = nfs_check_flags,
.setlease = simple_nosetlease,

View File

@ -416,6 +416,8 @@ static inline __u32 nfs_access_xattr_mask(const struct nfs_server *server)
int nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync);
loff_t nfs_file_llseek(struct file *, loff_t, int);
ssize_t nfs_file_read(struct kiocb *, struct iov_iter *);
ssize_t nfs_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe,
size_t len, unsigned int flags);
int nfs_file_mmap(struct file *, struct vm_area_struct *);
ssize_t nfs_file_write(struct kiocb *, struct iov_iter *);
int nfs_file_release(struct inode *, struct file *);

View File

@ -454,7 +454,7 @@ const struct file_operations nfs4_file_operations = {
.fsync = nfs_file_fsync,
.lock = nfs_lock,
.flock = nfs_flock,
.splice_read = generic_file_splice_read,
.splice_read = nfs_file_splice_read,
.splice_write = iter_file_splice_write,
.check_flags = nfs_check_flags,
.setlease = nfs4_setlease,

View File

@ -140,7 +140,7 @@ const struct file_operations nilfs_file_operations = {
.open = generic_file_open,
/* .release = nilfs_release_file, */
.fsync = nilfs_sync_file,
.splice_read = generic_file_splice_read,
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
};

View File

@ -1992,7 +1992,7 @@ const struct file_operations ntfs_file_ops = {
#endif /* NTFS_RW */
.mmap = generic_file_mmap,
.open = ntfs_file_open,
.splice_read = generic_file_splice_read,
.splice_read = filemap_splice_read,
};
const struct inode_operations ntfs_file_inode_ops = {

View File

@ -744,6 +744,35 @@ static ssize_t ntfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
return generic_file_read_iter(iocb, iter);
}
/*
 * Splice-read for ntfs3 files.
 *
 * Logs a warning and returns -EOPNOTSUPP for encrypted inodes, for
 * deduplicated inodes and, when CONFIG_NTFS3_LZX_XPRESS is not defined, for
 * inodes with any NI_FLAG_COMPRESSED_MASK bit set.  All other reads are
 * passed to filemap_splice_read().
 */
static ssize_t ntfs_file_splice_read(struct file *in, loff_t *ppos,
				     struct pipe_inode_info *pipe,
				     size_t len, unsigned int flags)
{
	struct inode *inode = in->f_mapping->host;
	struct ntfs_inode *ni = ntfs_i(inode);

	if (is_encrypted(ni)) {
		ntfs_inode_warn(inode, "encrypted i/o not supported");
		goto unsupported;
	}

#ifndef CONFIG_NTFS3_LZX_XPRESS
	if (ni->ni_flags & NI_FLAG_COMPRESSED_MASK) {
		ntfs_inode_warn(inode,
				"activate CONFIG_NTFS3_LZX_XPRESS to read external compressed files");
		goto unsupported;
	}
#endif

	if (is_dedup(ni)) {
		ntfs_inode_warn(inode, "read deduplicated not supported");
		goto unsupported;
	}

	return filemap_splice_read(in, ppos, pipe, len, flags);

unsupported:
	return -EOPNOTSUPP;
}
/*
* ntfs_get_frame_pages
*
@ -1159,7 +1188,7 @@ const struct file_operations ntfs_file_operations = {
#ifdef CONFIG_COMPAT
.compat_ioctl = ntfs_compat_ioctl,
#endif
.splice_read = generic_file_splice_read,
.splice_read = ntfs_file_splice_read,
.mmap = ntfs_file_mmap,
.open = ntfs_file_open,
.fsync = generic_file_fsync,

View File

@ -2558,7 +2558,7 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
*
* Take and drop the meta data lock to update inode fields
* like i_size. This allows the checks down below
* generic_file_read_iter() a chance of actually working.
* copy_splice_read() a chance of actually working.
*/
ret = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level,
!nowait);
@ -2587,6 +2587,43 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
return ret;
}
/*
 * Splice-read for ocfs2 files.
 *
 * Emits the ocfs2_file_splice_read tracepoint, takes and immediately drops
 * the inode metadata lock (with atime handling) and then calls
 * filemap_splice_read().  A failure to take the lock is returned to the
 * caller; it is logged through mlog_errno() unless it is -EAGAIN.
 */
static ssize_t ocfs2_file_splice_read(struct file *in, loff_t *ppos,
				      struct pipe_inode_info *pipe,
				      size_t len, unsigned int flags)
{
	struct inode *inode = file_inode(in);
	int lock_level = 0;
	ssize_t ret;

	trace_ocfs2_file_splice_read(inode, in, in->f_path.dentry,
				     (unsigned long long)OCFS2_I(inode)->ip_blkno,
				     in->f_path.dentry->d_name.len,
				     in->f_path.dentry->d_name.name,
				     flags);

	/*
	 * Truncates and extending writes are allowed to race with reads
	 * across the cluster exactly as they may locally, so no rw_lock is
	 * held for the read.
	 *
	 * The metadata lock is taken and dropped purely to refresh inode
	 * fields such as i_size, so that the checks made beneath
	 * filemap_splice_read() have a chance of actually working.
	 */
	ret = ocfs2_inode_lock_atime(inode, in->f_path.mnt, &lock_level, 1);
	if (ret < 0) {
		if (ret != -EAGAIN)
			mlog_errno(ret);
		return ret;
	}
	ocfs2_inode_unlock(inode, lock_level);

	ret = filemap_splice_read(in, ppos, pipe, len, flags);
	trace_filemap_splice_read_ret(ret);
	return ret;
}
/* Refer generic_file_llseek_unlocked() */
static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int whence)
{
@ -2750,7 +2787,7 @@ const struct file_operations ocfs2_fops = {
#endif
.lock = ocfs2_lock,
.flock = ocfs2_flock,
.splice_read = generic_file_splice_read,
.splice_read = ocfs2_file_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = ocfs2_fallocate,
.remap_file_range = ocfs2_remap_file_range,
@ -2796,7 +2833,7 @@ const struct file_operations ocfs2_fops_no_plocks = {
.compat_ioctl = ocfs2_compat_ioctl,
#endif
.flock = ocfs2_flock,
.splice_read = generic_file_splice_read,
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = ocfs2_fallocate,
.remap_file_range = ocfs2_remap_file_range,

View File

@ -1319,6 +1319,8 @@ DEFINE_OCFS2_FILE_OPS(ocfs2_file_splice_write);
DEFINE_OCFS2_FILE_OPS(ocfs2_file_read_iter);
DEFINE_OCFS2_FILE_OPS(ocfs2_file_splice_read);
DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_truncate_file);
DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_truncate_file_error);
@ -1470,6 +1472,7 @@ TRACE_EVENT(ocfs2_prepare_inode_for_write,
);
DEFINE_OCFS2_INT_EVENT(generic_file_read_iter_ret);
DEFINE_OCFS2_INT_EVENT(filemap_splice_read_ret);
/* End of trace events for fs/ocfs2/file.c. */

View File

@ -334,7 +334,7 @@ const struct file_operations omfs_file_operations = {
.write_iter = generic_file_write_iter,
.mmap = generic_file_mmap,
.fsync = generic_file_fsync,
.splice_read = generic_file_splice_read,
.splice_read = filemap_splice_read,
};
static int omfs_setattr(struct mnt_idmap *idmap,

View File

@ -337,6 +337,26 @@ static ssize_t orangefs_file_read_iter(struct kiocb *iocb,
return ret;
}
/*
 * Splice-read for orangefs files.
 *
 * Bumps the orangefs read counter, then, holding the inode's i_rwsem for
 * reading, revalidates the mapping and - only if that returned zero -
 * calls filemap_splice_read().  A non-zero revalidation result is returned
 * as-is.
 */
static ssize_t orangefs_file_splice_read(struct file *in, loff_t *ppos,
					 struct pipe_inode_info *pipe,
					 size_t len, unsigned int flags)
{
	struct inode *inode = file_inode(in);
	ssize_t ret;

	orangefs_stats.reads++;

	down_read(&inode->i_rwsem);

	ret = orangefs_revalidate_mapping(inode);
	if (!ret)
		ret = filemap_splice_read(in, ppos, pipe, len, flags);

	up_read(&inode->i_rwsem);
	return ret;
}
static ssize_t orangefs_file_write_iter(struct kiocb *iocb,
struct iov_iter *iter)
{
@ -556,7 +576,7 @@ const struct file_operations orangefs_file_operations = {
.lock = orangefs_lock,
.mmap = orangefs_file_mmap,
.open = generic_file_open,
.splice_read = generic_file_splice_read,
.splice_read = orangefs_file_splice_read,
.splice_write = iter_file_splice_write,
.flush = orangefs_flush,
.release = orangefs_file_release,

View File

@ -419,6 +419,27 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
return ret;
}
/*
 * Splice-read for overlayfs files.
 *
 * Looks up the real file behind @in and forwards the request to it with
 * vfs_splice_read() while the overlay's override credentials are in effect.
 * Afterwards the credentials are reverted, ovl_file_accessed() is called on
 * the overlay file and the reference on the real file is dropped.  A
 * non-zero result from ovl_real_fdget() is returned without reading.
 */
static ssize_t ovl_splice_read(struct file *in, loff_t *ppos,
			       struct pipe_inode_info *pipe, size_t len,
			       unsigned int flags)
{
	const struct cred *saved_cred;
	struct fd real;
	ssize_t nread;
	int err;

	err = ovl_real_fdget(in, &real);
	if (err)
		return err;

	saved_cred = ovl_override_creds(file_inode(in)->i_sb);
	nread = vfs_splice_read(real.file, ppos, pipe, len, flags);
	revert_creds(saved_cred);

	ovl_file_accessed(in);
	fdput(real);

	return nread;
}
/*
* Calling iter_file_splice_write() directly from overlay's f_op may deadlock
* due to lock order inversion between pipe->mutex in iter_file_splice_write()
@ -695,7 +716,7 @@ const struct file_operations ovl_file_operations = {
.fallocate = ovl_fallocate,
.fadvise = ovl_fadvise,
.flush = ovl_flush,
.splice_read = generic_file_splice_read,
.splice_read = ovl_splice_read,
.splice_write = ovl_splice_write,
.copy_file_range = ovl_copy_file_range,

View File

@ -591,7 +591,7 @@ static const struct file_operations proc_iter_file_ops = {
.llseek = proc_reg_llseek,
.read_iter = proc_reg_read_iter,
.write = proc_reg_write,
.splice_read = generic_file_splice_read,
.splice_read = copy_splice_read,
.poll = proc_reg_poll,
.unlocked_ioctl = proc_reg_unlocked_ioctl,
.mmap = proc_reg_mmap,
@ -617,7 +617,7 @@ static const struct file_operations proc_reg_file_ops_compat = {
static const struct file_operations proc_iter_file_ops_compat = {
.llseek = proc_reg_llseek,
.read_iter = proc_reg_read_iter,
.splice_read = generic_file_splice_read,
.splice_read = copy_splice_read,
.write = proc_reg_write,
.poll = proc_reg_poll,
.unlocked_ioctl = proc_reg_unlocked_ioctl,

View File

@ -868,7 +868,7 @@ static const struct file_operations proc_sys_file_operations = {
.poll = proc_sys_poll,
.read_iter = proc_sys_read,
.write_iter = proc_sys_write,
.splice_read = generic_file_splice_read,
.splice_read = copy_splice_read,
.splice_write = iter_file_splice_write,
.llseek = default_llseek,
};

View File

@ -324,7 +324,7 @@ static int mountstats_open(struct inode *inode, struct file *file)
const struct file_operations proc_mounts_operations = {
.open = mounts_open,
.read_iter = seq_read_iter,
.splice_read = generic_file_splice_read,
.splice_read = copy_splice_read,
.llseek = seq_lseek,
.release = mounts_release,
.poll = mounts_poll,
@ -333,7 +333,7 @@ const struct file_operations proc_mounts_operations = {
const struct file_operations proc_mountinfo_operations = {
.open = mountinfo_open,
.read_iter = seq_read_iter,
.splice_read = generic_file_splice_read,
.splice_read = copy_splice_read,
.llseek = seq_lseek,
.release = mounts_release,
.poll = mounts_poll,
@ -342,7 +342,7 @@ const struct file_operations proc_mountinfo_operations = {
const struct file_operations proc_mountstats_operations = {
.open = mountstats_open,
.read_iter = seq_read_iter,
.splice_read = generic_file_splice_read,
.splice_read = copy_splice_read,
.llseek = seq_lseek,
.release = mounts_release,
};

View File

@ -43,7 +43,7 @@ const struct file_operations ramfs_file_operations = {
.write_iter = generic_file_write_iter,
.mmap = generic_file_mmap,
.fsync = noop_fsync,
.splice_read = generic_file_splice_read,
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
.llseek = generic_file_llseek,
.get_unmapped_area = ramfs_mmu_get_unmapped_area,

View File

@ -43,7 +43,7 @@ const struct file_operations ramfs_file_operations = {
.read_iter = generic_file_read_iter,
.write_iter = generic_file_write_iter,
.fsync = noop_fsync,
.splice_read = generic_file_splice_read,
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
.llseek = generic_file_llseek,
};

View File

@ -29,7 +29,7 @@ const struct file_operations generic_ro_fops = {
.llseek = generic_file_llseek,
.read_iter = generic_file_read_iter,
.mmap = generic_file_readonly_mmap,
.splice_read = generic_file_splice_read,
.splice_read = filemap_splice_read,
};
EXPORT_SYMBOL(generic_ro_fops);

View File

@ -247,7 +247,7 @@ const struct file_operations reiserfs_file_operations = {
.fsync = reiserfs_sync_file,
.read_iter = generic_file_read_iter,
.write_iter = generic_file_write_iter,
.splice_read = generic_file_splice_read,
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
.llseek = generic_file_llseek,
};

View File

@ -78,7 +78,7 @@ static unsigned romfs_mmap_capabilities(struct file *file)
const struct file_operations romfs_ro_fops = {
.llseek = generic_file_llseek,
.read_iter = generic_file_read_iter,
.splice_read = generic_file_splice_read,
.splice_read = filemap_splice_read,
.mmap = romfs_mmap,
.get_unmapped_area = romfs_get_unmapped_area,
.mmap_capabilities = romfs_mmap_capabilities,

View File

@ -1376,7 +1376,7 @@ const struct file_operations cifs_file_ops = {
.fsync = cifs_fsync,
.flush = cifs_flush,
.mmap = cifs_file_mmap,
.splice_read = cifs_splice_read,
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
@ -1396,7 +1396,7 @@ const struct file_operations cifs_file_strict_ops = {
.fsync = cifs_strict_fsync,
.flush = cifs_flush,
.mmap = cifs_file_strict_mmap,
.splice_read = cifs_splice_read,
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
@ -1416,7 +1416,7 @@ const struct file_operations cifs_file_direct_ops = {
.fsync = cifs_fsync,
.flush = cifs_flush,
.mmap = cifs_file_mmap,
.splice_read = direct_splice_read,
.splice_read = copy_splice_read,
.splice_write = iter_file_splice_write,
.unlocked_ioctl = cifs_ioctl,
.copy_file_range = cifs_copy_file_range,
@ -1434,7 +1434,7 @@ const struct file_operations cifs_file_nobrl_ops = {
.fsync = cifs_fsync,
.flush = cifs_flush,
.mmap = cifs_file_mmap,
.splice_read = cifs_splice_read,
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
@ -1452,7 +1452,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
.fsync = cifs_strict_fsync,
.flush = cifs_flush,
.mmap = cifs_file_strict_mmap,
.splice_read = cifs_splice_read,
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
@ -1470,7 +1470,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
.fsync = cifs_fsync,
.flush = cifs_flush,
.mmap = cifs_file_mmap,
.splice_read = direct_splice_read,
.splice_read = copy_splice_read,
.splice_write = iter_file_splice_write,
.unlocked_ioctl = cifs_ioctl,
.copy_file_range = cifs_copy_file_range,

View File

@ -100,9 +100,6 @@ extern ssize_t cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to);
extern ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from);
extern ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from);
extern ssize_t cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from);
extern ssize_t cifs_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags);
extern int cifs_flock(struct file *pfile, int cmd, struct file_lock *plock);
extern int cifs_lock(struct file *, int, struct file_lock *);
extern int cifs_fsync(struct file *, loff_t, loff_t, int);

View File

@ -5083,19 +5083,3 @@ const struct address_space_operations cifs_addr_ops_smallbuf = {
.launder_folio = cifs_launder_folio,
.migrate_folio = filemap_migrate_folio,
};
/*
 * Splice data from a file into a pipe.
 *
 * Nothing is spliced (0 is returned) when *ppos is at or beyond the
 * superblock's s_maxbytes, or when len is zero.  Files opened with
 * O_DIRECT are served by direct_splice_read(); all others go through
 * filemap_splice_read().
 */
ssize_t cifs_splice_read(struct file *in, loff_t *ppos,
			 struct pipe_inode_info *pipe, size_t len,
			 unsigned int flags)
{
	if (unlikely(*ppos >= file_inode(in)->i_sb->s_maxbytes) ||
	    unlikely(!len))
		return 0;

	return (in->f_flags & O_DIRECT) ?
		direct_splice_read(in, ppos, pipe, len, flags) :
		filemap_splice_read(in, ppos, pipe, len, flags);
}

View File

@ -299,20 +299,36 @@ void splice_shrink_spd(struct splice_pipe_desc *spd)
kfree(spd->partial);
}
/*
* Splice data from an O_DIRECT file into pages and then add them to the output
* pipe.
/**
* copy_splice_read - Copy data from a file and splice the copy into a pipe
* @in: The file to read from
* @ppos: Pointer to the file position to read from
* @pipe: The pipe to splice into
* @len: The amount to splice
* @flags: The SPLICE_F_* flags
*
* This function allocates a bunch of pages sufficient to hold the requested
* amount of data (but limited by the remaining pipe capacity), passes it to
* the file's ->read_iter() to read into and then splices the used pages into
* the pipe.
*
* Return: On success, the number of bytes read will be returned and *@ppos
* will be updated if appropriate; 0 will be returned if there is no more data
* to be read; -EAGAIN will be returned if the pipe had no space, and some
* other negative error code will be returned on error. A short read may occur
* if the pipe has insufficient space, we reach the end of the data or we hit a
* hole.
*/
ssize_t direct_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe,
size_t len, unsigned int flags)
ssize_t copy_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe,
size_t len, unsigned int flags)
{
struct iov_iter to;
struct bio_vec *bv;
struct kiocb kiocb;
struct page **pages;
ssize_t ret;
size_t used, npages, chunk, remain, reclaim;
size_t used, npages, chunk, remain, keep = 0;
int i;
/* Work out how much data we can actually add into the pipe */
@ -326,7 +342,7 @@ ssize_t direct_splice_read(struct file *in, loff_t *ppos,
if (!bv)
return -ENOMEM;
pages = (void *)(bv + npages);
pages = (struct page **)(bv + npages);
npages = alloc_pages_bulk_array(GFP_USER, npages, pages);
if (!npages) {
kfree(bv);
@ -349,11 +365,8 @@ ssize_t direct_splice_read(struct file *in, loff_t *ppos,
kiocb.ki_pos = *ppos;
ret = call_read_iter(in, &kiocb, &to);
reclaim = npages * PAGE_SIZE;
remain = 0;
if (ret > 0) {
reclaim -= ret;
remain = ret;
keep = DIV_ROUND_UP(ret, PAGE_SIZE);
*ppos = kiocb.ki_pos;
file_accessed(in);
} else if (ret < 0) {
@ -366,14 +379,12 @@ ssize_t direct_splice_read(struct file *in, loff_t *ppos,
}
/* Free any pages that didn't get touched at all. */
reclaim /= PAGE_SIZE;
if (reclaim) {
npages -= reclaim;
release_pages(pages + npages, reclaim);
}
if (keep < npages)
release_pages(pages + keep, npages - keep);
/* Push the remaining pages into the pipe. */
for (i = 0; i < npages; i++) {
remain = ret;
for (i = 0; i < keep; i++) {
struct pipe_buffer *buf = pipe_head_buf(pipe);
chunk = min_t(size_t, remain, PAGE_SIZE);
@ -390,50 +401,7 @@ ssize_t direct_splice_read(struct file *in, loff_t *ppos,
kfree(bv);
return ret;
}
EXPORT_SYMBOL(direct_splice_read);
/**
 * generic_file_splice_read - splice data from file to a pipe
 * @in: file to splice from
 * @ppos: position in @in
 * @pipe: pipe to splice to
 * @len: number of bytes to splice
 * @flags: splice modifier flags
 *
 * Description:
 *    Sets up a pipe-backed destination iterator of @len bytes and lets the
 *    file's ->read_iter() fill it, starting at *@ppos.  Can be used as long
 *    as the file has a more or less sane ->read_iter().
 *
 *    On a positive result *@ppos is advanced to the kiocb position and the
 *    file is marked accessed.  On an error everything added to the pipe
 *    since the iterator was set up is discarded, and -EFAULT is reported to
 *    the caller as -EAGAIN.
 */
ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
				 struct pipe_inode_info *pipe, size_t len,
				 unsigned int flags)
{
	struct kiocb iocb;
	struct iov_iter dest;
	int nr;

	iov_iter_pipe(&dest, ITER_DEST, pipe, len);
	init_sync_kiocb(&iocb, in);
	iocb.ki_pos = *ppos;

	nr = call_read_iter(in, &iocb, &dest);
	if (nr == 0)
		return 0;

	if (nr > 0) {
		*ppos = iocb.ki_pos;
		file_accessed(in);
		return nr;
	}

	/* free what was emitted */
	pipe_discard_from(pipe, dest.start_head);

	/*
	 * callers of ->splice_read() expect -EAGAIN on
	 * "can't put anything in there", rather than -EFAULT.
	 */
	return nr == -EFAULT ? -EAGAIN : nr;
}
EXPORT_SYMBOL(generic_file_splice_read);
EXPORT_SYMBOL(copy_splice_read);
const struct pipe_buf_operations default_pipe_buf_ops = {
.release = generic_pipe_buf_release,
@ -873,18 +841,32 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
return out->f_op->splice_write(pipe, out, ppos, len, flags);
}
/*
* Attempt to initiate a splice from a file to a pipe.
/**
* vfs_splice_read - Read data from a file and splice it into a pipe
* @in: File to splice from
* @ppos: Input file offset
* @pipe: Pipe to splice to
* @len: Number of bytes to splice
* @flags: Splice modifier flags (SPLICE_F_*)
*
* Splice the requested amount of data from the input file to the pipe. This
* is synchronous as the caller must hold the pipe lock across the entire
* operation.
*
* If successful, it returns the amount of data spliced, 0 if it hit the EOF or
* a hole and a negative error code otherwise.
*/
static long do_splice_to(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags)
long vfs_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags)
{
unsigned int p_space;
int ret;
if (unlikely(!(in->f_mode & FMODE_READ)))
return -EBADF;
if (!len)
return 0;
/* Don't try to read more than the pipe has space for. */
p_space = pipe->max_usage - pipe_occupancy(pipe->head, pipe->tail);
@ -899,8 +881,15 @@ static long do_splice_to(struct file *in, loff_t *ppos,
if (unlikely(!in->f_op->splice_read))
return warn_unsupported(in, "read");
/*
* O_DIRECT and DAX don't deal with the pagecache, so we allocate a
* buffer, copy into it and splice that into the pipe.
*/
if ((in->f_flags & O_DIRECT) || IS_DAX(in->f_mapping->host))
return copy_splice_read(in, ppos, pipe, len, flags);
return in->f_op->splice_read(in, ppos, pipe, len, flags);
}
EXPORT_SYMBOL_GPL(vfs_splice_read);
/**
* splice_direct_to_actor - splices data directly between two non-pipes
@ -970,7 +959,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
size_t read_len;
loff_t pos = sd->pos, prev_pos = pos;
ret = do_splice_to(in, &pos, pipe, len, flags);
ret = vfs_splice_read(in, &pos, pipe, len, flags);
if (unlikely(ret <= 0))
goto out_release;
@ -1118,7 +1107,7 @@ long splice_file_to_pipe(struct file *in,
pipe_lock(opipe);
ret = wait_for_space(opipe, flags);
if (!ret)
ret = do_splice_to(in, offset, opipe, len, flags);
ret = vfs_splice_read(in, offset, opipe, len, flags);
pipe_unlock(opipe);
if (ret > 0)
wakeup_pipe_readers(opipe);

View File

@ -26,7 +26,7 @@ const struct file_operations sysv_file_operations = {
.write_iter = generic_file_write_iter,
.mmap = generic_file_mmap,
.fsync = generic_file_fsync,
.splice_read = generic_file_splice_read,
.splice_read = filemap_splice_read,
};
static int sysv_setattr(struct mnt_idmap *idmap,

View File

@ -1669,7 +1669,7 @@ const struct file_operations ubifs_file_operations = {
.mmap = ubifs_file_mmap,
.fsync = ubifs_fsync,
.unlocked_ioctl = ubifs_ioctl,
.splice_read = generic_file_splice_read,
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
.open = fscrypt_file_open,
#ifdef CONFIG_COMPAT

View File

@ -209,7 +209,7 @@ const struct file_operations udf_file_operations = {
.write_iter = udf_file_write_iter,
.release = udf_release_file,
.fsync = generic_file_fsync,
.splice_read = generic_file_splice_read,
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
.llseek = generic_file_llseek,
};

View File

@ -41,5 +41,5 @@ const struct file_operations ufs_file_operations = {
.mmap = generic_file_mmap,
.open = generic_file_open,
.fsync = generic_file_fsync,
.splice_read = generic_file_splice_read,
.splice_read = filemap_splice_read,
};

View File

@ -217,7 +217,7 @@ const struct file_operations vboxsf_reg_fops = {
.open = vboxsf_file_open,
.release = vboxsf_file_release,
.fsync = noop_fsync,
.splice_read = generic_file_splice_read,
.splice_read = filemap_splice_read,
};
const struct inode_operations vboxsf_reg_iops = {

View File

@ -306,6 +306,34 @@ xfs_file_read_iter(
return ret;
}
/*
 * Splice-read handler for XFS files.
 *
 * Counts the call in xs_read_calls, fails with -EIO if the mount has been
 * shut down, then runs filemap_splice_read() with the inode's IOLOCK held
 * shared.  A positive byte count is added to xs_read_bytes.
 */
STATIC ssize_t
xfs_file_splice_read(
	struct file		*in,
	loff_t			*ppos,
	struct pipe_inode_info	*pipe,
	size_t			len,
	unsigned int		flags)
{
	struct xfs_inode	*ip = XFS_I(file_inode(in));
	struct xfs_mount	*mp = ip->i_mount;
	ssize_t			nbytes;

	XFS_STATS_INC(mp, xs_read_calls);

	if (xfs_is_shutdown(mp))
		return -EIO;

	trace_xfs_file_splice_read(ip, *ppos, len);

	xfs_ilock(ip, XFS_IOLOCK_SHARED);
	nbytes = filemap_splice_read(in, ppos, pipe, len, flags);
	xfs_iunlock(ip, XFS_IOLOCK_SHARED);

	if (nbytes <= 0)
		return nbytes;

	XFS_STATS_ADD(mp, xs_read_bytes, nbytes);
	return nbytes;
}
/*
* Common pre-write limit and setup checks.
*
@ -1423,7 +1451,7 @@ const struct file_operations xfs_file_operations = {
.llseek = xfs_file_llseek,
.read_iter = xfs_file_read_iter,
.write_iter = xfs_file_write_iter,
.splice_read = generic_file_splice_read,
.splice_read = xfs_file_splice_read,
.splice_write = iter_file_splice_write,
.iopoll = iocb_bio_iopoll,
.unlocked_ioctl = xfs_file_ioctl,

View File

@ -1445,7 +1445,6 @@ DEFINE_RW_EVENT(xfs_file_direct_write);
DEFINE_RW_EVENT(xfs_file_dax_write);
DEFINE_RW_EVENT(xfs_reflink_bounce_dio_write);
DECLARE_EVENT_CLASS(xfs_imap_class,
TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
int whichfork, struct xfs_bmbt_irec *irec),
@ -1535,6 +1534,7 @@ DEFINE_SIMPLE_IO_EVENT(xfs_zero_eof);
DEFINE_SIMPLE_IO_EVENT(xfs_end_io_direct_write);
DEFINE_SIMPLE_IO_EVENT(xfs_end_io_direct_write_unwritten);
DEFINE_SIMPLE_IO_EVENT(xfs_end_io_direct_write_append);
DEFINE_SIMPLE_IO_EVENT(xfs_file_splice_read);
DECLARE_EVENT_CLASS(xfs_itrunc_class,
TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size),

View File

@ -753,6 +753,44 @@ static ssize_t zonefs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
return ret;
}
/*
 * Splice-read handler for zonefs files.
 *
 * Fails with -EPERM for an immutable inode that has no permission bits set
 * (an offline zone) and returns 0 when *ppos is at or past the zone
 * capacity.  Otherwise, with the inode lock held shared, the request is
 * clamped to the inode size sampled under i_truncate_mutex and any
 * remaining length is handed to filemap_splice_read().  An -EIO result is
 * reported through zonefs_io_error() before being returned.
 */
static ssize_t zonefs_file_splice_read(struct file *in, loff_t *ppos,
				       struct pipe_inode_info *pipe,
				       size_t len, unsigned int flags)
{
	struct inode *inode = file_inode(in);
	struct zonefs_inode_info *zi = ZONEFS_I(inode);
	struct zonefs_zone *z = zonefs_inode_zone(inode);
	ssize_t nread = 0;
	loff_t eof;

	/* Offline zones cannot be read */
	if (unlikely(IS_IMMUTABLE(inode) && !(inode->i_mode & 0777)))
		return -EPERM;

	if (*ppos >= z->z_capacity)
		return 0;

	inode_lock_shared(inode);

	/* Limit read operations to written data */
	mutex_lock(&zi->i_truncate_mutex);
	eof = i_size_read(inode);
	len = (*ppos < eof) ? min_t(loff_t, len, eof - *ppos) : 0;
	mutex_unlock(&zi->i_truncate_mutex);

	if (!len)
		goto unlock;

	nread = filemap_splice_read(in, ppos, pipe, len, flags);
	if (nread == -EIO)
		zonefs_io_error(inode, false);

unlock:
	inode_unlock_shared(inode);
	return nread;
}
/*
* Write open accounting is done only for sequential files.
*/
@ -898,7 +936,7 @@ const struct file_operations zonefs_file_operations = {
.llseek = zonefs_file_llseek,
.read_iter = zonefs_file_read_iter,
.write_iter = zonefs_file_write_iter,
.splice_read = generic_file_splice_read,
.splice_read = zonefs_file_splice_read,
.splice_write = iter_file_splice_write,
.iopoll = iocb_bio_iopoll,
};

View File

@ -2784,11 +2784,9 @@ ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb,
ssize_t filemap_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe,
size_t len, unsigned int flags);
ssize_t direct_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe,
size_t len, unsigned int flags);
extern ssize_t generic_file_splice_read(struct file *, loff_t *,
struct pipe_inode_info *, size_t, unsigned int);
ssize_t copy_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe,
size_t len, unsigned int flags);
extern ssize_t iter_file_splice_write(struct pipe_inode_info *,
struct file *, loff_t *, size_t, unsigned int);
extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,

View File

@ -76,6 +76,9 @@ extern ssize_t splice_to_pipe(struct pipe_inode_info *,
struct splice_pipe_desc *);
extern ssize_t add_to_pipe(struct pipe_inode_info *,
struct pipe_buffer *);
long vfs_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags);
extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
splice_direct_actor *);
extern long do_splice(struct file *in, loff_t *off_in,

View File

@ -11,7 +11,6 @@
#include <uapi/linux/uio.h>
struct page;
struct pipe_inode_info;
typedef unsigned int __bitwise iov_iter_extraction_t;
@ -25,7 +24,6 @@ enum iter_type {
ITER_IOVEC,
ITER_KVEC,
ITER_BVEC,
ITER_PIPE,
ITER_XARRAY,
ITER_DISCARD,
ITER_UBUF,
@ -74,7 +72,6 @@ struct iov_iter {
const struct kvec *kvec;
const struct bio_vec *bvec;
struct xarray *xarray;
struct pipe_inode_info *pipe;
void __user *ubuf;
};
size_t count;
@ -82,10 +79,6 @@ struct iov_iter {
};
union {
unsigned long nr_segs;
struct {
unsigned int head;
unsigned int start_head;
};
loff_t xarray_start;
};
};
@ -133,11 +126,6 @@ static inline bool iov_iter_is_bvec(const struct iov_iter *i)
return iov_iter_type(i) == ITER_BVEC;
}
/* Return true if iov_iter_type() reports @i as an ITER_PIPE iterator. */
static inline bool iov_iter_is_pipe(const struct iov_iter *i)
{
return iov_iter_type(i) == ITER_PIPE;
}
static inline bool iov_iter_is_discard(const struct iov_iter *i)
{
return iov_iter_type(i) == ITER_DISCARD;
@ -286,8 +274,6 @@ void iov_iter_kvec(struct iov_iter *i, unsigned int direction, const struct kvec
unsigned long nr_segs, size_t count);
void iov_iter_bvec(struct iov_iter *i, unsigned int direction, const struct bio_vec *bvec,
unsigned long nr_segs, size_t count);
void iov_iter_pipe(struct iov_iter *i, unsigned int direction, struct pipe_inode_info *pipe,
size_t count);
void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count);
void iov_iter_xarray(struct iov_iter *i, unsigned int direction, struct xarray *xarray,
loff_t start, size_t count);

View File

@ -5199,7 +5199,7 @@ static const struct file_operations tracing_fops = {
.open = tracing_open,
.read = seq_read,
.read_iter = seq_read_iter,
.splice_read = generic_file_splice_read,
.splice_read = copy_splice_read,
.write = tracing_write_stub,
.llseek = tracing_lseek,
.release = tracing_release,

View File

@ -14,8 +14,6 @@
#include <linux/scatterlist.h>
#include <linux/instrumented.h>
#define PIPE_PARANOIA /* for now */
/* covers ubuf and kbuf alike */
#define iterate_buf(i, n, base, len, off, __p, STEP) { \
size_t __maybe_unused off = 0; \
@ -198,150 +196,6 @@ static int copyin(void *to, const void __user *from, size_t n)
return res;
}
#ifdef PIPE_PARANOIA
/*
 * Debug check that a pipe-backed iterator still agrees with its pipe.
 *
 * With a non-zero i->last_offset the pipe must be non-empty, i->head must
 * name the last occupied slot (pipe->head - 1), and that buffer must end
 * (offset + len) at the absolute value of i->last_offset.  With a zero
 * i->last_offset, i->head must equal pipe->head.
 *
 * Returns true when the state is consistent.  Otherwise the iterator
 * position and every slot of the ring are logged at KERN_ERR, WARN_ON(1)
 * fires and false is returned.
 *
 * When PIPE_PARANOIA is not defined, sanity() is a macro that is always
 * true.
 */
static bool sanity(const struct iov_iter *i)
{
struct pipe_inode_info *pipe = i->pipe;
unsigned int p_head = pipe->head;
unsigned int p_tail = pipe->tail;
unsigned int p_occupancy = pipe_occupancy(p_head, p_tail);
unsigned int i_head = i->head;
unsigned int idx;
if (i->last_offset) {
struct pipe_buffer *p;
if (unlikely(p_occupancy == 0))
goto Bad; // pipe must be non-empty
if (unlikely(i_head != p_head - 1))
goto Bad; // must be at the last buffer...
p = pipe_buf(pipe, i_head);
if (unlikely(p->offset + p->len != abs(i->last_offset)))
goto Bad; // ... at the end of segment
} else {
if (i_head != p_head)
goto Bad; // must be right after the last buffer
}
return true;
Bad:
/* Dump the iterator position, the pipe indices and every ring slot. */
printk(KERN_ERR "idx = %d, offset = %d\n", i_head, i->last_offset);
printk(KERN_ERR "head = %d, tail = %d, buffers = %d\n",
p_head, p_tail, pipe->ring_size);
for (idx = 0; idx < pipe->ring_size; idx++)
printk(KERN_ERR "[%p %p %d %d]\n",
pipe->bufs[idx].ops,
pipe->bufs[idx].page,
pipe->bufs[idx].offset,
pipe->bufs[idx].len);
WARN_ON(1);
return false;
}
#else
#define sanity(i) true
#endif
static struct page *push_anon(struct pipe_inode_info *pipe, unsigned size)
{
struct page *page = alloc_page(GFP_USER);
if (page) {
struct pipe_buffer *buf = pipe_buf(pipe, pipe->head++);
*buf = (struct pipe_buffer) {
.ops = &default_pipe_buf_ops,
.page = page,
.offset = 0,
.len = size
};
}
return page;
}
/*
 * Append an existing @page to @pipe as a new buffer covering @size bytes
 * starting at @offset, using page_cache_pipe_buf_ops, and take a reference
 * on the page for the pipe.  The pipe head is advanced by one slot; no
 * check for a free slot is made here.
 */
static void push_page(struct pipe_inode_info *pipe, struct page *page,
			unsigned int offset, unsigned int size)
{
	struct pipe_buffer *slot = pipe_buf(pipe, pipe->head++);

	*slot = (struct pipe_buffer) {
		.page = page,
		.offset = offset,
		.len = size,
		.ops = &page_cache_pipe_buf_ops,
	};

	get_page(page);
}
/*
 * Encode where @buf ends as a signed value: positive (the length) for a
 * buffer using default_pipe_buf_ops, negative (minus offset + length) for
 * any other buffer.
 */
static inline int last_offset(const struct pipe_buffer *buf)
{
	if (buf->ops != &default_pipe_buf_ops)
		return -(buf->offset + buf->len);

	/* buf->offset is 0 for those */
	return buf->len;
}
/*
 * Make room for up to @size more bytes at the end of the pipe behind @i and
 * return the page that holds that room.
 *
 * If i->last_offset is strictly between 0 and PAGE_SIZE, the last buffer is
 * extended in place: at most the rest of that page is claimed and *@off is
 * set to the old end of the data.  Otherwise a new page is pushed with
 * push_anon(), at most PAGE_SIZE bytes are claimed and *@off is set to 0.
 *
 * In both cases i->count is reduced by the number of bytes claimed and
 * i->last_offset is moved to the new end.  Returns NULL (with *@off set to
 * 0 and the iterator unchanged) if the pipe is full or the page allocation
 * fails.
 */
static struct page *append_pipe(struct iov_iter *i, size_t size,
unsigned int *off)
{
struct pipe_inode_info *pipe = i->pipe;
int offset = i->last_offset;
struct pipe_buffer *buf;
struct page *page;
if (offset > 0 && offset < PAGE_SIZE) {
// some space in the last buffer; add to it
buf = pipe_buf(pipe, pipe->head - 1);
size = min_t(size_t, size, PAGE_SIZE - offset);
buf->len += size;
i->last_offset += size;
i->count -= size;
*off = offset;
return buf->page;
}
// OK, we need a new buffer
*off = 0;
size = min_t(size_t, size, PAGE_SIZE);
if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
return NULL;
page = push_anon(pipe, size);
if (!page)
return NULL;
/* push_anon() advanced pipe->head; the new buffer is the one before it. */
i->head = pipe->head - 1;
i->last_offset = size;
i->count -= size;
return page;
}
/*
 * Add @bytes of @page, starting at @offset, to the pipe behind @i by
 * referencing the page (via push_page()) rather than copying its contents.
 *
 * @bytes is clamped to i->count.  If the iterator's last buffer is a
 * non-anonymous one ending exactly at @offset (i->last_offset == -@offset)
 * and it refers to the same page, that buffer is simply lengthened instead
 * of a new one being pushed.
 *
 * Returns the number of bytes added; 0 if there was nothing to add, the
 * sanity check failed, or the pipe is full.
 */
static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
struct iov_iter *i)
{
struct pipe_inode_info *pipe = i->pipe;
unsigned int head = pipe->head;
if (unlikely(bytes > i->count))
bytes = i->count;
if (unlikely(!bytes))
return 0;
if (!sanity(i))
return 0;
if (offset && i->last_offset == -offset) { // could we merge it?
struct pipe_buffer *buf = pipe_buf(pipe, head - 1);
if (buf->page == page) {
buf->len += bytes;
i->last_offset -= bytes;
i->count -= bytes;
return bytes;
}
}
if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
return 0;
push_page(pipe, page, offset, bytes);
/* Negative last_offset: same encoding last_offset() uses for such buffers. */
i->last_offset = -(offset + bytes);
/* head was sampled before push_page(), so it indexes the new buffer. */
i->head = head;
i->count -= bytes;
return bytes;
}
/*
* fault_in_iov_iter_readable - fault in iov iterator for reading
* @i: iterator
@ -446,46 +300,6 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction,
}
EXPORT_SYMBOL(iov_iter_init);
/*
 * Work out how many pages the pipe behind @i can still take.
 *
 * *@npages is set to the number of unused slots (max_usage minus the
 * occupied ones, never below zero), plus one if the last buffer is an
 * anonymous page that is not yet full (i->last_offset strictly between 0
 * and PAGE_SIZE).
 *
 * Returns the offset in that partial buffer, or 0 if there is none.
 */
static inline unsigned int pipe_npages(const struct iov_iter *i, int *npages)
{
	struct pipe_inode_info *pipe = i->pipe;
	int in_use = pipe->head - pipe->tail;
	int tail_off = i->last_offset;
	int avail = max((int)pipe->max_usage - in_use, 0);

	/* No partially filled anon page at the end: only whole slots count. */
	if (tail_off <= 0 || tail_off >= PAGE_SIZE) {
		*npages = avail;
		return 0;
	}

	/* anon and not full */
	*npages = avail + 1;
	return tail_off;
}
/*
 * Copy @bytes from the kernel buffer @addr into the pipe behind @i, one
 * append_pipe() chunk at a time.
 *
 * @bytes is clamped to i->count.  Returns the number of bytes copied, which
 * is short if append_pipe() could not supply another page, and 0 if there
 * was nothing to copy or the sanity check failed.
 */
static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
				struct iov_iter *i)
{
	size_t left;

	if (unlikely(bytes > i->count))
		bytes = i->count;
	if (unlikely(!bytes) || !sanity(i))
		return 0;

	left = bytes;
	while (left) {
		unsigned int pg_off, span;
		struct page *page = append_pipe(i, left, &pg_off);

		if (!page)
			return bytes - left;

		span = min_t(size_t, left, PAGE_SIZE - pg_off);
		memcpy_to_page(page, pg_off, addr, span);
		addr += span;
		left -= span;
	}
	return bytes;
}
static __wsum csum_and_memcpy(void *to, const void *from, size_t len,
__wsum sum, size_t off)
{
@ -493,44 +307,10 @@ static __wsum csum_and_memcpy(void *to, const void *from, size_t len,
return csum_block_add(sum, next, off);
}
/*
 * Copy @bytes from @addr into the pipe behind @i while folding the copied
 * data into the checksum at *@sump.
 *
 * @bytes is clamped to i->count.  If there is nothing to copy or the sanity
 * check fails, 0 is returned and *@sump is left untouched.  Otherwise the
 * data is copied chunk by chunk via append_pipe(); *@sump receives the
 * updated checksum and the number of bytes actually copied is returned
 * (short if append_pipe() ran out of room).
 */
static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
					 struct iov_iter *i, __wsum *sump)
{
	__wsum csum = *sump;
	size_t done = 0;

	if (unlikely(bytes > i->count))
		bytes = i->count;
	if (unlikely(!bytes) || !sanity(i))
		return 0;

	while (done < bytes) {
		unsigned int pg_off, span;
		struct page *page = append_pipe(i, bytes - done, &pg_off);
		char *kaddr;

		if (!page)
			break;

		span = min_t(size_t, bytes - done, PAGE_SIZE - pg_off);
		kaddr = kmap_local_page(page);
		csum = csum_and_memcpy(kaddr + pg_off, addr + done, span,
				       csum, done);
		kunmap_local(kaddr);
		done += span;
	}

	*sump = csum;
	return done;
}
size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
if (WARN_ON_ONCE(i->data_source))
return 0;
if (unlikely(iov_iter_is_pipe(i)))
return copy_pipe_to_iter(addr, bytes, i);
if (user_backed_iter(i))
might_fault();
iterate_and_advance(i, bytes, base, len, off,
@ -552,42 +332,6 @@ static int copyout_mc(void __user *to, const void *from, size_t n)
return n;
}
/*
 * Copy @bytes from @addr into the pipe behind @i using copy_mc_to_kernel(),
 * stopping at the first chunk that is not copied in full.
 *
 * @bytes is clamped to i->count.  Space is claimed chunk by chunk through
 * append_pipe(); when a chunk comes up short, the uncopied tail is handed
 * back with iov_iter_revert() and the loop ends.
 *
 * Returns the number of bytes copied (0 if there was nothing to copy or the
 * sanity check failed).
 */
static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
struct iov_iter *i)
{
size_t xfer = 0;
unsigned int off, chunk;
if (unlikely(bytes > i->count))
bytes = i->count;
if (unlikely(!bytes))
return 0;
if (!sanity(i))
return 0;
while (bytes) {
struct page *page = append_pipe(i, bytes, &off);
unsigned long rem;
char *p;
if (!page)
break;
chunk = min_t(size_t, bytes, PAGE_SIZE - off);
p = kmap_local_page(page);
/* rem is treated below as the number of bytes left uncopied. */
rem = copy_mc_to_kernel(p + off, addr + xfer, chunk);
chunk -= rem;
kunmap_local(p);
xfer += chunk;
bytes -= chunk;
if (rem) {
/* Give back the part append_pipe() claimed but we did not fill. */
iov_iter_revert(i, rem);
break;
}
}
return xfer;
}
/**
* _copy_mc_to_iter - copy to iter with source memory error exception handling
* @addr: source kernel address
@ -607,9 +351,8 @@ static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
* alignment and poison alignment assumptions to avoid re-triggering
* hardware exceptions.
*
* * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
* Compare to copy_to_iter() where only ITER_IOVEC attempts might return
* a short copy.
* * ITER_KVEC and ITER_BVEC can return short copies. Compare to
* copy_to_iter() where only ITER_IOVEC attempts might return a short copy.
*
* Return: number of bytes copied (may be %0)
*/
@ -617,8 +360,6 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
if (WARN_ON_ONCE(i->data_source))
return 0;
if (unlikely(iov_iter_is_pipe(i)))
return copy_mc_pipe_to_iter(addr, bytes, i);
if (user_backed_iter(i))
might_fault();
__iterate_and_advance(i, bytes, base, len, off,
@ -732,8 +473,6 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
return 0;
if (WARN_ON_ONCE(i->data_source))
return 0;
if (unlikely(iov_iter_is_pipe(i)))
return copy_page_to_iter_pipe(page, offset, bytes, i);
page += offset / PAGE_SIZE; // first subpage
offset %= PAGE_SIZE;
while (1) {
@ -764,8 +503,6 @@ size_t copy_page_to_iter_nofault(struct page *page, unsigned offset, size_t byte
return 0;
if (WARN_ON_ONCE(i->data_source))
return 0;
if (unlikely(iov_iter_is_pipe(i)))
return copy_page_to_iter_pipe(page, offset, bytes, i);
page += offset / PAGE_SIZE; // first subpage
offset %= PAGE_SIZE;
while (1) {
@ -818,36 +555,8 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
}
EXPORT_SYMBOL(copy_page_from_iter);
/*
 * Append @bytes zero bytes to the pipe behind @i.
 *
 * @bytes is clamped to i->count.  Space is claimed chunk by chunk through
 * append_pipe() and cleared through a local kernel mapping of each page.
 *
 * Returns the number of bytes zeroed, which is short if append_pipe() could
 * not supply another page, and 0 if there was nothing to do or the sanity
 * check failed.
 */
static size_t pipe_zero(size_t bytes, struct iov_iter *i)
{
	size_t left;

	if (unlikely(bytes > i->count))
		bytes = i->count;
	if (unlikely(!bytes) || !sanity(i))
		return 0;

	left = bytes;
	while (left) {
		unsigned int pg_off, span;
		struct page *page = append_pipe(i, left, &pg_off);
		char *kaddr;

		if (!page)
			return bytes - left;

		span = min_t(size_t, left, PAGE_SIZE - pg_off);
		kaddr = kmap_local_page(page);
		memset(kaddr + pg_off, 0, span);
		kunmap_local(kaddr);
		left -= span;
	}
	return bytes;
}
size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
{
if (unlikely(iov_iter_is_pipe(i)))
return pipe_zero(bytes, i);
iterate_and_advance(i, bytes, base, len, count,
clear_user(base, len),
memset(base, 0, len)
@ -878,32 +587,6 @@ size_t copy_page_from_iter_atomic(struct page *page, unsigned offset, size_t byt
}
EXPORT_SYMBOL(copy_page_from_iter_atomic);
/*
 * Advance a pipe-backed iterator by @size bytes and trim the pipe to match.
 *
 * If i->last_offset is zero and @size is zero, every buffer from
 * i->start_head onwards is discarded.  Otherwise i->count is reduced by
 * @size and the buffers are walked from i->head until the one in which the
 * new position falls; that buffer's length is cut to end at the new
 * position, i->last_offset is recomputed from it, and every buffer after it
 * is discarded.
 */
static void pipe_advance(struct iov_iter *i, size_t size)
{
struct pipe_inode_info *pipe = i->pipe;
int off = i->last_offset;
if (!off && !size) {
pipe_discard_from(pipe, i->start_head); // discard everything
return;
}
i->count -= size;
while (1) {
struct pipe_buffer *buf = pipe_buf(pipe, i->head);
if (off) /* make it relative to the beginning of buffer */
size += abs(off) - buf->offset;
if (size <= buf->len) {
/* New position lands in this buffer: truncate it here. */
buf->len = size;
i->last_offset = last_offset(buf);
break;
}
/* Consume this buffer whole and continue from the start of the next. */
size -= buf->len;
i->head++;
off = 0;
}
pipe_discard_from(pipe, i->head + 1); // discard everything past this one
}
static void iov_iter_bvec_advance(struct iov_iter *i, size_t size)
{
const struct bio_vec *bvec, *end;
@ -955,8 +638,6 @@ void iov_iter_advance(struct iov_iter *i, size_t size)
iov_iter_iovec_advance(i, size);
} else if (iov_iter_is_bvec(i)) {
iov_iter_bvec_advance(i, size);
} else if (iov_iter_is_pipe(i)) {
pipe_advance(i, size);
} else if (iov_iter_is_discard(i)) {
i->count -= size;
}
@ -970,26 +651,6 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll)
if (WARN_ON(unroll > MAX_RW_COUNT))
return;
i->count += unroll;
if (unlikely(iov_iter_is_pipe(i))) {
struct pipe_inode_info *pipe = i->pipe;
unsigned int head = pipe->head;
while (head > i->start_head) {
struct pipe_buffer *b = pipe_buf(pipe, --head);
if (unroll < b->len) {
b->len -= unroll;
i->last_offset = last_offset(b);
i->head = head;
return;
}
unroll -= b->len;
pipe_buf_release(pipe, b);
pipe->head--;
}
i->last_offset = 0;
i->head = head;
return;
}
if (unlikely(iov_iter_is_discard(i)))
return;
if (unroll <= i->iov_offset) {
@ -1079,24 +740,6 @@ void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
}
EXPORT_SYMBOL(iov_iter_bvec);
/*
 * Initialise @i as a pipe-backed destination iterator of @count bytes.
 *
 * @direction must be READ (anything else is a BUG).  A warning is issued if
 * the pipe ring is already full.  Both the iterator's head and its
 * start_head are set to the pipe's current head, and last_offset starts
 * at 0.
 */
void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
		   struct pipe_inode_info *pipe,
		   size_t count)
{
	BUG_ON(direction != READ);
	WARN_ON(pipe_full(pipe->head, pipe->tail, pipe->ring_size));

	*i = (struct iov_iter) {
		.iter_type	= ITER_PIPE,
		.data_source	= false,
		.count		= count,
		.pipe		= pipe,
		.start_head	= pipe->head,
		.head		= pipe->head,
		.last_offset	= 0,
	};
}
EXPORT_SYMBOL(iov_iter_pipe);
/**
* iov_iter_xarray - Initialise an I/O iterator to use the pages in an xarray
* @i: The iterator to initialise.
@ -1224,19 +867,6 @@ bool iov_iter_is_aligned(const struct iov_iter *i, unsigned addr_mask,
if (iov_iter_is_bvec(i))
return iov_iter_aligned_bvec(i, addr_mask, len_mask);
if (iov_iter_is_pipe(i)) {
size_t size = i->count;
if (size & len_mask)
return false;
if (size && i->last_offset > 0) {
if (i->last_offset & addr_mask)
return false;
}
return true;
}
if (iov_iter_is_xarray(i)) {
if (i->count & len_mask)
return false;
@ -1307,14 +937,6 @@ unsigned long iov_iter_alignment(const struct iov_iter *i)
if (iov_iter_is_bvec(i))
return iov_iter_alignment_bvec(i);
if (iov_iter_is_pipe(i)) {
size_t size = i->count;
if (size && i->last_offset > 0)
return size | i->last_offset;
return size;
}
if (iov_iter_is_xarray(i))
return (i->xarray_start + i->iov_offset) | i->count;
@ -1367,36 +989,6 @@ static int want_pages_array(struct page ***res, size_t size,
return count;
}
/*
 * pipe_get_pages - collect referenced pages from a pipe-backed iterator.
 * @i: the iterator to take pages from
 * @pages: in/out pointer to the page array, handed to want_pages_array()
 * @maxsize: upper bound on the number of bytes to cover
 * @maxpages: upper bound on the number of pages to return
 * @start: set to the offset value returned by pipe_npages()
 *
 * Returns the number of bytes covered by the pages stored in *@pages
 * (maxsize minus what was left over), -EFAULT if the iterator fails
 * sanity(), if pipe_npages() reports no pages, or if no page at all could be
 * obtained from append_pipe(), and -ENOMEM if want_pages_array() returns 0.
 *
 * Every page stored in the array has had get_page() called on it.
 */
static ssize_t pipe_get_pages(struct iov_iter *i,
struct page ***pages, size_t maxsize, unsigned maxpages,
size_t *start)
{
unsigned int npages, count, off, chunk;
struct page **p;
size_t left;
if (!sanity(i))
return -EFAULT;
/* off is the starting offset; npages is filled in by pipe_npages(). */
*start = off = pipe_npages(i, &npages);
if (!npages)
return -EFAULT;
/* Never ask for more array slots than either limit allows. */
count = want_pages_array(pages, maxsize, off, min(npages, maxpages));
if (!count)
return -ENOMEM;
p = *pages;
/*
 * npages is reused as the loop counter.  chunk is assigned in the body
 * before the for-increment subtracts it from left; when the loop exits via
 * break, left is not reduced for the failed iteration.
 */
for (npages = 0, left = maxsize ; npages < count; npages++, left -= chunk) {
/* NOTE(review): append_pipe() presumably updates off for the returned page - confirm. */
struct page *page = append_pipe(i, left, &off);
if (!page)
break;
/* Bytes this page contributes: the rest of the page, capped by left. */
chunk = min_t(size_t, left, PAGE_SIZE - off);
/* Store the page in the output array and take a reference on it. */
get_page(*p++ = page);
}
if (!npages)
return -EFAULT;
return maxsize - left;
}
static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa,
pgoff_t index, unsigned int nr_pages)
{
@ -1547,8 +1139,6 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i,
}
return maxsize;
}
if (iov_iter_is_pipe(i))
return pipe_get_pages(i, pages, maxsize, maxpages, start);
if (iov_iter_is_xarray(i))
return iter_xarray_get_pages(i, pages, maxsize, maxpages, start);
return -EFAULT;
@ -1638,9 +1228,7 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
}
sum = csum_shift(csstate->csum, csstate->off);
if (unlikely(iov_iter_is_pipe(i)))
bytes = csum_and_copy_to_pipe_iter(addr, bytes, i, &sum);
else iterate_and_advance(i, bytes, base, len, off, ({
iterate_and_advance(i, bytes, base, len, off, ({
next = csum_and_copy_to_user(addr + off, base, len);
sum = csum_block_add(sum, next, off);
next ? 0 : len;
@ -1725,15 +1313,6 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
return iov_npages(i, maxpages);
if (iov_iter_is_bvec(i))
return bvec_npages(i, maxpages);
if (iov_iter_is_pipe(i)) {
int npages;
if (!sanity(i))
return 0;
pipe_npages(i, &npages);
return min(npages, maxpages);
}
if (iov_iter_is_xarray(i)) {
unsigned offset = (i->xarray_start + i->iov_offset) % PAGE_SIZE;
int npages = DIV_ROUND_UP(offset + i->count, PAGE_SIZE);
@ -1746,10 +1325,6 @@ EXPORT_SYMBOL(iov_iter_npages);
const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
{
*new = *old;
if (unlikely(iov_iter_is_pipe(new))) {
WARN_ON(1);
return NULL;
}
if (iov_iter_is_bvec(new))
return new->bvec = kmemdup(new->bvec,
new->nr_segs * sizeof(struct bio_vec),

View File

@ -2693,8 +2693,7 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
if (unlikely(iocb->ki_pos >= i_size_read(inode)))
break;
error = filemap_get_pages(iocb, iter->count, &fbatch,
iov_iter_is_pipe(iter));
error = filemap_get_pages(iocb, iter->count, &fbatch, false);
if (error < 0)
break;
@ -2878,9 +2877,24 @@ size_t splice_folio_into_pipe(struct pipe_inode_info *pipe,
return spliced;
}
/*
* Splice folios from the pagecache of a buffered (ie. non-O_DIRECT) file into
* a pipe.
/**
* filemap_splice_read - Splice data from a file's pagecache into a pipe
* @in: The file to read from
* @ppos: Pointer to the file position to read from
* @pipe: The pipe to splice into
* @len: The amount to splice
* @flags: The SPLICE_F_* flags
*
* This function gets folios from a file's pagecache and splices them into the
* pipe. Readahead will be called as necessary to fill more folios. This may
* be used for blockdevs also.
*
* Return: On success, the number of bytes read will be returned and *@ppos
* will be updated if appropriate; 0 will be returned if there is no more data
* to be read; -EAGAIN will be returned if the pipe had no space, and some
* other negative error code will be returned on error. A short read may occur
* if the pipe has insufficient space, we reach the end of the data or we hit a
* hole.
*/
ssize_t filemap_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe,
@ -2893,6 +2907,9 @@ ssize_t filemap_splice_read(struct file *in, loff_t *ppos,
bool writably_mapped;
int i, error = 0;
if (unlikely(*ppos >= in->f_mapping->host->i_sb->s_maxbytes))
return 0;
init_sync_kiocb(&iocb, in);
iocb.ki_pos = *ppos;
@ -2906,7 +2923,7 @@ ssize_t filemap_splice_read(struct file *in, loff_t *ppos,
do {
cond_resched();
if (*ppos >= i_size_read(file_inode(in)))
if (*ppos >= i_size_read(in->f_mapping->host))
break;
iocb.ki_pos = *ppos;
@ -2922,7 +2939,7 @@ ssize_t filemap_splice_read(struct file *in, loff_t *ppos,
* part of the page is not copied back to userspace (unless
* another truncate extends the file - this is desired though).
*/
isize = i_size_read(file_inode(in));
isize = i_size_read(in->f_mapping->host);
if (unlikely(*ppos >= isize))
break;
end_offset = min_t(loff_t, isize, *ppos + len);

View File

@ -2731,6 +2731,138 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
return retval ? retval : error;
}
/*
 * ->get() handler for zero_pipe_buf_ops: reports success unconditionally and
 * takes no reference of its own.  Neither @pipe nor @buf is examined.
 */
static bool zero_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
return true;
}
/*
 * ->release() handler for zero_pipe_buf_ops: deliberately a no-op, matching
 * zero_pipe_buf_get(), which takes no reference that would need dropping.
 */
static void zero_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
}
/*
 * ->try_steal() handler for zero_pipe_buf_ops: always refuses, so the page
 * held by such a buffer is never handed over to the caller.
 */
static bool zero_pipe_buf_try_steal(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
return false;
}
/*
 * Buffer operations for the pipe buffers that splice_zeropage_into_pipe()
 * points at ZERO_PAGE(0): get always succeeds, release does nothing and
 * stealing is always refused.
 */
static const struct pipe_buf_operations zero_pipe_buf_ops = {
.release = zero_pipe_buf_release,
.try_steal = zero_pipe_buf_try_steal,
.get = zero_pipe_buf_get,
};
/*
 * Append one buffer referring to ZERO_PAGE(0) to @pipe, covering at most the
 * remainder of the page that contains file position @fpos.
 *
 * @size is first clamped so that the buffer does not cross a page boundary.
 * If the pipe has room (judged against pipe->max_usage), the head slot is
 * filled in with zero_pipe_buf_ops and the head is advanced.
 *
 * Returns the clamped size.  Note that this value is returned even when the
 * pipe was full and nothing was added.
 */
static size_t splice_zeropage_into_pipe(struct pipe_inode_info *pipe,
					loff_t fpos, size_t size)
{
	size_t in_page = fpos & ~PAGE_MASK;
	struct pipe_buffer *slot;

	/* Stay within the page that @fpos falls in. */
	size = min_t(size_t, size, PAGE_SIZE - in_page);

	if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
		return size;

	slot = pipe_head_buf(pipe);
	*slot = (struct pipe_buffer) {
		.page	= ZERO_PAGE(0),
		.offset	= in_page,
		.len	= size,
		.ops	= &zero_pipe_buf_ops,
	};
	pipe->head++;

	return size;
}
/**
 * shmem_file_splice_read - Splice data from a shmem file into a pipe
 * @in: The file to read from
 * @ppos: Pointer to the file position to read from; advanced by the number
 *	of bytes spliced
 * @pipe: The pipe to splice into
 * @len: The amount to splice
 * @flags: The SPLICE_F_* flags (not examined here)
 *
 * Folios that shmem_get_folio() returns for SGP_READ are spliced into the
 * pipe with splice_folio_into_pipe(); where it returns no folio (a hole), a
 * buffer pointing at the zero page is spliced instead.  In both cases the
 * amount spliced per iteration is limited to what lies before i_size.
 *
 * Return: The number of bytes spliced if that is nonzero.  Otherwise 0, or
 * a negative error code: -EIO for a hwpoisoned folio, or whatever
 * shmem_get_folio() returned (with -EINVAL treated as "no more data").
 */
static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
				      struct pipe_inode_info *pipe,
				      size_t len, unsigned int flags)
{
	struct inode *inode = file_inode(in);
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio = NULL;
	size_t total_spliced = 0, used, npages, n, part;
	loff_t isize;
	int error = 0;

	/* Work out how much data we can actually add into the pipe */
	used = pipe_occupancy(pipe->head, pipe->tail);
	npages = max_t(ssize_t, pipe->max_usage - used, 0);
	len = min_t(size_t, len, npages * PAGE_SIZE);

	do {
		if (*ppos >= i_size_read(inode))
			break;

		error = shmem_get_folio(inode, *ppos / PAGE_SIZE, &folio, SGP_READ);
		if (error) {
			/* -EINVAL is not reported to the caller; just stop. */
			if (error == -EINVAL)
				error = 0;
			break;
		}
		if (folio) {
			folio_unlock(folio);

			/*
			 * The reference is dropped after the loop.
			 * NOTE(review): only the folio-level hwpoison flag is
			 * tested here; confirm whether large folios with a
			 * poisoned subpage need an extra check.
			 */
			if (folio_test_hwpoison(folio)) {
				error = -EIO;
				break;
			}
		}

		/*
		 * i_size must be checked after we know the pages are Uptodate.
		 *
		 * Checking i_size after the check allows us to calculate
		 * the correct value for "nr", which means the zero-filled
		 * part of the page is not copied back to userspace (unless
		 * another truncate extends the file - this is desired though).
		 */
		isize = i_size_read(inode);
		if (unlikely(*ppos >= isize))
			break;
		/* Never splice beyond EOF, whether from a folio or a hole. */
		part = min_t(loff_t, isize - *ppos, len);

		if (folio) {
			/*
			 * If users can be writing to this page using arbitrary
			 * virtual addresses, take care about potential aliasing
			 * before reading the page on the kernel side.
			 */
			if (mapping_writably_mapped(mapping))
				flush_dcache_folio(folio);
			folio_mark_accessed(folio);
			/*
			 * Ok, we have the page, and it's up-to-date, so we can
			 * now splice it into the pipe.
			 */
			n = splice_folio_into_pipe(pipe, folio, *ppos, part);
			folio_put(folio);
			folio = NULL;
		} else {
			/*
			 * Hole: splice zeroes, limited to "part" rather than
			 * "len" so that the zero buffer cannot run past
			 * i_size when EOF falls inside this page.
			 */
			n = splice_zeropage_into_pipe(pipe, *ppos, part);
		}

		if (!n)
			break;
		len -= n;
		total_spliced += n;
		*ppos += n;
		in->f_ra.prev_pos = *ppos;
		if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
			break;

		cond_resched();
	} while (len);

	/* Drop the reference left over from an early exit from the loop. */
	if (folio)
		folio_put(folio);

	file_accessed(in);
	return total_spliced ? total_spliced : error;
}
static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
{
struct address_space *mapping = file->f_mapping;
@ -3971,7 +4103,7 @@ static const struct file_operations shmem_file_operations = {
.read_iter = shmem_file_read_iter,
.write_iter = generic_file_write_iter,
.fsync = noop_fsync,
.splice_read = generic_file_splice_read,
.splice_read = shmem_file_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = shmem_fallocate,
#endif

View File

@ -1093,7 +1093,7 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
struct socket *sock = file->private_data;
if (unlikely(!sock->ops->splice_read))
return generic_file_splice_read(file, ppos, pipe, len, flags);
return copy_splice_read(file, ppos, pipe, len, flags);
return sock->ops->splice_read(sock, ppos, pipe, len, flags);
}