fs: use do_splice_direct() for nfsd/ksmbd server-side-copy

nfsd/ksmbd call vfs_copy_file_range() with flag COPY_FILE_SPLICE to
perform kernel copy between two files on any two filesystems.

Splicing input file, while holding file_start_write() on the output file
which is on a different sb, poses a risk for fanotify-related deadlocks.

We only need to call splice_file_range() from within the context of
->copy_file_range() filesystem methods with file_start_write() held.

To avoid the possible deadlocks, always use do_splice_direct() instead of
splice_file_range() for the kernel copy fallback in vfs_copy_file_range()
without holding file_start_write().

Reported-and-tested-by: Bert Karwatzki <spasswolf@web.de>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Link: https://lore.kernel.org/r/20231130141624.3338942-4-amir73il@gmail.com
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
This commit is contained in:
Amir Goldstein 2023-11-30 16:16:24 +02:00 committed by Christian Brauner
parent da40448ce4
commit 7306512686

View File

@ -1421,6 +1421,10 @@ ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out, struct file *file_out, loff_t pos_out,
size_t len, unsigned int flags) size_t len, unsigned int flags)
{ {
/* May only be called from within ->copy_file_range() methods */
if (WARN_ON_ONCE(flags))
return -EINVAL;
return splice_file_range(file_in, &pos_in, file_out, &pos_out, return splice_file_range(file_in, &pos_in, file_out, &pos_out,
min_t(size_t, len, MAX_RW_COUNT)); min_t(size_t, len, MAX_RW_COUNT));
} }
@ -1510,6 +1514,7 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
{ {
ssize_t ret; ssize_t ret;
bool splice = flags & COPY_FILE_SPLICE; bool splice = flags & COPY_FILE_SPLICE;
bool samesb = file_inode(file_in)->i_sb == file_inode(file_out)->i_sb;
if (flags & ~COPY_FILE_SPLICE) if (flags & ~COPY_FILE_SPLICE)
return -EINVAL; return -EINVAL;
@ -1541,19 +1546,24 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
ret = file_out->f_op->copy_file_range(file_in, pos_in, ret = file_out->f_op->copy_file_range(file_in, pos_in,
file_out, pos_out, file_out, pos_out,
len, flags); len, flags);
goto done; } else if (!splice && file_in->f_op->remap_file_range && samesb) {
}
if (!splice && file_in->f_op->remap_file_range &&
file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) {
ret = file_in->f_op->remap_file_range(file_in, pos_in, ret = file_in->f_op->remap_file_range(file_in, pos_in,
file_out, pos_out, file_out, pos_out,
min_t(loff_t, MAX_RW_COUNT, len), min_t(loff_t, MAX_RW_COUNT, len),
REMAP_FILE_CAN_SHORTEN); REMAP_FILE_CAN_SHORTEN);
if (ret > 0) /* fallback to splice */
goto done; if (ret <= 0)
splice = true;
} else if (samesb) {
/* Fallback to splice for same sb copy for backward compat */
splice = true;
} }
file_end_write(file_out);
if (!splice)
goto done;
/* /*
* We can get here for same sb copy of filesystems that do not implement * We can get here for same sb copy of filesystems that do not implement
* ->copy_file_range() in case filesystem does not support clone or in * ->copy_file_range() in case filesystem does not support clone or in
@ -1565,11 +1575,16 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
* and which filesystems do not, that will allow userspace tools to * and which filesystems do not, that will allow userspace tools to
* make consistent decisions w.r.t using copy_file_range(). * make consistent decisions w.r.t using copy_file_range().
* *
* We also get here if caller (e.g. nfsd) requested COPY_FILE_SPLICE. * We also get here if caller (e.g. nfsd) requested COPY_FILE_SPLICE
* for server-side-copy between any two sb.
*
* In any case, we call do_splice_direct() and not splice_file_range(),
* without file_start_write() held, to avoid possible deadlocks related
* to splicing from input file, while file_start_write() is held on
* the output file on a different sb.
*/ */
ret = generic_copy_file_range(file_in, pos_in, file_out, pos_out, len, ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out,
flags); min_t(size_t, len, MAX_RW_COUNT), 0);
done: done:
if (ret > 0) { if (ret > 0) {
fsnotify_access(file_in); fsnotify_access(file_in);
@ -1581,8 +1596,6 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
inc_syscr(current); inc_syscr(current);
inc_syscw(current); inc_syscw(current);
file_end_write(file_out);
return ret; return ret;
} }
EXPORT_SYMBOL(vfs_copy_file_range); EXPORT_SYMBOL(vfs_copy_file_range);