vfs-6.13-rc1.fixes

-----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCZ0YDhQAKCRCRxhvAZXjc
 ouy/AQDds2VXT3baRn5mvLOnWaN9tez+TnPLUKbS8m4srJUrGgD/SQkYc14vANGL
 iIw6oAhDhdzrjrm0rxr2COXah6me7g0=
 =Xu0X
 -----END PGP SIGNATURE-----

Merge tag 'vfs-6.13-rc1.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs fixes from Christian Brauner:

 - Fix a few iomap bugs

 - Fix a wrong argument in a backing file callback

 - Fix security mount option retrieval in statmount()

 - Clean up how statmount() handles unescaped options

 - Add a missing inode_owner_or_capable() check for setting write hints

 - Clear the return value in read_kcore_iter() after a successful
   iov_iter_zero()

 - Fix a mount_setattr() selftest

 - Fix a function signature in the mount API documentation

 - Remove a duplicate header include from the fscache code

* tag 'vfs-6.13-rc1.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  fs/backing_file: fix wrong argument in callback
  fs_parser: update mount_api doc to match function signature
  fs: require inode_owner_or_capable for F_SET_RW_HINT
  fs/proc/kcore.c: Clear ret value in read_kcore_iter after successful iov_iter_zero
  statmount: fix security option retrieval
  statmount: clean up unescaped option handling
  fscache: Remove duplicate included header
  iomap: elide flush from partial eof zero range
  iomap: lift zeroed mapping handling into iomap_zero_range()
  iomap: reset per-iter state on non-error iter advances
  iomap: warn on zero range of a post-eof folio
  selftests/mount_setattr: Fix failures on 64K PAGE_SIZE kernels

commit 7d4050728c
Author: Linus Torvalds
Date:   2024-11-27 08:11:46 -08:00

9 changed files with 81 additions and 79 deletions

Documentation/filesystems/mount_api.rst

@@ -770,7 +770,8 @@ process the parameters it is given.
      * ::
 
-	bool fs_validate_description(const struct fs_parameter_description *desc);
+	bool fs_validate_description(const char *name,
+				     const struct fs_parameter_description *desc);
 
      This performs some validation checks on a parameter description. It
      returns true if the description is good and false if it is not. It will

fs/backing-file.c

@@ -327,6 +327,7 @@ int backing_file_mmap(struct file *file, struct vm_area_struct *vma,
 			   struct backing_file_ctx *ctx)
 {
 	const struct cred *old_cred;
+	struct file *user_file = vma->vm_file;
 	int ret;
 
 	if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)))
@@ -342,7 +343,7 @@ int backing_file_mmap(struct file *file, struct vm_area_struct *vma,
 	revert_creds_light(old_cred);
 
 	if (ctx->accessed)
-		ctx->accessed(vma->vm_file);
+		ctx->accessed(user_file);
 
 	return ret;
 }

fs/fcntl.c

@@ -374,6 +374,9 @@ static long fcntl_set_rw_hint(struct file *file, unsigned int cmd,
 	u64 __user *argp = (u64 __user *)arg;
 	u64 hint;
 
+	if (!inode_owner_or_capable(file_mnt_idmap(file), inode))
+		return -EPERM;
+
 	if (copy_from_user(&hint, argp, sizeof(hint)))
 		return -EFAULT;
 	if (!rw_hint_valid(hint))
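
A minimal userspace sketch of the new check's effect (illustrative only: the
target path and its ownership are assumptions, and the fallback #defines mirror
the uapi values): a caller that is neither the inode owner nor CAP_FOWNER-capable
now gets EPERM instead of silently updating the inode's write hint.

	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>

	#ifndef F_SET_RW_HINT
	#define F_SET_RW_HINT		1036	/* F_LINUX_SPECIFIC_BASE + 12 */
	#endif
	#ifndef RWH_WRITE_LIFE_SHORT
	#define RWH_WRITE_LIFE_SHORT	2
	#endif

	int main(void)
	{
		uint64_t hint = RWH_WRITE_LIFE_SHORT;
		/* assumed: an existing file owned by another user */
		int fd = open("/etc/hostname", O_RDONLY);

		if (fd < 0)
			return 1;
		/* with this patch, non-owners without CAP_FOWNER get EPERM */
		if (fcntl(fd, F_SET_RW_HINT, &hint) == -1)
			perror("F_SET_RW_HINT");
		return 0;
	}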

fs/iomap/buffered-io.c

@@ -1350,40 +1350,12 @@ static inline int iomap_zero_iter_flush_and_stale(struct iomap_iter *i)
 	return filemap_write_and_wait_range(mapping, i->pos, end);
 }
 
-static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero,
-		bool *range_dirty)
+static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
 {
-	const struct iomap *srcmap = iomap_iter_srcmap(iter);
 	loff_t pos = iter->pos;
 	loff_t length = iomap_length(iter);
 	loff_t written = 0;
 
-	/*
-	 * We must zero subranges of unwritten mappings that might be dirty in
-	 * pagecache from previous writes. We only know whether the entire range
-	 * was clean or not, however, and dirty folios may have been written
-	 * back or reclaimed at any point after mapping lookup.
-	 *
-	 * The easiest way to deal with this is to flush pagecache to trigger
-	 * any pending unwritten conversions and then grab the updated extents
-	 * from the fs. The flush may change the current mapping, so mark it
-	 * stale for the iterator to remap it for the next pass to handle
-	 * properly.
-	 *
-	 * Note that holes are treated the same as unwritten because zero range
-	 * is (ab)used for partial folio zeroing in some cases. Hole backed
-	 * post-eof ranges can be dirtied via mapped write and the flush
-	 * triggers writeback time post-eof zeroing.
-	 */
-	if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN) {
-		if (*range_dirty) {
-			*range_dirty = false;
-			return iomap_zero_iter_flush_and_stale(iter);
-		}
-		/* range is clean and already zeroed, nothing to do */
-		return length;
-	}
-
 	do {
 		struct folio *folio;
 		int status;
@@ -1397,6 +1369,8 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero,
 		if (iter->iomap.flags & IOMAP_F_STALE)
 			break;
 
+		/* warn about zeroing folios beyond eof that won't write back */
+		WARN_ON_ONCE(folio_pos(folio) > iter->inode->i_size);
 		offset = offset_in_folio(folio, pos);
 		if (bytes > folio_size(folio) - offset)
 			bytes = folio_size(folio) - offset;
@@ -1429,28 +1403,58 @@ iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
 		.len		= len,
 		.flags		= IOMAP_ZERO,
 	};
+	struct address_space *mapping = inode->i_mapping;
+	unsigned int blocksize = i_blocksize(inode);
+	unsigned int off = pos & (blocksize - 1);
+	loff_t plen = min_t(loff_t, len, blocksize - off);
 	int ret;
 	bool range_dirty;
 
 	/*
-	 * Zero range wants to skip pre-zeroed (i.e. unwritten) mappings, but
-	 * pagecache must be flushed to ensure stale data from previous
-	 * buffered writes is not exposed. A flush is only required for certain
-	 * types of mappings, but checking pagecache after mapping lookup is
-	 * racy with writeback and reclaim.
+	 * Zero range can skip mappings that are zero on disk so long as
+	 * pagecache is clean. If pagecache was dirty prior to zero range, the
+	 * mapping converts on writeback completion and so must be zeroed.
 	 *
-	 * Therefore, check the entire range first and pass along whether any
-	 * part of it is dirty. If so and an underlying mapping warrants it,
-	 * flush the cache at that point. This trades off the occasional false
-	 * positive (and spurious flush, if the dirty data and mapping don't
-	 * happen to overlap) for simplicity in handling a relatively uncommon
-	 * situation.
+	 * The simplest way to deal with this across a range is to flush
+	 * pagecache and process the updated mappings. To avoid excessive
+	 * flushing on partial eof zeroing, special case it to zero the
+	 * unaligned start portion if already dirty in pagecache.
+	 */
+	if (off &&
+	    filemap_range_needs_writeback(mapping, pos, pos + plen - 1)) {
+		iter.len = plen;
+		while ((ret = iomap_iter(&iter, ops)) > 0)
+			iter.processed = iomap_zero_iter(&iter, did_zero);
+
+		iter.len = len - (iter.pos - pos);
+		if (ret || !iter.len)
+			return ret;
+	}
+
+	/*
+	 * To avoid an unconditional flush, check pagecache state and only flush
+	 * if dirty and the fs returns a mapping that might convert on
+	 * writeback.
 	 */
 	range_dirty = filemap_range_needs_writeback(inode->i_mapping,
-			pos, pos + len - 1);
+					iter.pos, iter.pos + iter.len - 1);
+	while ((ret = iomap_iter(&iter, ops)) > 0) {
+		const struct iomap *srcmap = iomap_iter_srcmap(&iter);
 
-	while ((ret = iomap_iter(&iter, ops)) > 0)
-		iter.processed = iomap_zero_iter(&iter, did_zero, &range_dirty);
+		if (srcmap->type == IOMAP_HOLE ||
+		    srcmap->type == IOMAP_UNWRITTEN) {
+			loff_t proc = iomap_length(&iter);
+
+			if (range_dirty) {
+				range_dirty = false;
+				proc = iomap_zero_iter_flush_and_stale(&iter);
+			}
+			iter.processed = proc;
+			continue;
+		}
+
+		iter.processed = iomap_zero_iter(&iter, did_zero);
+	}
 	return ret;
 }
 EXPORT_SYMBOL_GPL(iomap_zero_range);
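
For concreteness, a standalone sketch of the partial-block bounds the new
special case computes (plain userspace arithmetic; the example values are
made up, not taken from the patch):

	#include <stdio.h>

	int main(void)
	{
		long long pos = 5000, len = 100000, blocksize = 4096;
		long long off = pos & (blocksize - 1);		/* 904 */
		long long plen = len < blocksize - off ? len : blocksize - off;

		/* the first pass zeroes only the unaligned head of the range */
		printf("off=%lld plen=%lld head=%lld..%lld\n",
		       off, plen, pos, pos + plen - 1);	/* 904 3192 5000..8191 */
		return 0;
	}

Only that head portion is zeroed through pagecache when it is already dirty;
the remainder of the range is then handled without an unconditional flush.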

fs/iomap/iter.c

@@ -22,26 +22,25 @@
 static inline int iomap_iter_advance(struct iomap_iter *iter)
 {
 	bool stale = iter->iomap.flags & IOMAP_F_STALE;
+	int ret = 1;
 
 	/* handle the previous iteration (if any) */
 	if (iter->iomap.length) {
 		if (iter->processed < 0)
 			return iter->processed;
-		if (!iter->processed && !stale)
-			return 0;
 		if (WARN_ON_ONCE(iter->processed > iomap_length(iter)))
 			return -EIO;
 		iter->pos += iter->processed;
 		iter->len -= iter->processed;
-		if (!iter->len)
-			return 0;
+		if (!iter->len || (!iter->processed && !stale))
+			ret = 0;
 	}
 
-	/* clear the state for the next iteration */
+	/* clear the per iteration state */
 	iter->processed = 0;
 	memset(&iter->iomap, 0, sizeof(iter->iomap));
 	memset(&iter->srcmap, 0, sizeof(iter->srcmap));
-	return 1;
+	return ret;
 }
 
 static inline void iomap_iter_done(struct iomap_iter *iter)
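
For reference, the caller-side contract this change preserves is the standard
iomap walk, sketched here after the loop iomap_zero_range() uses above (a code
fragment, not standalone code):

	struct iomap_iter iter = {
		.inode	= inode,
		.pos	= pos,
		.len	= len,
		.flags	= IOMAP_ZERO,
	};
	int ret;

	/*
	 * iomap_iter() returns a positive value to continue the walk, 0 when
	 * the walk is complete, and a negative errno on error. After this
	 * change, the per-iteration state (processed, iomap, srcmap) is
	 * cleared on every non-error advance, including the final one that
	 * returns 0.
	 */
	while ((ret = iomap_iter(&iter, ops)) > 0)
		iter.processed = iomap_zero_iter(&iter, did_zero);
	return ret;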

fs/namespace.c

@@ -5057,21 +5057,32 @@ static int statmount_mnt_opts(struct kstatmount *s, struct seq_file *seq)
 	return 0;
 }
 
-static inline int statmount_opt_unescape(struct seq_file *seq, char *buf_start)
+static inline int statmount_opt_process(struct seq_file *seq, size_t start)
 {
-	char *buf_end, *opt_start, *opt_end;
+	char *buf_end, *opt_end, *src, *dst;
 	int count = 0;
 
+	if (unlikely(seq_has_overflowed(seq)))
+		return -EAGAIN;
+
 	buf_end = seq->buf + seq->count;
+	dst = seq->buf + start;
+	src = dst + 1;	/* skip initial comma */
+	if (src >= buf_end) {
+		seq->count = start;
+		return 0;
+	}
+
 	*buf_end = '\0';
-	for (opt_start = buf_start + 1; opt_start < buf_end; opt_start = opt_end + 1) {
-		opt_end = strchrnul(opt_start, ',');
+	for (; src < buf_end; src = opt_end + 1) {
+		opt_end = strchrnul(src, ',');
 		*opt_end = '\0';
-		buf_start += string_unescape(opt_start, buf_start, 0, UNESCAPE_OCTAL) + 1;
+		dst += string_unescape(src, dst, 0, UNESCAPE_OCTAL) + 1;
 		if (WARN_ON_ONCE(++count == INT_MAX))
 			return -EOVERFLOW;
 	}
-	seq->count = buf_start - 1 - seq->buf;
+	seq->count = dst - 1 - seq->buf;
 	return count;
 }
 
@@ -5080,24 +5091,16 @@ static int statmount_opt_array(struct kstatmount *s, struct seq_file *seq)
 	struct vfsmount *mnt = s->mnt;
 	struct super_block *sb = mnt->mnt_sb;
 	size_t start = seq->count;
-	char *buf_start;
 	int err;
 
 	if (!sb->s_op->show_options)
 		return 0;
 
-	buf_start = seq->buf + start;
 	err = sb->s_op->show_options(seq, mnt->mnt_root);
 	if (err)
 		return err;
 
-	if (unlikely(seq_has_overflowed(seq)))
-		return -EAGAIN;
-
-	if (seq->count == start)
-		return 0;
-
-	err = statmount_opt_unescape(seq, buf_start);
+	err = statmount_opt_process(seq, start);
 	if (err < 0)
 		return err;
 
@@ -5110,22 +5113,13 @@ static int statmount_opt_sec_array(struct kstatmount *s, struct seq_file *seq)
 	struct vfsmount *mnt = s->mnt;
 	struct super_block *sb = mnt->mnt_sb;
 	size_t start = seq->count;
-	char *buf_start;
 	int err;
 
-	buf_start = seq->buf + start;
-
 	err = security_sb_show_options(seq, sb);
-	if (!err)
+	if (err)
 		return err;
 
-	if (unlikely(seq_has_overflowed(seq)))
-		return -EAGAIN;
-
-	if (seq->count == start)
-		return 0;
-
-	err = statmount_opt_unescape(seq, buf_start);
+	err = statmount_opt_process(seq, start);
 	if (err < 0)
 		return err;
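
A userspace analog may make the repacking easier to follow (a hypothetical
sketch: the kernel version also octal-unescapes each option via
string_unescape(), which this analog skips):

	#define _GNU_SOURCE	/* strchrnul() */
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		char buf[] = ",ro,user_xattr";	/* as emitted by show_options */
		char *end = buf + strlen(buf);
		char *dst = buf, *src = dst + 1;	/* skip initial comma */
		int count = 0;

		while (src < end) {
			char *opt_end = strchrnul(src, ',');
			size_t n = opt_end - src;

			memmove(dst, src, n);	/* kernel also unescapes here */
			dst[n] = '\0';
			dst += n + 1;
			count++;
			src = opt_end + 1;
		}
		/* buf now holds "ro\0user_xattr\0"; count == 2 */
		printf("%d options\n", count);
		return 0;
	}

The comma-separated option string is repacked in place into consecutive
NUL-terminated strings, the array format statmount() hands to userspace.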

fs/netfs/fscache_io.c

@@ -9,7 +9,6 @@
 #include <linux/uio.h>
 #include <linux/bvec.h>
 #include <linux/slab.h>
-#include <linux/uio.h>
 #include "internal.h"
 
 /**

fs/proc/kcore.c

@@ -600,6 +600,7 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
 					ret = -EFAULT;
 					goto out;
 				}
+				ret = 0;
 			/*
 			 * We know the bounce buffer is safe to copy from, so
 			 * use _copy_to_iter() directly.

tools/testing/selftests/mount_setattr/mount_setattr_test.c

@@ -1026,7 +1026,7 @@ FIXTURE_SETUP(mount_setattr_idmapped)
 			"size=100000,mode=700"), 0);
 
 	ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV,
-			"size=100000,mode=700"), 0);
+			"size=2m,mode=700"), 0);
 
 	ASSERT_EQ(mkdir("/mnt/A", 0777), 0);