diff --git a/Documentation/filesystems/mount_api.rst b/Documentation/filesystems/mount_api.rst
index 317934c9e8fc..d92c276f1575 100644
--- a/Documentation/filesystems/mount_api.rst
+++ b/Documentation/filesystems/mount_api.rst
@@ -770,7 +770,8 @@ process the parameters it is given.
 
    * ::
 
-       bool fs_validate_description(const struct fs_parameter_description *desc);
+       bool fs_validate_description(const char *name,
+                                    const struct fs_parameter_description *desc);
 
      This performs some validation checks on a parameter description.  It
      returns true if the description is good and false if it is not.  It will
diff --git a/fs/backing-file.c b/fs/backing-file.c
index 526ddb4d6f76..cbdad8b68474 100644
--- a/fs/backing-file.c
+++ b/fs/backing-file.c
@@ -327,6 +327,7 @@ int backing_file_mmap(struct file *file, struct vm_area_struct *vma,
                       struct backing_file_ctx *ctx)
 {
         const struct cred *old_cred;
+        struct file *user_file = vma->vm_file;
         int ret;
 
         if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)))
@@ -342,7 +343,7 @@ int backing_file_mmap(struct file *file, struct vm_area_struct *vma,
         revert_creds_light(old_cred);
 
         if (ctx->accessed)
-                ctx->accessed(vma->vm_file);
+                ctx->accessed(user_file);
 
         return ret;
 }
diff --git a/fs/fcntl.c b/fs/fcntl.c
index ac77dd912412..49884fa3c81d 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -374,6 +374,9 @@ static long fcntl_set_rw_hint(struct file *file, unsigned int cmd,
         u64 __user *argp = (u64 __user *)arg;
         u64 hint;
 
+        if (!inode_owner_or_capable(file_mnt_idmap(file), inode))
+                return -EPERM;
+
         if (copy_from_user(&hint, argp, sizeof(hint)))
                 return -EFAULT;
         if (!rw_hint_valid(hint))
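Note on the fs/fcntl.c hunk: F_SET_RW_HINT is no longer writable by anyone who can merely open the file; the caller must now own the inode or hold CAP_FOWNER in the mount's idmapping, mirroring other inode-attribute changes. A minimal userspace sketch of the visible behavior change (the fallback macro values match include/uapi/linux/fcntl.h; error handling is trimmed):

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>

#ifndef F_SET_RW_HINT
#define F_SET_RW_HINT		(1024 + 12)	/* F_LINUX_SPECIFIC_BASE + 12 */
#endif
#ifndef RWH_WRITE_LIFE_SHORT
#define RWH_WRITE_LIFE_SHORT	2
#endif

int main(int argc, char **argv)
{
	uint64_t hint = RWH_WRITE_LIFE_SHORT;
	int fd;

	if (argc < 2 || (fd = open(argv[1], O_RDONLY)) < 0)
		return 1;
	/*
	 * Before this fix any opener could change the hint; now a caller
	 * that neither owns the inode nor has CAP_FOWNER gets EPERM.
	 */
	if (fcntl(fd, F_SET_RW_HINT, &hint) < 0)
		printf("F_SET_RW_HINT: %s\n", strerror(errno));
	else
		printf("write hint updated\n");
	return 0;
}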
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index d42f01e0fc1c..955f19e27e47 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1350,40 +1350,12 @@ static inline int iomap_zero_iter_flush_and_stale(struct iomap_iter *i)
         return filemap_write_and_wait_range(mapping, i->pos, end);
 }
 
-static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero,
-                bool *range_dirty)
+static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
 {
-        const struct iomap *srcmap = iomap_iter_srcmap(iter);
         loff_t pos = iter->pos;
         loff_t length = iomap_length(iter);
         loff_t written = 0;
 
-        /*
-         * We must zero subranges of unwritten mappings that might be dirty in
-         * pagecache from previous writes. We only know whether the entire range
-         * was clean or not, however, and dirty folios may have been written
-         * back or reclaimed at any point after mapping lookup.
-         *
-         * The easiest way to deal with this is to flush pagecache to trigger
-         * any pending unwritten conversions and then grab the updated extents
-         * from the fs. The flush may change the current mapping, so mark it
-         * stale for the iterator to remap it for the next pass to handle
-         * properly.
-         *
-         * Note that holes are treated the same as unwritten because zero range
-         * is (ab)used for partial folio zeroing in some cases. Hole backed
-         * post-eof ranges can be dirtied via mapped write and the flush
-         * triggers writeback time post-eof zeroing.
-         */
-        if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN) {
-                if (*range_dirty) {
-                        *range_dirty = false;
-                        return iomap_zero_iter_flush_and_stale(iter);
-                }
-                /* range is clean and already zeroed, nothing to do */
-                return length;
-        }
-
         do {
                 struct folio *folio;
                 int status;
@@ -1397,6 +1369,8 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero,
                 if (iter->iomap.flags & IOMAP_F_STALE)
                         break;
 
+                /* warn about zeroing folios beyond eof that won't write back */
+                WARN_ON_ONCE(folio_pos(folio) > iter->inode->i_size);
                 offset = offset_in_folio(folio, pos);
                 if (bytes > folio_size(folio) - offset)
                         bytes = folio_size(folio) - offset;
@@ -1429,28 +1403,58 @@ iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
                 .len            = len,
                 .flags          = IOMAP_ZERO,
         };
+        struct address_space *mapping = inode->i_mapping;
+        unsigned int blocksize = i_blocksize(inode);
+        unsigned int off = pos & (blocksize - 1);
+        loff_t plen = min_t(loff_t, len, blocksize - off);
         int ret;
         bool range_dirty;
 
         /*
-         * Zero range wants to skip pre-zeroed (i.e. unwritten) mappings, but
-         * pagecache must be flushed to ensure stale data from previous
-         * buffered writes is not exposed. A flush is only required for certain
-         * types of mappings, but checking pagecache after mapping lookup is
-         * racy with writeback and reclaim.
+         * Zero range can skip mappings that are zero on disk so long as
+         * pagecache is clean. If pagecache was dirty prior to zero range, the
+         * mapping converts on writeback completion and so must be zeroed.
          *
-         * Therefore, check the entire range first and pass along whether any
-         * part of it is dirty. If so and an underlying mapping warrants it,
-         * flush the cache at that point. This trades off the occasional false
-         * positive (and spurious flush, if the dirty data and mapping don't
-         * happen to overlap) for simplicity in handling a relatively uncommon
-         * situation.
+         * The simplest way to deal with this across a range is to flush
+         * pagecache and process the updated mappings. To avoid excessive
+         * flushing on partial eof zeroing, special case it to zero the
+         * unaligned start portion if already dirty in pagecache.
+         */
+        if (off &&
+            filemap_range_needs_writeback(mapping, pos, pos + plen - 1)) {
+                iter.len = plen;
+                while ((ret = iomap_iter(&iter, ops)) > 0)
+                        iter.processed = iomap_zero_iter(&iter, did_zero);
+
+                iter.len = len - (iter.pos - pos);
+                if (ret || !iter.len)
+                        return ret;
+        }
+
+        /*
+         * To avoid an unconditional flush, check pagecache state and only flush
+         * if dirty and the fs returns a mapping that might convert on
+         * writeback.
          */
         range_dirty = filemap_range_needs_writeback(inode->i_mapping,
-                                        pos, pos + len - 1);
+                                        iter.pos, iter.pos + iter.len - 1);
+        while ((ret = iomap_iter(&iter, ops)) > 0) {
+                const struct iomap *srcmap = iomap_iter_srcmap(&iter);
 
-        while ((ret = iomap_iter(&iter, ops)) > 0)
-                iter.processed = iomap_zero_iter(&iter, did_zero, &range_dirty);
+                if (srcmap->type == IOMAP_HOLE ||
+                    srcmap->type == IOMAP_UNWRITTEN) {
+                        loff_t proc = iomap_length(&iter);
+
+                        if (range_dirty) {
+                                range_dirty = false;
+                                proc = iomap_zero_iter_flush_and_stale(&iter);
+                        }
+                        iter.processed = proc;
+                        continue;
+                }
+
+                iter.processed = iomap_zero_iter(&iter, did_zero);
+        }
         return ret;
 }
 EXPORT_SYMBOL_GPL(iomap_zero_range);
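Note on the iomap change: from userspace this path is reached through FALLOC_FL_ZERO_RANGE on an iomap-based filesystem such as XFS, and the interesting case is a zeroing request whose unaligned start falls in a block that is dirty in pagecache. A hedged sketch of such a trigger (file name and sizes are arbitrary, and whether a given filesystem routes this through iomap_zero_range() is an assumption):

#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>
#include <linux/falloc.h>

int main(void)
{
	char buf[8192];
	int fd = open("testfile", O_RDWR | O_CREAT | O_TRUNC, 0600);

	if (fd < 0)
		return 1;
	/* Dirty two pages of pagecache without forcing writeback. */
	memset(buf, 0xab, sizeof(buf));
	if (write(fd, buf, sizeof(buf)) != (ssize_t)sizeof(buf))
		return 1;
	/*
	 * Zero an unaligned sub-block range. With the fix, the dirty
	 * unaligned start is zeroed through the folio instead of being
	 * skipped as "zero on disk", which could otherwise expose stale
	 * data once writeback converted the mapping.
	 */
	if (fallocate(fd, FALLOC_FL_ZERO_RANGE, 100, 1000) < 0)
		perror("FALLOC_FL_ZERO_RANGE");
	fsync(fd);
	close(fd);
	return 0;
}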
diff --git a/fs/iomap/iter.c b/fs/iomap/iter.c
index 79a0614eaab7..3790918646af 100644
--- a/fs/iomap/iter.c
+++ b/fs/iomap/iter.c
@@ -22,26 +22,25 @@
 static inline int iomap_iter_advance(struct iomap_iter *iter)
 {
         bool stale = iter->iomap.flags & IOMAP_F_STALE;
+        int ret = 1;
 
         /* handle the previous iteration (if any) */
         if (iter->iomap.length) {
                 if (iter->processed < 0)
                         return iter->processed;
-                if (!iter->processed && !stale)
-                        return 0;
                 if (WARN_ON_ONCE(iter->processed > iomap_length(iter)))
                         return -EIO;
                 iter->pos += iter->processed;
                 iter->len -= iter->processed;
-                if (!iter->len)
-                        return 0;
+                if (!iter->len || (!iter->processed && !stale))
+                        ret = 0;
         }
 
-        /* clear the state for the next iteration */
+        /* clear the per iteration state */
         iter->processed = 0;
         memset(&iter->iomap, 0, sizeof(iter->iomap));
         memset(&iter->srcmap, 0, sizeof(iter->srcmap));
-        return 1;
+        return ret;
 }
 
 static inline void iomap_iter_done(struct iomap_iter *iter)
diff --git a/fs/namespace.c b/fs/namespace.c
index 6b0a17487d0f..23e81c2a1e3f 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -5057,21 +5057,32 @@ static int statmount_mnt_opts(struct kstatmount *s, struct seq_file *seq)
         return 0;
 }
 
-static inline int statmount_opt_unescape(struct seq_file *seq, char *buf_start)
+static inline int statmount_opt_process(struct seq_file *seq, size_t start)
 {
-        char *buf_end, *opt_start, *opt_end;
+        char *buf_end, *opt_end, *src, *dst;
         int count = 0;
 
+        if (unlikely(seq_has_overflowed(seq)))
+                return -EAGAIN;
+
         buf_end = seq->buf + seq->count;
+        dst = seq->buf + start;
+        src = dst + 1;  /* skip initial comma */
+
+        if (src >= buf_end) {
+                seq->count = start;
+                return 0;
+        }
+
         *buf_end = '\0';
-        for (opt_start = buf_start + 1; opt_start < buf_end; opt_start = opt_end + 1) {
-                opt_end = strchrnul(opt_start, ',');
+        for (; src < buf_end; src = opt_end + 1) {
+                opt_end = strchrnul(src, ',');
                 *opt_end = '\0';
-                buf_start += string_unescape(opt_start, buf_start, 0, UNESCAPE_OCTAL) + 1;
+                dst += string_unescape(src, dst, 0, UNESCAPE_OCTAL) + 1;
                 if (WARN_ON_ONCE(++count == INT_MAX))
                         return -EOVERFLOW;
         }
-        seq->count = buf_start - 1 - seq->buf;
+        seq->count = dst - 1 - seq->buf;
         return count;
 }
 
@@ -5080,24 +5091,16 @@ static int statmount_opt_array(struct kstatmount *s, struct seq_file *seq)
         struct vfsmount *mnt = s->mnt;
         struct super_block *sb = mnt->mnt_sb;
         size_t start = seq->count;
-        char *buf_start;
         int err;
 
         if (!sb->s_op->show_options)
                 return 0;
 
-        buf_start = seq->buf + start;
         err = sb->s_op->show_options(seq, mnt->mnt_root);
         if (err)
                 return err;
 
-        if (unlikely(seq_has_overflowed(seq)))
-                return -EAGAIN;
-
-        if (seq->count == start)
-                return 0;
-
-        err = statmount_opt_unescape(seq, buf_start);
+        err = statmount_opt_process(seq, start);
         if (err < 0)
                 return err;
 
@@ -5110,22 +5113,13 @@ static int statmount_opt_sec_array(struct kstatmount *s, struct seq_file *seq)
         struct vfsmount *mnt = s->mnt;
         struct super_block *sb = mnt->mnt_sb;
         size_t start = seq->count;
-        char *buf_start;
         int err;
 
-        buf_start = seq->buf + start;
-
         err = security_sb_show_options(seq, sb);
-        if (!err)
+        if (err)
                 return err;
 
-        if (unlikely(seq_has_overflowed(seq)))
-                return -EAGAIN;
-
-        if (seq->count == start)
-                return 0;
-
-        err = statmount_opt_unescape(seq, buf_start);
+        err = statmount_opt_process(seq, start);
         if (err < 0)
                 return err;
 
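Note on the fs/namespace.c hunks: statmount_opt_process() now derives both cursors from the saved `start` offset, checks seq overflow before touching the buffer, handles an empty option string, and statmount_opt_sec_array() loses its inverted error check. The in-place rewrite is safe because unescaping only ever shrinks the data, so the write cursor trails the read cursor. A standalone model of that pass (the helper name and the 3-digit-octal-only handling are simplifications, not the kernel's string_unescape()):

#include <stdio.h>
#include <string.h>

/*
 * Toy model of the statmount option pass: the buffer holds show_options()
 * style output (",opt1,opt2,...") with commas inside values escaped as
 * "\054". Split at the remaining commas and decode octal escapes in place;
 * dst always trails src, so overwriting the buffer is safe.
 */
static int opts_unescape(char *buf)
{
	char *src = buf + 1;	/* skip the leading comma, as the kernel does */
	char *dst = buf;
	int count = 0;

	if (!*src)
		return 0;	/* no options at all */
	for (;;) {
		char c = *src++;

		if (c == '\\' &&
		    src[0] >= '0' && src[0] <= '7' &&
		    src[1] >= '0' && src[1] <= '7' &&
		    src[2] >= '0' && src[2] <= '7') {
			c = (src[0] - '0') << 6 | (src[1] - '0') << 3 |
			    (src[2] - '0');
			src += 3;
		} else if (c == ',' || c == '\0') {
			*dst++ = '\0';	/* terminate the current entry */
			count++;
			if (c == '\0')
				return count;
			continue;
		}
		*dst++ = c;
	}
}

int main(void)
{
	char buf[] = ",ro,label=a\\054b";
	int i, n = opts_unescape(buf);
	char *p = buf;

	printf("%d options\n", n);	/* prints: 2, [ro], [label=a,b] */
	for (i = 0; i < n; i++, p += strlen(p) + 1)
		printf("  [%s]\n", p);
	return 0;
}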
diff --git a/fs/netfs/fscache_io.c b/fs/netfs/fscache_io.c
index 38637e5c9b57..b1722a82c03d 100644
--- a/fs/netfs/fscache_io.c
+++ b/fs/netfs/fscache_io.c
@@ -9,7 +9,6 @@
 #include <...>
 #include <...>
 #include <...>
-#include <...>
 #include "internal.h"
 
 /**
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 7a85735d584f..e376f48c4b8b 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -600,6 +600,7 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
                                 ret = -EFAULT;
                                 goto out;
                         }
+                        ret = 0;
                         /*
                          * We know the bounce buffer is safe to copy from, so
                          * use _copy_to_iter() directly.
diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
index 68801e1a9ec2..70f65eb320a7 100644
--- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c
+++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
@@ -1026,7 +1026,7 @@ FIXTURE_SETUP(mount_setattr_idmapped)
                         "size=100000,mode=700"), 0);
 
         ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV,
-                        "size=100000,mode=700"), 0);
+                        "size=2m,mode=700"), 0);
 
         ASSERT_EQ(mkdir("/mnt/A", 0777), 0);
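Note on the one-line fs/proc/kcore.c change: it closes a classic hazard where an error code parked in `ret` on one path leaks into a later, successful path that never resets it. Distilled into a self-contained sketch (the helpers are toy stand-ins, not kcore code):

#include <errno.h>
#include <stdio.h>

static int fast_copy(int i)   { return i % 3 != 0; }	/* toy stand-ins: */
static int bounce_copy(int i) { return 1; }		/* nonzero == success */

static long copy_all(int n)
{
	long ret = 0;
	int i;

	for (i = 0; i < n; i++) {
		if (!fast_copy(i)) {
			ret = -EFAULT;		/* provisional error */
			if (!bounce_copy(i))
				break;		/* real failure: report it */
			ret = 0;		/* recovered: clear the stale code */
		}
	}
	return ret;
}

int main(void)
{
	/* without the "ret = 0" line this would report -EFAULT */
	printf("copy_all: %ld\n", copy_all(10));
	return 0;
}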