From db1faacb3a27863647845eb5bd17521e77ce430d Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 30 Aug 2024 15:04:43 +0200 Subject: [PATCH 01/19] adi: remove unused f_version It's not used for adi so don't bother with it at all. Link: https://lore.kernel.org/r/20240830-vfs-file-f_version-v1-2-6d3e4816aa7b@kernel.org Reviewed-by: Jan Kara Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- drivers/char/adi.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/char/adi.c b/drivers/char/adi.c index 751d7cc0da1b..c091a0282ad0 100644 --- a/drivers/char/adi.c +++ b/drivers/char/adi.c @@ -196,7 +196,6 @@ static loff_t adi_llseek(struct file *file, loff_t offset, int whence) if (offset != file->f_pos) { file->f_pos = offset; - file->f_version = 0; ret = offset; } From 387b499b789ce60c195db510b53f54ea728e10e3 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 30 Aug 2024 15:04:44 +0200 Subject: [PATCH 02/19] ceph: remove unused f_version It's not used for ceph so don't bother with it at all. Link: https://lore.kernel.org/r/20240830-vfs-file-f_version-v1-3-6d3e4816aa7b@kernel.org Reviewed-by: Jan Kara Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/ceph/dir.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 18c72b305858..ddec8c9244ee 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -707,7 +707,6 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence) if (offset != file->f_pos) { file->f_pos = offset; - file->f_version = 0; dfi->file_info.flags &= ~CEPH_F_ATEND; } retval = offset; From 1c5a54af4717ff54b640986768b901aa6c294297 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 30 Aug 2024 15:04:45 +0200 Subject: [PATCH 03/19] s390: remove unused f_version It's not used so don't bother with it at all. Link: https://lore.kernel.org/r/20240830-vfs-file-f_version-v1-4-6d3e4816aa7b@kernel.org Reviewed-by: Jan Kara Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- drivers/s390/char/hmcdrv_dev.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/s390/char/hmcdrv_dev.c b/drivers/s390/char/hmcdrv_dev.c index 8d50c894711f..e069dd685899 100644 --- a/drivers/s390/char/hmcdrv_dev.c +++ b/drivers/s390/char/hmcdrv_dev.c @@ -186,9 +186,6 @@ static loff_t hmcdrv_dev_seek(struct file *fp, loff_t pos, int whence) if (pos < 0) return -EINVAL; - if (fp->f_pos != pos) - ++fp->f_version; - fp->f_pos = pos; return pos; } From d095a5be7533aa3e72f7358ddbcd595d67d56ff1 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 30 Aug 2024 15:04:46 +0200 Subject: [PATCH 04/19] fs: add vfs_setpos_cookie() Add a new helper and make vfs_setpos() call it. We will use it in follow-up patches. Link: https://lore.kernel.org/r/20240830-vfs-file-f_version-v1-5-6d3e4816aa7b@kernel.org Reviewed-by: Jan Kara Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/read_write.c | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/fs/read_write.c b/fs/read_write.c index 90e283b31ca1..66ff52860496 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -39,6 +39,34 @@ static inline bool unsigned_offsets(struct file *file) return file->f_mode & FMODE_UNSIGNED_OFFSET; } +/** + * vfs_setpos_cookie - update the file offset for lseek and reset cookie + * @file: file structure in question + * @offset: file offset to seek to + * @maxsize: maximum file size + * @cookie: cookie to reset + * + * Update the file offset to the value specified by @offset if the given + * offset is valid and it is not equal to the current file offset and + * reset the specified cookie to indicate that a seek happened. + * + * Return the specified offset on success and -EINVAL on invalid offset. + */ +static loff_t vfs_setpos_cookie(struct file *file, loff_t offset, + loff_t maxsize, u64 *cookie) +{ + if (offset < 0 && !unsigned_offsets(file)) + return -EINVAL; + if (offset > maxsize) + return -EINVAL; + + if (offset != file->f_pos) { + file->f_pos = offset; + *cookie = 0; + } + return offset; +} + /** * vfs_setpos - update the file offset for lseek * @file: file structure in question @@ -53,16 +81,7 @@ static inline bool unsigned_offsets(struct file *file) */ loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize) { - if (offset < 0 && !unsigned_offsets(file)) - return -EINVAL; - if (offset > maxsize) - return -EINVAL; - - if (offset != file->f_pos) { - file->f_pos = offset; - file->f_version = 0; - } - return offset; + return vfs_setpos_cookie(file, offset, maxsize, &file->f_version); } EXPORT_SYMBOL(vfs_setpos); From b8c7451928ab2698653966a54ab5fff2fce484ff Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 30 Aug 2024 15:04:47 +0200 Subject: [PATCH 05/19] fs: add must_set_pos() Add a new must_set_pos() helper. We will use it in follow-up patches. Temporarily mark it as unused. This is only done to keep the diff small and reviewable. Link: https://lore.kernel.org/r/20240830-vfs-file-f_version-v1-6-6d3e4816aa7b@kernel.org Reviewed-by: Jan Kara Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/read_write.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/fs/read_write.c b/fs/read_write.c index 66ff52860496..5434467f5c05 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -85,6 +85,60 @@ loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize) } EXPORT_SYMBOL(vfs_setpos); +/** + * must_set_pos - check whether f_pos has to be updated + * @file: file to seek on + * @offset: offset to use + * @whence: type of seek operation + * @eof: end of file + * + * Check whether f_pos needs to be updated and update @offset according + * to @whence. + * + * Return: 0 if f_pos doesn't need to be updated, 1 if f_pos has to be + * updated, and negative error code on failure. + */ +static __maybe_unused int must_set_pos(struct file *file, loff_t *offset, + int whence, loff_t eof) +{ + switch (whence) { + case SEEK_END: + *offset += eof; + break; + case SEEK_CUR: + /* + * Here we special-case the lseek(fd, 0, SEEK_CUR) + * position-querying operation. Avoid rewriting the "same" + * f_pos value back to the file because a concurrent read(), + * write() or lseek() might have altered it + */ + if (*offset == 0) { + *offset = file->f_pos; + return 0; + } + break; + case SEEK_DATA: + /* + * In the generic case the entire file is data, so as long as + * offset isn't at the end of the file then the offset is data. + */ + if ((unsigned long long)*offset >= eof) + return -ENXIO; + break; + case SEEK_HOLE: + /* + * There is a virtual hole at the end of the file, so as long as + * offset isn't i_size or larger, return i_size. + */ + if ((unsigned long long)*offset >= eof) + return -ENXIO; + *offset = eof; + break; + } + + return 1; +} + /** * generic_file_llseek_size - generic llseek implementation for regular files * @file: file structure to seek on From ed904935c3992ec4178a38f61eca8ce7303ae1a0 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 30 Aug 2024 15:04:48 +0200 Subject: [PATCH 06/19] fs: use must_set_pos() Make generic_file_llseek_size() use must_set_pos(). We'll use must_set_pos() in another helper in a minutes. Remove __maybe_unused from must_set_pos() now that it's used. Link: https://lore.kernel.org/r/20240830-vfs-file-f_version-v1-7-6d3e4816aa7b@kernel.org Reviewed-by: Jan Kara Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/read_write.c | 52 +++++++++++++------------------------------------ 1 file changed, 14 insertions(+), 38 deletions(-) diff --git a/fs/read_write.c b/fs/read_write.c index 5434467f5c05..b07e48cd81d1 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -98,8 +98,7 @@ EXPORT_SYMBOL(vfs_setpos); * Return: 0 if f_pos doesn't need to be updated, 1 if f_pos has to be * updated, and negative error code on failure. */ -static __maybe_unused int must_set_pos(struct file *file, loff_t *offset, - int whence, loff_t eof) +static int must_set_pos(struct file *file, loff_t *offset, int whence, loff_t eof) { switch (whence) { case SEEK_END: @@ -159,45 +158,22 @@ loff_t generic_file_llseek_size(struct file *file, loff_t offset, int whence, loff_t maxsize, loff_t eof) { - switch (whence) { - case SEEK_END: - offset += eof; - break; - case SEEK_CUR: - /* - * Here we special-case the lseek(fd, 0, SEEK_CUR) - * position-querying operation. Avoid rewriting the "same" - * f_pos value back to the file because a concurrent read(), - * write() or lseek() might have altered it - */ - if (offset == 0) - return file->f_pos; - /* - * f_lock protects against read/modify/write race with other - * SEEK_CURs. Note that parallel writes and reads behave - * like SEEK_SET. - */ - spin_lock(&file->f_lock); - offset = vfs_setpos(file, file->f_pos + offset, maxsize); - spin_unlock(&file->f_lock); + int ret; + + ret = must_set_pos(file, &offset, whence, eof); + if (ret < 0) + return ret; + if (ret == 0) return offset; - case SEEK_DATA: + + if (whence == SEEK_CUR) { /* - * In the generic case the entire file is data, so as long as - * offset isn't at the end of the file then the offset is data. + * f_lock protects against read/modify/write race with + * other SEEK_CURs. Note that parallel writes and reads + * behave like SEEK_SET. */ - if ((unsigned long long)offset >= eof) - return -ENXIO; - break; - case SEEK_HOLE: - /* - * There is a virtual hole at the end of the file, so as long as - * offset isn't i_size or larger, return i_size. - */ - if ((unsigned long long)offset >= eof) - return -ENXIO; - offset = eof; - break; + guard(spinlock)(&file->f_lock); + return vfs_setpos(file, file->f_pos + offset, maxsize); } return vfs_setpos(file, offset, maxsize); From d688d65a847ff910146eff51e70f4b7049895eb5 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 30 Aug 2024 15:04:49 +0200 Subject: [PATCH 07/19] fs: add generic_llseek_cookie() This is similar to generic_file_llseek() but allows the caller to specify a cookie that will be updated to indicate that a seek happened. Caller's requiring that information in their readdir implementations can use that. Link: https://lore.kernel.org/r/20240830-vfs-file-f_version-v1-8-6d3e4816aa7b@kernel.org Reviewed-by: Jan Kara Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/read_write.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/fs.h | 2 ++ 2 files changed, 47 insertions(+) diff --git a/fs/read_write.c b/fs/read_write.c index b07e48cd81d1..fb519e55c8e8 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -180,6 +180,51 @@ generic_file_llseek_size(struct file *file, loff_t offset, int whence, } EXPORT_SYMBOL(generic_file_llseek_size); +/** + * generic_llseek_cookie - versioned llseek implementation + * @file: file structure to seek on + * @offset: file offset to seek to + * @whence: type of seek + * @cookie: cookie to update + * + * See generic_file_llseek for a general description and locking assumptions. + * + * In contrast to generic_file_llseek, this function also resets a + * specified cookie to indicate a seek took place. + */ +loff_t generic_llseek_cookie(struct file *file, loff_t offset, int whence, + u64 *cookie) +{ + struct inode *inode = file->f_mapping->host; + loff_t maxsize = inode->i_sb->s_maxbytes; + loff_t eof = i_size_read(inode); + int ret; + + if (WARN_ON_ONCE(!cookie)) + return -EINVAL; + + /* + * Require that this is only used for directories that guarantee + * synchronization between readdir and seek so that an update to + * @cookie is correctly synchronized with concurrent readdir. + */ + if (WARN_ON_ONCE(!(file->f_mode & FMODE_ATOMIC_POS))) + return -EINVAL; + + ret = must_set_pos(file, &offset, whence, eof); + if (ret < 0) + return ret; + if (ret == 0) + return offset; + + /* No need to hold f_lock because we know that f_pos_lock is held. */ + if (whence == SEEK_CUR) + return vfs_setpos_cookie(file, file->f_pos + offset, maxsize, cookie); + + return vfs_setpos_cookie(file, offset, maxsize, cookie); +} +EXPORT_SYMBOL(generic_llseek_cookie); + /** * generic_file_llseek - generic llseek implementation for regular files * @file: file structure to seek on diff --git a/include/linux/fs.h b/include/linux/fs.h index 58c91a52cad1..3e6b3c1afb31 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3202,6 +3202,8 @@ extern loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize); extern loff_t generic_file_llseek(struct file *file, loff_t offset, int whence); extern loff_t generic_file_llseek_size(struct file *file, loff_t offset, int whence, loff_t maxsize, loff_t eof); +loff_t generic_llseek_cookie(struct file *file, loff_t offset, int whence, + u64 *cookie); extern loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size); extern loff_t no_seek_end_llseek_size(struct file *, loff_t, int, loff_t); From bad74142a04bf9b161237f52667473a111181b83 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 30 Aug 2024 15:04:50 +0200 Subject: [PATCH 08/19] affs: store cookie in private data Store the cookie to detect concurrent seeks on directories in file->private_data. Link: https://lore.kernel.org/r/20240830-vfs-file-f_version-v1-9-6d3e4816aa7b@kernel.org Reviewed-by: Jan Kara Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/affs/dir.c | 44 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 6 deletions(-) diff --git a/fs/affs/dir.c b/fs/affs/dir.c index b2bf7016e1b3..bd40d5f08810 100644 --- a/fs/affs/dir.c +++ b/fs/affs/dir.c @@ -17,13 +17,44 @@ #include #include "affs.h" +struct affs_dir_data { + unsigned long ino; + u64 cookie; +}; + static int affs_readdir(struct file *, struct dir_context *); +static loff_t affs_dir_llseek(struct file *file, loff_t offset, int whence) +{ + struct affs_dir_data *data = file->private_data; + + return generic_llseek_cookie(file, offset, whence, &data->cookie); +} + +static int affs_dir_open(struct inode *inode, struct file *file) +{ + struct affs_dir_data *data; + + data = kzalloc(sizeof(struct affs_dir_data), GFP_KERNEL); + if (!data) + return -ENOMEM; + file->private_data = data; + return 0; +} + +static int affs_dir_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + const struct file_operations affs_dir_operations = { + .open = affs_dir_open, .read = generic_read_dir, - .llseek = generic_file_llseek, + .llseek = affs_dir_llseek, .iterate_shared = affs_readdir, .fsync = affs_file_fsync, + .release = affs_dir_release, }; /* @@ -45,6 +76,7 @@ static int affs_readdir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); + struct affs_dir_data *data = file->private_data; struct super_block *sb = inode->i_sb; struct buffer_head *dir_bh = NULL; struct buffer_head *fh_bh = NULL; @@ -59,7 +91,7 @@ affs_readdir(struct file *file, struct dir_context *ctx) pr_debug("%s(ino=%lu,f_pos=%llx)\n", __func__, inode->i_ino, ctx->pos); if (ctx->pos < 2) { - file->private_data = (void *)0; + data->ino = 0; if (!dir_emit_dots(file, ctx)) return 0; } @@ -80,8 +112,8 @@ affs_readdir(struct file *file, struct dir_context *ctx) /* If the directory hasn't changed since the last call to readdir(), * we can jump directly to where we left off. */ - ino = (u32)(long)file->private_data; - if (ino && inode_eq_iversion(inode, file->f_version)) { + ino = data->ino; + if (ino && inode_eq_iversion(inode, data->cookie)) { pr_debug("readdir() left off=%d\n", ino); goto inside; } @@ -131,8 +163,8 @@ affs_readdir(struct file *file, struct dir_context *ctx) } while (ino); } done: - file->f_version = inode_query_iversion(inode); - file->private_data = (void *)(long)ino; + data->cookie = inode_query_iversion(inode); + data->ino = ino; affs_brelse(fh_bh); out_brelse_dir: From 794576e07585bfb31f810e25c58ee0bcfc5e19d9 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 30 Aug 2024 15:04:51 +0200 Subject: [PATCH 09/19] ext2: store cookie in private data Store the cookie to detect concurrent seeks on directories in file->private_data. Link: https://lore.kernel.org/r/20240830-vfs-file-f_version-v1-10-6d3e4816aa7b@kernel.org Reviewed-by: Jan Kara Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/ext2/dir.c | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 087457061c6e..6622c582f550 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -263,7 +263,7 @@ ext2_readdir(struct file *file, struct dir_context *ctx) unsigned long n = pos >> PAGE_SHIFT; unsigned long npages = dir_pages(inode); unsigned chunk_mask = ~(ext2_chunk_size(inode)-1); - bool need_revalidate = !inode_eq_iversion(inode, file->f_version); + bool need_revalidate = !inode_eq_iversion(inode, *(u64 *)file->private_data); bool has_filetype; if (pos > inode->i_size - EXT2_DIR_REC_LEN(1)) @@ -290,7 +290,7 @@ ext2_readdir(struct file *file, struct dir_context *ctx) offset = ext2_validate_entry(kaddr, offset, chunk_mask); ctx->pos = (n<f_version = inode_query_iversion(inode); + *(u64 *)file->private_data = inode_query_iversion(inode); need_revalidate = false; } de = (ext2_dirent *)(kaddr+offset); @@ -703,8 +703,30 @@ int ext2_empty_dir(struct inode *inode) return 0; } +static int ext2_dir_open(struct inode *inode, struct file *file) +{ + file->private_data = kzalloc(sizeof(u64), GFP_KERNEL); + if (!file->private_data) + return -ENOMEM; + return 0; +} + +static int ext2_dir_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + +static loff_t ext2_dir_llseek(struct file *file, loff_t offset, int whence) +{ + return generic_llseek_cookie(file, offset, whence, + (u64 *)file->private_data); +} + const struct file_operations ext2_dir_operations = { - .llseek = generic_file_llseek, + .open = ext2_dir_open, + .release = ext2_dir_release, + .llseek = ext2_dir_llseek, .read = generic_read_dir, .iterate_shared = ext2_readdir, .unlocked_ioctl = ext2_ioctl, From 4f05ee2f82b470c20f7ff260bb0d866425b09d05 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 30 Aug 2024 15:04:52 +0200 Subject: [PATCH 10/19] ext4: store cookie in private data Store the cookie to detect concurrent seeks on directories in file->private_data. Link: https://lore.kernel.org/r/20240830-vfs-file-f_version-v1-11-6d3e4816aa7b@kernel.org Acked-by: Theodore Ts'o Reviewed-by: Jan Kara Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/ext4/dir.c | 48 +++++++++++++++++++++++++++--------------------- fs/ext4/ext4.h | 2 ++ fs/ext4/inline.c | 7 ++++--- 3 files changed, 33 insertions(+), 24 deletions(-) diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index ff4514e4626b..13196afe55ce 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -133,6 +133,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) struct super_block *sb = inode->i_sb; struct buffer_head *bh = NULL; struct fscrypt_str fstr = FSTR_INIT(NULL, 0); + struct dir_private_info *info = file->private_data; err = fscrypt_prepare_readdir(inode); if (err) @@ -229,7 +230,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) * readdir(2), then we might be pointing to an invalid * dirent right now. Scan from the start of the block * to make sure. */ - if (!inode_eq_iversion(inode, file->f_version)) { + if (!inode_eq_iversion(inode, info->cookie)) { for (i = 0; i < sb->s_blocksize && i < offset; ) { de = (struct ext4_dir_entry_2 *) (bh->b_data + i); @@ -249,7 +250,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) offset = i; ctx->pos = (ctx->pos & ~(sb->s_blocksize - 1)) | offset; - file->f_version = inode_query_iversion(inode); + info->cookie = inode_query_iversion(inode); } while (ctx->pos < inode->i_size @@ -384,6 +385,7 @@ static inline loff_t ext4_get_htree_eof(struct file *filp) static loff_t ext4_dir_llseek(struct file *file, loff_t offset, int whence) { struct inode *inode = file->f_mapping->host; + struct dir_private_info *info = file->private_data; int dx_dir = is_dx_dir(inode); loff_t ret, htree_max = ext4_get_htree_eof(file); @@ -392,7 +394,7 @@ static loff_t ext4_dir_llseek(struct file *file, loff_t offset, int whence) htree_max, htree_max); else ret = ext4_llseek(file, offset, whence); - file->f_version = inode_peek_iversion(inode) - 1; + info->cookie = inode_peek_iversion(inode) - 1; return ret; } @@ -429,18 +431,15 @@ static void free_rb_tree_fname(struct rb_root *root) *root = RB_ROOT; } - -static struct dir_private_info *ext4_htree_create_dir_info(struct file *filp, - loff_t pos) +static void ext4_htree_init_dir_info(struct file *filp, loff_t pos) { - struct dir_private_info *p; + struct dir_private_info *p = filp->private_data; - p = kzalloc(sizeof(*p), GFP_KERNEL); - if (!p) - return NULL; - p->curr_hash = pos2maj_hash(filp, pos); - p->curr_minor_hash = pos2min_hash(filp, pos); - return p; + if (is_dx_dir(file_inode(filp)) && !p->initialized) { + p->curr_hash = pos2maj_hash(filp, pos); + p->curr_minor_hash = pos2min_hash(filp, pos); + p->initialized = true; + } } void ext4_htree_free_dir_info(struct dir_private_info *p) @@ -552,12 +551,7 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx) struct fname *fname; int ret = 0; - if (!info) { - info = ext4_htree_create_dir_info(file, ctx->pos); - if (!info) - return -ENOMEM; - file->private_data = info; - } + ext4_htree_init_dir_info(file, ctx->pos); if (ctx->pos == ext4_get_htree_eof(file)) return 0; /* EOF */ @@ -590,10 +584,10 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx) * cached entries. */ if ((!info->curr_node) || - !inode_eq_iversion(inode, file->f_version)) { + !inode_eq_iversion(inode, info->cookie)) { info->curr_node = NULL; free_rb_tree_fname(&info->root); - file->f_version = inode_query_iversion(inode); + info->cookie = inode_query_iversion(inode); ret = ext4_htree_fill_tree(file, info->curr_hash, info->curr_minor_hash, &info->next_hash); @@ -664,7 +658,19 @@ int ext4_check_all_de(struct inode *dir, struct buffer_head *bh, void *buf, return 0; } +static int ext4_dir_open(struct inode *inode, struct file *file) +{ + struct dir_private_info *info; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return -ENOMEM; + file->private_data = info; + return 0; +} + const struct file_operations ext4_dir_operations = { + .open = ext4_dir_open, .llseek = ext4_dir_llseek, .read = generic_read_dir, .iterate_shared = ext4_readdir, diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 08acd152261e..d62a4b9b26ce 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2553,6 +2553,8 @@ struct dir_private_info { __u32 curr_hash; __u32 curr_minor_hash; __u32 next_hash; + u64 cookie; + bool initialized; }; /* calculate the first block number of the group */ diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index e7a09a99837b..4282e12dc405 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1460,6 +1460,7 @@ int ext4_read_inline_dir(struct file *file, struct ext4_iloc iloc; void *dir_buf = NULL; int dotdot_offset, dotdot_size, extra_offset, extra_size; + struct dir_private_info *info = file->private_data; ret = ext4_get_inode_loc(inode, &iloc); if (ret) @@ -1503,12 +1504,12 @@ int ext4_read_inline_dir(struct file *file, extra_size = extra_offset + inline_size; /* - * If the version has changed since the last call to + * If the cookie has changed since the last call to * readdir(2), then we might be pointing to an invalid * dirent right now. Scan from the start of the inline * dir to make sure. */ - if (!inode_eq_iversion(inode, file->f_version)) { + if (!inode_eq_iversion(inode, info->cookie)) { for (i = 0; i < extra_size && i < offset;) { /* * "." is with offset 0 and @@ -1540,7 +1541,7 @@ int ext4_read_inline_dir(struct file *file, } offset = i; ctx->pos = offset; - file->f_version = inode_query_iversion(inode); + info->cookie = inode_query_iversion(inode); } while (ctx->pos < extra_size) { From 7a7ce8b3ba66754f5d275a71630b4ee8b507d266 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 30 Aug 2024 15:04:53 +0200 Subject: [PATCH 11/19] input: remove f_version abuse f_version is removed from struct file. Make input stop abusing f_version for stashing information for poll. Move the input state counter into input_seq_state and allocate it via seq_private_open() and free via seq_release_private(). Link: https://lore.kernel.org/r/20240830-vfs-file-f_version-v1-12-6d3e4816aa7b@kernel.org Reviewed-by: Jan Kara Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- drivers/input/input.c | 47 ++++++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/drivers/input/input.c b/drivers/input/input.c index 54c57b267b25..19ea1888da9f 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -1079,33 +1079,31 @@ static inline void input_wakeup_procfs_readers(void) wake_up(&input_devices_poll_wait); } +struct input_seq_state { + unsigned short pos; + bool mutex_acquired; + int input_devices_state; +}; + static __poll_t input_proc_devices_poll(struct file *file, poll_table *wait) { + struct seq_file *seq = file->private_data; + struct input_seq_state *state = seq->private; + poll_wait(file, &input_devices_poll_wait, wait); - if (file->f_version != input_devices_state) { - file->f_version = input_devices_state; + if (state->input_devices_state != input_devices_state) { + state->input_devices_state = input_devices_state; return EPOLLIN | EPOLLRDNORM; } return 0; } -union input_seq_state { - struct { - unsigned short pos; - bool mutex_acquired; - }; - void *p; -}; - static void *input_devices_seq_start(struct seq_file *seq, loff_t *pos) { - union input_seq_state *state = (union input_seq_state *)&seq->private; + struct input_seq_state *state = seq->private; int error; - /* We need to fit into seq->private pointer */ - BUILD_BUG_ON(sizeof(union input_seq_state) != sizeof(seq->private)); - error = mutex_lock_interruptible(&input_mutex); if (error) { state->mutex_acquired = false; @@ -1124,7 +1122,7 @@ static void *input_devices_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void input_seq_stop(struct seq_file *seq, void *v) { - union input_seq_state *state = (union input_seq_state *)&seq->private; + struct input_seq_state *state = seq->private; if (state->mutex_acquired) mutex_unlock(&input_mutex); @@ -1210,7 +1208,8 @@ static const struct seq_operations input_devices_seq_ops = { static int input_proc_devices_open(struct inode *inode, struct file *file) { - return seq_open(file, &input_devices_seq_ops); + return seq_open_private(file, &input_devices_seq_ops, + sizeof(struct input_seq_state)); } static const struct proc_ops input_devices_proc_ops = { @@ -1218,17 +1217,14 @@ static const struct proc_ops input_devices_proc_ops = { .proc_poll = input_proc_devices_poll, .proc_read = seq_read, .proc_lseek = seq_lseek, - .proc_release = seq_release, + .proc_release = seq_release_private, }; static void *input_handlers_seq_start(struct seq_file *seq, loff_t *pos) { - union input_seq_state *state = (union input_seq_state *)&seq->private; + struct input_seq_state *state = seq->private; int error; - /* We need to fit into seq->private pointer */ - BUILD_BUG_ON(sizeof(union input_seq_state) != sizeof(seq->private)); - error = mutex_lock_interruptible(&input_mutex); if (error) { state->mutex_acquired = false; @@ -1243,7 +1239,7 @@ static void *input_handlers_seq_start(struct seq_file *seq, loff_t *pos) static void *input_handlers_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - union input_seq_state *state = (union input_seq_state *)&seq->private; + struct input_seq_state *state = seq->private; state->pos = *pos + 1; return seq_list_next(v, &input_handler_list, pos); @@ -1252,7 +1248,7 @@ static void *input_handlers_seq_next(struct seq_file *seq, void *v, loff_t *pos) static int input_handlers_seq_show(struct seq_file *seq, void *v) { struct input_handler *handler = container_of(v, struct input_handler, node); - union input_seq_state *state = (union input_seq_state *)&seq->private; + struct input_seq_state *state = seq->private; seq_printf(seq, "N: Number=%u Name=%s", state->pos, handler->name); if (handler->filter) @@ -1273,14 +1269,15 @@ static const struct seq_operations input_handlers_seq_ops = { static int input_proc_handlers_open(struct inode *inode, struct file *file) { - return seq_open(file, &input_handlers_seq_ops); + return seq_open_private(file, &input_handlers_seq_ops, + sizeof(struct input_seq_state)); } static const struct proc_ops input_handlers_proc_ops = { .proc_open = input_proc_handlers_open, .proc_read = seq_read, .proc_lseek = seq_lseek, - .proc_release = seq_release, + .proc_release = seq_release_private, }; static int __init input_proc_init(void) From ceaa5e80db7c73321c89fda39c3c8c78817aecd2 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 30 Aug 2024 15:04:54 +0200 Subject: [PATCH 12/19] ocfs2: store cookie in private data Store the cookie to detect concurrent seeks on directories in file->private_data. Link: https://lore.kernel.org/r/20240830-vfs-file-f_version-v1-13-6d3e4816aa7b@kernel.org Reviewed-by: Jan Kara Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/ocfs2/dir.c | 3 ++- fs/ocfs2/file.c | 11 +++++++++-- fs/ocfs2/file.h | 1 + 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index f0beb173dbba..ccef3f42b333 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -1932,6 +1932,7 @@ int ocfs2_readdir(struct file *file, struct dir_context *ctx) { int error = 0; struct inode *inode = file_inode(file); + struct ocfs2_file_private *fp = file->private_data; int lock_level = 0; trace_ocfs2_readdir((unsigned long long)OCFS2_I(inode)->ip_blkno); @@ -1952,7 +1953,7 @@ int ocfs2_readdir(struct file *file, struct dir_context *ctx) goto bail_nolock; } - error = ocfs2_dir_foreach_blk(inode, &file->f_version, ctx, false); + error = ocfs2_dir_foreach_blk(inode, &fp->cookie, ctx, false); ocfs2_inode_unlock(inode, lock_level); if (error) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index ccc57038a977..115ab2172820 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2750,6 +2750,13 @@ static loff_t ocfs2_remap_file_range(struct file *file_in, loff_t pos_in, return remapped > 0 ? remapped : ret; } +static loff_t ocfs2_dir_llseek(struct file *file, loff_t offset, int whence) +{ + struct ocfs2_file_private *fp = file->private_data; + + return generic_llseek_cookie(file, offset, whence, &fp->cookie); +} + const struct inode_operations ocfs2_file_iops = { .setattr = ocfs2_setattr, .getattr = ocfs2_getattr, @@ -2797,7 +2804,7 @@ const struct file_operations ocfs2_fops = { WRAP_DIR_ITER(ocfs2_readdir) // FIXME! const struct file_operations ocfs2_dops = { - .llseek = generic_file_llseek, + .llseek = ocfs2_dir_llseek, .read = generic_read_dir, .iterate_shared = shared_ocfs2_readdir, .fsync = ocfs2_sync_file, @@ -2843,7 +2850,7 @@ const struct file_operations ocfs2_fops_no_plocks = { }; const struct file_operations ocfs2_dops_no_plocks = { - .llseek = generic_file_llseek, + .llseek = ocfs2_dir_llseek, .read = generic_read_dir, .iterate_shared = shared_ocfs2_readdir, .fsync = ocfs2_sync_file, diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h index 8e53e4ac1120..41e65e45a9f3 100644 --- a/fs/ocfs2/file.h +++ b/fs/ocfs2/file.h @@ -20,6 +20,7 @@ struct ocfs2_alloc_context; enum ocfs2_alloc_restarted; struct ocfs2_file_private { + u64 cookie; struct file *fp_file; struct mutex fp_mutex; struct ocfs2_lock_res fp_flock; From b4dba2efa8106002076b070fdff24ed6bf1ea87b Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 30 Aug 2024 15:04:55 +0200 Subject: [PATCH 13/19] proc: store cookie in private data Store the cookie to detect concurrent seeks on directories in file->private_data. Link: https://lore.kernel.org/r/20240830-vfs-file-f_version-v1-14-6d3e4816aa7b@kernel.org Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/proc/base.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 72a1acd03675..988f6bbfc4bd 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3870,12 +3870,12 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx) if (!dir_emit_dots(file, ctx)) return 0; - /* f_version caches the tgid value that the last readdir call couldn't - * return. lseek aka telldir automagically resets f_version to 0. + /* We cache the tgid value that the last readdir call couldn't + * return and lseek resets it to 0. */ ns = proc_pid_ns(inode->i_sb); - tid = (int)file->f_version; - file->f_version = 0; + tid = (int)(intptr_t)file->private_data; + file->private_data = NULL; for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns); task; task = next_tid(task), ctx->pos++) { @@ -3890,7 +3890,7 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx) proc_task_instantiate, task, NULL)) { /* returning this tgid failed, save it as the first * pid for the next readir call */ - file->f_version = (u64)tid; + file->private_data = (void *)(intptr_t)tid; put_task_struct(task); break; } @@ -3915,6 +3915,24 @@ static int proc_task_getattr(struct mnt_idmap *idmap, return 0; } +/* + * proc_task_readdir() set @file->private_data to a positive integer + * value, so casting that to u64 is safe. generic_llseek_cookie() will + * set @cookie to 0, so casting to an int is safe. The WARN_ON_ONCE() is + * here to catch any unexpected change in behavior either in + * proc_task_readdir() or generic_llseek_cookie(). + */ +static loff_t proc_dir_llseek(struct file *file, loff_t offset, int whence) +{ + u64 cookie = (u64)(intptr_t)file->private_data; + loff_t off; + + off = generic_llseek_cookie(file, offset, whence, &cookie); + WARN_ON_ONCE(cookie > INT_MAX); + file->private_data = (void *)(intptr_t)cookie; /* serialized by f_pos_lock */ + return off; +} + static const struct inode_operations proc_task_inode_operations = { .lookup = proc_task_lookup, .getattr = proc_task_getattr, @@ -3925,7 +3943,7 @@ static const struct inode_operations proc_task_inode_operations = { static const struct file_operations proc_task_operations = { .read = generic_read_dir, .iterate_shared = proc_task_readdir, - .llseek = generic_file_llseek, + .llseek = proc_dir_llseek, }; void __init set_proc_pid_nlink(void) From 3dd4624ffcd2e6e5d2cee5a6c234774ce27e1f04 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 30 Aug 2024 15:04:56 +0200 Subject: [PATCH 14/19] udf: store cookie in private data Store the cookie to detect concurrent seeks on directories in file->private_data. Link: https://lore.kernel.org/r/20240830-vfs-file-f_version-v1-15-6d3e4816aa7b@kernel.org Reviewed-by: Jan Kara Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/udf/dir.c | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/fs/udf/dir.c b/fs/udf/dir.c index f94f45fe2c91..5023dfe191e8 100644 --- a/fs/udf/dir.c +++ b/fs/udf/dir.c @@ -60,7 +60,7 @@ static int udf_readdir(struct file *file, struct dir_context *ctx) * identifying beginning of dir entry (names are under user control), * we need to scan the directory from the beginning. */ - if (!inode_eq_iversion(dir, file->f_version)) { + if (!inode_eq_iversion(dir, *(u64 *)file->private_data)) { emit_pos = nf_pos; nf_pos = 0; } else { @@ -122,15 +122,37 @@ static int udf_readdir(struct file *file, struct dir_context *ctx) udf_fiiter_release(&iter); out: if (pos_valid) - file->f_version = inode_query_iversion(dir); + *(u64 *)file->private_data = inode_query_iversion(dir); kfree(fname); return ret; } +static int udf_dir_open(struct inode *inode, struct file *file) +{ + file->private_data = kzalloc(sizeof(u64), GFP_KERNEL); + if (!file->private_data) + return -ENOMEM; + return 0; +} + +static int udf_dir_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + +static loff_t udf_dir_llseek(struct file *file, loff_t offset, int whence) +{ + return generic_llseek_cookie(file, offset, whence, + (u64 *)file->private_data); +} + /* readdir and lookup functions */ const struct file_operations udf_dir_operations = { - .llseek = generic_file_llseek, + .open = udf_dir_open, + .release = udf_dir_release, + .llseek = udf_dir_llseek, .read = generic_read_dir, .iterate_shared = udf_readdir, .unlocked_ioctl = udf_ioctl, From 0bea8287df6c86cacaf34eef167f23dcc5dbcede Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 30 Aug 2024 15:04:57 +0200 Subject: [PATCH 15/19] ufs: store cookie in private data Store the cookie to detect concurrent seeks on directories in file->private_data. Link: https://lore.kernel.org/r/20240830-vfs-file-f_version-v1-16-6d3e4816aa7b@kernel.org Reviewed-by: Jan Kara Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/ufs/dir.c | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index 61f25d3cf3f7..335f0ae529b4 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c @@ -435,7 +435,7 @@ ufs_readdir(struct file *file, struct dir_context *ctx) unsigned long n = pos >> PAGE_SHIFT; unsigned long npages = dir_pages(inode); unsigned chunk_mask = ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1); - bool need_revalidate = !inode_eq_iversion(inode, file->f_version); + bool need_revalidate = !inode_eq_iversion(inode, *(u64 *)file->private_data); unsigned flags = UFS_SB(sb)->s_flags; UFSD("BEGIN\n"); @@ -462,7 +462,7 @@ ufs_readdir(struct file *file, struct dir_context *ctx) offset = ufs_validate_entry(sb, kaddr, offset, chunk_mask); ctx->pos = (n<f_version = inode_query_iversion(inode); + *(u64 *)file->private_data = inode_query_iversion(inode); need_revalidate = false; } de = (struct ufs_dir_entry *)(kaddr+offset); @@ -646,9 +646,31 @@ int ufs_empty_dir(struct inode * inode) return 0; } +static int ufs_dir_open(struct inode *inode, struct file *file) +{ + file->private_data = kzalloc(sizeof(u64), GFP_KERNEL); + if (!file->private_data) + return -ENOMEM; + return 0; +} + +static int ufs_dir_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + +static loff_t ufs_dir_llseek(struct file *file, loff_t offset, int whence) +{ + return generic_llseek_cookie(file, offset, whence, + (u64 *)file->private_data); +} + const struct file_operations ufs_dir_operations = { + .open = ufs_dir_open, + .release = ufs_dir_release, .read = generic_read_dir, .iterate_shared = ufs_readdir, .fsync = generic_file_fsync, - .llseek = generic_file_llseek, + .llseek = ufs_dir_llseek, }; From 1146e5a69efca76501378f748388fd7742ad09cf Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 30 Aug 2024 15:04:58 +0200 Subject: [PATCH 16/19] ubifs: store cookie in private data Store the cookie to detect concurrent seeks on directories in file->private_data. Link: https://lore.kernel.org/r/20240830-vfs-file-f_version-v1-17-6d3e4816aa7b@kernel.org Reviewed-by: Jan Kara Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/ubifs/dir.c | 64 ++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 46 insertions(+), 18 deletions(-) diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index c77ea57fe696..fda82f3e16e8 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -555,6 +555,11 @@ static unsigned int vfs_dent_type(uint8_t type) return 0; } +struct ubifs_dir_data { + struct ubifs_dent_node *dent; + u64 cookie; +}; + /* * The classical Unix view for directory is that it is a linear array of * (name, inode number) entries. Linux/VFS assumes this model as well. @@ -582,6 +587,7 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) struct inode *dir = file_inode(file); struct ubifs_info *c = dir->i_sb->s_fs_info; bool encrypted = IS_ENCRYPTED(dir); + struct ubifs_dir_data *data = file->private_data; dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, ctx->pos); @@ -604,27 +610,27 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) fstr_real_len = fstr.len; } - if (file->f_version == 0) { + if (data->cookie == 0) { /* - * The file was seek'ed, which means that @file->private_data + * The file was seek'ed, which means that @data->dent * is now invalid. This may also be just the first * 'ubifs_readdir()' invocation, in which case - * @file->private_data is NULL, and the below code is + * @data->dent is NULL, and the below code is * basically a no-op. */ - kfree(file->private_data); - file->private_data = NULL; + kfree(data->dent); + data->dent = NULL; } /* - * 'generic_file_llseek()' unconditionally sets @file->f_version to - * zero, and we use this for detecting whether the file was seek'ed. + * 'ubifs_dir_llseek()' sets @data->cookie to zero, and we use this + * for detecting whether the file was seek'ed. */ - file->f_version = 1; + data->cookie = 1; /* File positions 0 and 1 correspond to "." and ".." */ if (ctx->pos < 2) { - ubifs_assert(c, !file->private_data); + ubifs_assert(c, !data->dent); if (!dir_emit_dots(file, ctx)) { if (encrypted) fscrypt_fname_free_buffer(&fstr); @@ -641,10 +647,10 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) } ctx->pos = key_hash_flash(c, &dent->key); - file->private_data = dent; + data->dent = dent; } - dent = file->private_data; + dent = data->dent; if (!dent) { /* * The directory was seek'ed to and is now readdir'ed. @@ -658,7 +664,7 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) goto out; } ctx->pos = key_hash_flash(c, &dent->key); - file->private_data = dent; + data->dent = dent; } while (1) { @@ -701,15 +707,15 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) goto out; } - kfree(file->private_data); + kfree(data->dent); ctx->pos = key_hash_flash(c, &dent->key); - file->private_data = dent; + data->dent = dent; cond_resched(); } out: - kfree(file->private_data); - file->private_data = NULL; + kfree(data->dent); + data->dent = NULL; if (encrypted) fscrypt_fname_free_buffer(&fstr); @@ -733,7 +739,10 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) /* Free saved readdir() state when the directory is closed */ static int ubifs_dir_release(struct inode *dir, struct file *file) { - kfree(file->private_data); + struct ubifs_dir_data *data = file->private_data; + + kfree(data->dent); + kfree(data); file->private_data = NULL; return 0; } @@ -1712,6 +1721,24 @@ int ubifs_getattr(struct mnt_idmap *idmap, const struct path *path, return 0; } +static int ubifs_dir_open(struct inode *inode, struct file *file) +{ + struct ubifs_dir_data *data; + + data = kzalloc(sizeof(struct ubifs_dir_data), GFP_KERNEL); + if (!data) + return -ENOMEM; + file->private_data = data; + return 0; +} + +static loff_t ubifs_dir_llseek(struct file *file, loff_t offset, int whence) +{ + struct ubifs_dir_data *data = file->private_data; + + return generic_llseek_cookie(file, offset, whence, &data->cookie); +} + const struct inode_operations ubifs_dir_inode_operations = { .lookup = ubifs_lookup, .create = ubifs_create, @@ -1732,7 +1759,8 @@ const struct inode_operations ubifs_dir_inode_operations = { }; const struct file_operations ubifs_dir_operations = { - .llseek = generic_file_llseek, + .open = ubifs_dir_open, + .llseek = ubifs_dir_llseek, .release = ubifs_dir_release, .read = generic_read_dir, .iterate_shared = ubifs_readdir, From 5e9b50dea970ae6d3e1309d4254157099734a2af Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 30 Aug 2024 15:04:59 +0200 Subject: [PATCH 17/19] fs: add f_pipe Only regular files with FMODE_ATOMIC_POS and directories need f_pos_lock. Place a new f_pipe member in a union with f_pos_lock that they can use and make them stop abusing f_version in follow-up patches. Link: https://lore.kernel.org/r/20240830-vfs-file-f_version-v1-18-6d3e4816aa7b@kernel.org Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/file_table.c | 7 +++++++ include/linux/fs.h | 8 +++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/fs/file_table.c b/fs/file_table.c index 3ef558f27a1c..7ce4d5dac080 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -156,6 +156,13 @@ static int init_file(struct file *f, int flags, const struct cred *cred) } spin_lock_init(&f->f_lock); + /* + * Note that f_pos_lock is only used for files raising + * FMODE_ATOMIC_POS and directories. Other files such as pipes + * don't need it and since f_pos_lock is in a union may reuse + * the space for other purposes. They are expected to initialize + * the respective member when opening the file. + */ mutex_init(&f->f_pos_lock); f->f_flags = flags; f->f_mode = OPEN_FMODE(flags); diff --git a/include/linux/fs.h b/include/linux/fs.h index 3e6b3c1afb31..ca4925008244 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1001,6 +1001,7 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) * @f_cred: stashed credentials of creator/opener * @f_path: path of the file * @f_pos_lock: lock protecting file position + * @f_pipe: specific to pipes * @f_pos: file position * @f_version: file version * @f_security: LSM security context of this file @@ -1026,7 +1027,12 @@ struct file { const struct cred *f_cred; /* --- cacheline 1 boundary (64 bytes) --- */ struct path f_path; - struct mutex f_pos_lock; + union { + /* regular files (with FMODE_ATOMIC_POS) and directories */ + struct mutex f_pos_lock; + /* pipes */ + u64 f_pipe; + }; loff_t f_pos; u64 f_version; /* --- cacheline 2 boundary (128 bytes) --- */ From 5a957bbac3ab9808a8df711a269e4d18f84e9e4a Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 30 Aug 2024 15:05:00 +0200 Subject: [PATCH 18/19] pipe: use f_pipe Pipes use f_version to defer poll notifications until a write has been observed. Since multiple file's refer to the same struct pipe_inode_info in their ->private_data moving it into their isn't feasible since we would need to introduce an additional pointer indirection. However, since pipes don't require f_pos_lock we placed a new f_pipe member into a union with f_pos_lock that pipes can use. This is similar to what we already do for struct inode where we have additional fields per file type. This will allow us to fully remove f_version in the next step. Link: https://lore.kernel.org/r/20240830-vfs-file-f_version-v1-19-6d3e4816aa7b@kernel.org Reviewed-by: Jan Kara Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/pipe.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index 7dff2aa50a6d..d3735f1d6f00 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -686,7 +686,7 @@ pipe_poll(struct file *filp, poll_table *wait) if (filp->f_mode & FMODE_READ) { if (!pipe_empty(head, tail)) mask |= EPOLLIN | EPOLLRDNORM; - if (!pipe->writers && filp->f_version != pipe->w_counter) + if (!pipe->writers && filp->f_pipe != pipe->w_counter) mask |= EPOLLHUP; } @@ -945,6 +945,7 @@ int create_pipe_files(struct file **res, int flags) } f->private_data = inode->i_pipe; + f->f_pipe = 0; res[0] = alloc_file_clone(f, O_RDONLY | (flags & O_NONBLOCK), &pipefifo_fops); @@ -954,6 +955,7 @@ int create_pipe_files(struct file **res, int flags) return PTR_ERR(res[0]); } res[0]->private_data = inode->i_pipe; + res[0]->f_pipe = 0; res[1] = f; stream_open(inode, res[0]); stream_open(inode, res[1]); @@ -1108,7 +1110,7 @@ static int fifo_open(struct inode *inode, struct file *filp) bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC; int ret; - filp->f_version = 0; + filp->f_pipe = 0; spin_lock(&inode->i_lock); if (inode->i_pipe) { @@ -1155,7 +1157,7 @@ static int fifo_open(struct inode *inode, struct file *filp) if ((filp->f_flags & O_NONBLOCK)) { /* suppress EPOLLHUP until we have * seen a writer */ - filp->f_version = pipe->w_counter; + filp->f_pipe = pipe->w_counter; } else { if (wait_for_partner(pipe, &pipe->w_counter)) goto err_rd; From 11068e0b64cbb540b96e577fcca0926242ecaf58 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 30 Aug 2024 15:05:01 +0200 Subject: [PATCH 19/19] fs: remove f_version Now that detecting concurrent seeks is done by the filesystems that require it we can remove f_version and free up 8 bytes for future extensions. Link: https://lore.kernel.org/r/20240830-vfs-file-f_version-v1-20-6d3e4816aa7b@kernel.org Reviewed-by: Jan Kara Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/read_write.c | 9 ++++----- include/linux/fs.h | 4 +--- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/fs/read_write.c b/fs/read_write.c index fb519e55c8e8..b19cce8e55b9 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -62,7 +62,8 @@ static loff_t vfs_setpos_cookie(struct file *file, loff_t offset, if (offset != file->f_pos) { file->f_pos = offset; - *cookie = 0; + if (cookie) + *cookie = 0; } return offset; } @@ -81,7 +82,7 @@ static loff_t vfs_setpos_cookie(struct file *file, loff_t offset, */ loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize) { - return vfs_setpos_cookie(file, offset, maxsize, &file->f_version); + return vfs_setpos_cookie(file, offset, maxsize, NULL); } EXPORT_SYMBOL(vfs_setpos); @@ -364,10 +365,8 @@ loff_t default_llseek(struct file *file, loff_t offset, int whence) } retval = -EINVAL; if (offset >= 0 || unsigned_offsets(file)) { - if (offset != file->f_pos) { + if (offset != file->f_pos) file->f_pos = offset; - file->f_version = 0; - } retval = offset; } out: diff --git a/include/linux/fs.h b/include/linux/fs.h index ca4925008244..7e11ce172140 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1003,7 +1003,6 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) * @f_pos_lock: lock protecting file position * @f_pipe: specific to pipes * @f_pos: file position - * @f_version: file version * @f_security: LSM security context of this file * @f_owner: file owner * @f_wb_err: writeback error @@ -1034,11 +1033,10 @@ struct file { u64 f_pipe; }; loff_t f_pos; - u64 f_version; - /* --- cacheline 2 boundary (128 bytes) --- */ #ifdef CONFIG_SECURITY void *f_security; #endif + /* --- cacheline 2 boundary (128 bytes) --- */ struct fown_struct *f_owner; errseq_t f_wb_err; errseq_t f_sb_err;