Merge patch series "file: remove f_version"

Christian Brauner <brauner@kernel.org> says:

The f_version member in struct file isn't particularly well-defined. It
is mainly used as a cookie to detect concurrent seeks when iterating
directories. But it is also abused by some subsystems for completely
unrelated things.

It is mostly a directory specific thing that doesn't really need to live
in struct file and with its wonky semantics it really lacks a specific
function.

For pipes, f_version is (ab)used to defer poll notifications until a
write has happened. And struct pipe_inode_info is used by multiple
struct files in their ->private_data so there's no chance of pushing
that down into file->private_data without introducing another pointer
indirection.

But this should be a solvable problem. Only regular files with
FMODE_ATOMIC_POS and directories require f_pos_lock. Pipes and other
files don't. So this adds a union into struct file encompassing
f_pos_lock and a pipe specific f_pipe member that pipes can use. This
union of course can be extended to other file types and is similar to
what we do in struct inode already.

* patches from https://lore.kernel.org/r/20240830-vfs-file-f_version-v1-0-6d3e4816aa7b@kernel.org:
  fs: remove f_version
  pipe: use f_pipe
  fs: add f_pipe
  ubifs: store cookie in private data
  ufs: store cookie in private data
  udf: store cookie in private data
  proc: store cookie in private data
  ocfs2: store cookie in private data
  input: remove f_version abuse
  ext4: store cookie in private data
  ext2: store cookie in private data
  affs: store cookie in private data
  fs: add generic_llseek_cookie()
  fs: use must_set_pos()
  fs: add must_set_pos()
  fs: add vfs_setpos_cookie()
  s390: remove unused f_version
  ceph: remove unused f_version
  adi: remove unused f_version
  file: remove pointless comment

Link: https://lore.kernel.org/r/20240830-vfs-file-f_version-v1-0-6d3e4816aa7b@kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
This commit is contained in:
Christian Brauner 2024-09-08 08:58:01 +02:00
commit 24a988f75c
No known key found for this signature in database
GPG Key ID: 91C61BC06578DCA2
20 changed files with 414 additions and 152 deletions

View File

@ -196,7 +196,6 @@ static loff_t adi_llseek(struct file *file, loff_t offset, int whence)
if (offset != file->f_pos) {
file->f_pos = offset;
file->f_version = 0;
ret = offset;
}

View File

@ -1079,33 +1079,31 @@ static inline void input_wakeup_procfs_readers(void)
wake_up(&input_devices_poll_wait);
}
/*
 * Per-open state for the input procfs seq files. It is allocated by
 * seq_open_private() in the open handlers and reached via seq->private.
 */
struct input_seq_state {
/* position stored by input_handlers_seq_next() and printed as "Number=" */
unsigned short pos;
/* set to false when taking input_mutex failed; checked by input_seq_stop() */
bool mutex_acquired;
/* value of input_devices_state last reported to this file by poll */
int input_devices_state;
};
static __poll_t input_proc_devices_poll(struct file *file, poll_table *wait)
{
struct seq_file *seq = file->private_data;
struct input_seq_state *state = seq->private;
poll_wait(file, &input_devices_poll_wait, wait);
if (file->f_version != input_devices_state) {
file->f_version = input_devices_state;
if (state->input_devices_state != input_devices_state) {
state->input_devices_state = input_devices_state;
return EPOLLIN | EPOLLRDNORM;
}
return 0;
}
union input_seq_state {
struct {
unsigned short pos;
bool mutex_acquired;
};
void *p;
};
static void *input_devices_seq_start(struct seq_file *seq, loff_t *pos)
{
union input_seq_state *state = (union input_seq_state *)&seq->private;
struct input_seq_state *state = seq->private;
int error;
/* We need to fit into seq->private pointer */
BUILD_BUG_ON(sizeof(union input_seq_state) != sizeof(seq->private));
error = mutex_lock_interruptible(&input_mutex);
if (error) {
state->mutex_acquired = false;
@ -1124,7 +1122,7 @@ static void *input_devices_seq_next(struct seq_file *seq, void *v, loff_t *pos)
static void input_seq_stop(struct seq_file *seq, void *v)
{
union input_seq_state *state = (union input_seq_state *)&seq->private;
struct input_seq_state *state = seq->private;
if (state->mutex_acquired)
mutex_unlock(&input_mutex);
@ -1210,7 +1208,8 @@ static const struct seq_operations input_devices_seq_ops = {
static int input_proc_devices_open(struct inode *inode, struct file *file)
{
return seq_open(file, &input_devices_seq_ops);
return seq_open_private(file, &input_devices_seq_ops,
sizeof(struct input_seq_state));
}
static const struct proc_ops input_devices_proc_ops = {
@ -1218,17 +1217,14 @@ static const struct proc_ops input_devices_proc_ops = {
.proc_poll = input_proc_devices_poll,
.proc_read = seq_read,
.proc_lseek = seq_lseek,
.proc_release = seq_release,
.proc_release = seq_release_private,
};
static void *input_handlers_seq_start(struct seq_file *seq, loff_t *pos)
{
union input_seq_state *state = (union input_seq_state *)&seq->private;
struct input_seq_state *state = seq->private;
int error;
/* We need to fit into seq->private pointer */
BUILD_BUG_ON(sizeof(union input_seq_state) != sizeof(seq->private));
error = mutex_lock_interruptible(&input_mutex);
if (error) {
state->mutex_acquired = false;
@ -1243,7 +1239,7 @@ static void *input_handlers_seq_start(struct seq_file *seq, loff_t *pos)
static void *input_handlers_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
union input_seq_state *state = (union input_seq_state *)&seq->private;
struct input_seq_state *state = seq->private;
state->pos = *pos + 1;
return seq_list_next(v, &input_handler_list, pos);
@ -1252,7 +1248,7 @@ static void *input_handlers_seq_next(struct seq_file *seq, void *v, loff_t *pos)
static int input_handlers_seq_show(struct seq_file *seq, void *v)
{
struct input_handler *handler = container_of(v, struct input_handler, node);
union input_seq_state *state = (union input_seq_state *)&seq->private;
struct input_seq_state *state = seq->private;
seq_printf(seq, "N: Number=%u Name=%s", state->pos, handler->name);
if (handler->filter)
@ -1273,14 +1269,15 @@ static const struct seq_operations input_handlers_seq_ops = {
static int input_proc_handlers_open(struct inode *inode, struct file *file)
{
return seq_open(file, &input_handlers_seq_ops);
return seq_open_private(file, &input_handlers_seq_ops,
sizeof(struct input_seq_state));
}
static const struct proc_ops input_handlers_proc_ops = {
.proc_open = input_proc_handlers_open,
.proc_read = seq_read,
.proc_lseek = seq_lseek,
.proc_release = seq_release,
.proc_release = seq_release_private,
};
static int __init input_proc_init(void)

View File

@ -186,9 +186,6 @@ static loff_t hmcdrv_dev_seek(struct file *fp, loff_t pos, int whence)
if (pos < 0)
return -EINVAL;
if (fp->f_pos != pos)
++fp->f_version;
fp->f_pos = pos;
return pos;
}

View File

@ -17,13 +17,44 @@
#include <linux/iversion.h>
#include "affs.h"
/* Per-open directory state kept in file->private_data. */
struct affs_dir_data {
/* value saved at the end of affs_readdir() so the next call can resume; 0 means start over */
unsigned long ino;
/* inode iversion sampled by affs_readdir(); reset to 0 by a seek that moves f_pos */
u64 cookie;
};
static int affs_readdir(struct file *, struct dir_context *);
/*
 * llseek for affs directories: hand the request to generic_llseek_cookie()
 * so the per-open cookie is reset whenever the file position changes.
 */
static loff_t affs_dir_llseek(struct file *file, loff_t offset, int whence)
{
	struct affs_dir_data *dir_data = file->private_data;
	u64 *cookie = &dir_data->cookie;

	return generic_llseek_cookie(file, offset, whence, cookie);
}
static int affs_dir_open(struct inode *inode, struct file *file)
{
struct affs_dir_data *data;
data = kzalloc(sizeof(struct affs_dir_data), GFP_KERNEL);
if (!data)
return -ENOMEM;
file->private_data = data;
return 0;
}
/* Free the per-open directory state stored in file->private_data. */
static int affs_dir_release(struct inode *inode, struct file *file)
{
	struct affs_dir_data *dir_data = file->private_data;

	kfree(dir_data);
	return 0;
}
const struct file_operations affs_dir_operations = {
.open = affs_dir_open,
.read = generic_read_dir,
.llseek = generic_file_llseek,
.llseek = affs_dir_llseek,
.iterate_shared = affs_readdir,
.fsync = affs_file_fsync,
.release = affs_dir_release,
};
/*
@ -45,6 +76,7 @@ static int
affs_readdir(struct file *file, struct dir_context *ctx)
{
struct inode *inode = file_inode(file);
struct affs_dir_data *data = file->private_data;
struct super_block *sb = inode->i_sb;
struct buffer_head *dir_bh = NULL;
struct buffer_head *fh_bh = NULL;
@ -59,7 +91,7 @@ affs_readdir(struct file *file, struct dir_context *ctx)
pr_debug("%s(ino=%lu,f_pos=%llx)\n", __func__, inode->i_ino, ctx->pos);
if (ctx->pos < 2) {
file->private_data = (void *)0;
data->ino = 0;
if (!dir_emit_dots(file, ctx))
return 0;
}
@ -80,8 +112,8 @@ affs_readdir(struct file *file, struct dir_context *ctx)
/* If the directory hasn't changed since the last call to readdir(),
* we can jump directly to where we left off.
*/
ino = (u32)(long)file->private_data;
if (ino && inode_eq_iversion(inode, file->f_version)) {
ino = data->ino;
if (ino && inode_eq_iversion(inode, data->cookie)) {
pr_debug("readdir() left off=%d\n", ino);
goto inside;
}
@ -131,8 +163,8 @@ affs_readdir(struct file *file, struct dir_context *ctx)
} while (ino);
}
done:
file->f_version = inode_query_iversion(inode);
file->private_data = (void *)(long)ino;
data->cookie = inode_query_iversion(inode);
data->ino = ino;
affs_brelse(fh_bh);
out_brelse_dir:

View File

@ -707,7 +707,6 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
if (offset != file->f_pos) {
file->f_pos = offset;
file->f_version = 0;
dfi->file_info.flags &= ~CEPH_F_ATEND;
}
retval = offset;

View File

@ -263,7 +263,7 @@ ext2_readdir(struct file *file, struct dir_context *ctx)
unsigned long n = pos >> PAGE_SHIFT;
unsigned long npages = dir_pages(inode);
unsigned chunk_mask = ~(ext2_chunk_size(inode)-1);
bool need_revalidate = !inode_eq_iversion(inode, file->f_version);
bool need_revalidate = !inode_eq_iversion(inode, *(u64 *)file->private_data);
bool has_filetype;
if (pos > inode->i_size - EXT2_DIR_REC_LEN(1))
@ -290,7 +290,7 @@ ext2_readdir(struct file *file, struct dir_context *ctx)
offset = ext2_validate_entry(kaddr, offset, chunk_mask);
ctx->pos = (n<<PAGE_SHIFT) + offset;
}
file->f_version = inode_query_iversion(inode);
*(u64 *)file->private_data = inode_query_iversion(inode);
need_revalidate = false;
}
de = (ext2_dirent *)(kaddr+offset);
@ -703,8 +703,30 @@ int ext2_empty_dir(struct inode *inode)
return 0;
}
static int ext2_dir_open(struct inode *inode, struct file *file)
{
file->private_data = kzalloc(sizeof(u64), GFP_KERNEL);
if (!file->private_data)
return -ENOMEM;
return 0;
}
static int ext2_dir_release(struct inode *inode, struct file *file)
{
kfree(file->private_data);
return 0;
}
/*
 * llseek for ext2 directories: file->private_data points at the u64
 * cookie, which generic_llseek_cookie() resets when the position changes.
 */
static loff_t ext2_dir_llseek(struct file *file, loff_t offset, int whence)
{
	u64 *cookie = file->private_data;

	return generic_llseek_cookie(file, offset, whence, cookie);
}
const struct file_operations ext2_dir_operations = {
.llseek = generic_file_llseek,
.open = ext2_dir_open,
.release = ext2_dir_release,
.llseek = ext2_dir_llseek,
.read = generic_read_dir,
.iterate_shared = ext2_readdir,
.unlocked_ioctl = ext2_ioctl,

View File

@ -133,6 +133,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
struct super_block *sb = inode->i_sb;
struct buffer_head *bh = NULL;
struct fscrypt_str fstr = FSTR_INIT(NULL, 0);
struct dir_private_info *info = file->private_data;
err = fscrypt_prepare_readdir(inode);
if (err)
@ -229,7 +230,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
* readdir(2), then we might be pointing to an invalid
* dirent right now. Scan from the start of the block
* to make sure. */
if (!inode_eq_iversion(inode, file->f_version)) {
if (!inode_eq_iversion(inode, info->cookie)) {
for (i = 0; i < sb->s_blocksize && i < offset; ) {
de = (struct ext4_dir_entry_2 *)
(bh->b_data + i);
@ -249,7 +250,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
offset = i;
ctx->pos = (ctx->pos & ~(sb->s_blocksize - 1))
| offset;
file->f_version = inode_query_iversion(inode);
info->cookie = inode_query_iversion(inode);
}
while (ctx->pos < inode->i_size
@ -384,6 +385,7 @@ static inline loff_t ext4_get_htree_eof(struct file *filp)
static loff_t ext4_dir_llseek(struct file *file, loff_t offset, int whence)
{
struct inode *inode = file->f_mapping->host;
struct dir_private_info *info = file->private_data;
int dx_dir = is_dx_dir(inode);
loff_t ret, htree_max = ext4_get_htree_eof(file);
@ -392,7 +394,7 @@ static loff_t ext4_dir_llseek(struct file *file, loff_t offset, int whence)
htree_max, htree_max);
else
ret = ext4_llseek(file, offset, whence);
file->f_version = inode_peek_iversion(inode) - 1;
info->cookie = inode_peek_iversion(inode) - 1;
return ret;
}
@ -429,18 +431,15 @@ static void free_rb_tree_fname(struct rb_root *root)
*root = RB_ROOT;
}
static struct dir_private_info *ext4_htree_create_dir_info(struct file *filp,
loff_t pos)
static void ext4_htree_init_dir_info(struct file *filp, loff_t pos)
{
struct dir_private_info *p;
struct dir_private_info *p = filp->private_data;
p = kzalloc(sizeof(*p), GFP_KERNEL);
if (!p)
return NULL;
if (is_dx_dir(file_inode(filp)) && !p->initialized) {
p->curr_hash = pos2maj_hash(filp, pos);
p->curr_minor_hash = pos2min_hash(filp, pos);
return p;
p->initialized = true;
}
}
void ext4_htree_free_dir_info(struct dir_private_info *p)
@ -552,12 +551,7 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx)
struct fname *fname;
int ret = 0;
if (!info) {
info = ext4_htree_create_dir_info(file, ctx->pos);
if (!info)
return -ENOMEM;
file->private_data = info;
}
ext4_htree_init_dir_info(file, ctx->pos);
if (ctx->pos == ext4_get_htree_eof(file))
return 0; /* EOF */
@ -590,10 +584,10 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx)
* cached entries.
*/
if ((!info->curr_node) ||
!inode_eq_iversion(inode, file->f_version)) {
!inode_eq_iversion(inode, info->cookie)) {
info->curr_node = NULL;
free_rb_tree_fname(&info->root);
file->f_version = inode_query_iversion(inode);
info->cookie = inode_query_iversion(inode);
ret = ext4_htree_fill_tree(file, info->curr_hash,
info->curr_minor_hash,
&info->next_hash);
@ -664,7 +658,19 @@ int ext4_check_all_de(struct inode *dir, struct buffer_head *bh, void *buf,
return 0;
}
static int ext4_dir_open(struct inode *inode, struct file *file)
{
struct dir_private_info *info;
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info)
return -ENOMEM;
file->private_data = info;
return 0;
}
const struct file_operations ext4_dir_operations = {
.open = ext4_dir_open,
.llseek = ext4_dir_llseek,
.read = generic_read_dir,
.iterate_shared = ext4_readdir,

View File

@ -2553,6 +2553,8 @@ struct dir_private_info {
__u32 curr_hash;
__u32 curr_minor_hash;
__u32 next_hash;
u64 cookie;
bool initialized;
};
/* calculate the first block number of the group */

View File

@ -1460,6 +1460,7 @@ int ext4_read_inline_dir(struct file *file,
struct ext4_iloc iloc;
void *dir_buf = NULL;
int dotdot_offset, dotdot_size, extra_offset, extra_size;
struct dir_private_info *info = file->private_data;
ret = ext4_get_inode_loc(inode, &iloc);
if (ret)
@ -1503,12 +1504,12 @@ int ext4_read_inline_dir(struct file *file,
extra_size = extra_offset + inline_size;
/*
* If the version has changed since the last call to
* If the cookie has changed since the last call to
* readdir(2), then we might be pointing to an invalid
* dirent right now. Scan from the start of the inline
* dir to make sure.
*/
if (!inode_eq_iversion(inode, file->f_version)) {
if (!inode_eq_iversion(inode, info->cookie)) {
for (i = 0; i < extra_size && i < offset;) {
/*
* "." is with offset 0 and
@ -1540,7 +1541,7 @@ int ext4_read_inline_dir(struct file *file,
}
offset = i;
ctx->pos = offset;
file->f_version = inode_query_iversion(inode);
info->cookie = inode_query_iversion(inode);
}
while (ctx->pos < extra_size) {

View File

@ -156,6 +156,13 @@ static int init_file(struct file *f, int flags, const struct cred *cred)
}
spin_lock_init(&f->f_lock);
/*
* Note that f_pos_lock is only used for files raising
* FMODE_ATOMIC_POS and directories. Other files such as pipes
* don't need it and since f_pos_lock is in a union may reuse
* the space for other purposes. They are expected to initialize
* the respective member when opening the file.
*/
mutex_init(&f->f_pos_lock);
f->f_flags = flags;
f->f_mode = OPEN_FMODE(flags);

View File

@ -1932,6 +1932,7 @@ int ocfs2_readdir(struct file *file, struct dir_context *ctx)
{
int error = 0;
struct inode *inode = file_inode(file);
struct ocfs2_file_private *fp = file->private_data;
int lock_level = 0;
trace_ocfs2_readdir((unsigned long long)OCFS2_I(inode)->ip_blkno);
@ -1952,7 +1953,7 @@ int ocfs2_readdir(struct file *file, struct dir_context *ctx)
goto bail_nolock;
}
error = ocfs2_dir_foreach_blk(inode, &file->f_version, ctx, false);
error = ocfs2_dir_foreach_blk(inode, &fp->cookie, ctx, false);
ocfs2_inode_unlock(inode, lock_level);
if (error)

View File

@ -2750,6 +2750,13 @@ static loff_t ocfs2_remap_file_range(struct file *file_in, loff_t pos_in,
return remapped > 0 ? remapped : ret;
}
/*
 * llseek for ocfs2 directories: pass the cookie kept in the per-open
 * struct ocfs2_file_private to generic_llseek_cookie(), which resets it
 * when the file position changes.
 */
static loff_t ocfs2_dir_llseek(struct file *file, loff_t offset, int whence)
{
	struct ocfs2_file_private *priv = file->private_data;
	u64 *cookie = &priv->cookie;

	return generic_llseek_cookie(file, offset, whence, cookie);
}
const struct inode_operations ocfs2_file_iops = {
.setattr = ocfs2_setattr,
.getattr = ocfs2_getattr,
@ -2797,7 +2804,7 @@ const struct file_operations ocfs2_fops = {
WRAP_DIR_ITER(ocfs2_readdir) // FIXME!
const struct file_operations ocfs2_dops = {
.llseek = generic_file_llseek,
.llseek = ocfs2_dir_llseek,
.read = generic_read_dir,
.iterate_shared = shared_ocfs2_readdir,
.fsync = ocfs2_sync_file,
@ -2843,7 +2850,7 @@ const struct file_operations ocfs2_fops_no_plocks = {
};
const struct file_operations ocfs2_dops_no_plocks = {
.llseek = generic_file_llseek,
.llseek = ocfs2_dir_llseek,
.read = generic_read_dir,
.iterate_shared = shared_ocfs2_readdir,
.fsync = ocfs2_sync_file,

View File

@ -20,6 +20,7 @@ struct ocfs2_alloc_context;
enum ocfs2_alloc_restarted;
struct ocfs2_file_private {
u64 cookie;
struct file *fp_file;
struct mutex fp_mutex;
struct ocfs2_lock_res fp_flock;

View File

@ -686,7 +686,7 @@ pipe_poll(struct file *filp, poll_table *wait)
if (filp->f_mode & FMODE_READ) {
if (!pipe_empty(head, tail))
mask |= EPOLLIN | EPOLLRDNORM;
if (!pipe->writers && filp->f_version != pipe->w_counter)
if (!pipe->writers && filp->f_pipe != pipe->w_counter)
mask |= EPOLLHUP;
}
@ -945,6 +945,7 @@ int create_pipe_files(struct file **res, int flags)
}
f->private_data = inode->i_pipe;
f->f_pipe = 0;
res[0] = alloc_file_clone(f, O_RDONLY | (flags & O_NONBLOCK),
&pipefifo_fops);
@ -954,6 +955,7 @@ int create_pipe_files(struct file **res, int flags)
return PTR_ERR(res[0]);
}
res[0]->private_data = inode->i_pipe;
res[0]->f_pipe = 0;
res[1] = f;
stream_open(inode, res[0]);
stream_open(inode, res[1]);
@ -1108,7 +1110,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC;
int ret;
filp->f_version = 0;
filp->f_pipe = 0;
spin_lock(&inode->i_lock);
if (inode->i_pipe) {
@ -1155,7 +1157,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
if ((filp->f_flags & O_NONBLOCK)) {
/* suppress EPOLLHUP until we have
* seen a writer */
filp->f_version = pipe->w_counter;
filp->f_pipe = pipe->w_counter;
} else {
if (wait_for_partner(pipe, &pipe->w_counter))
goto err_rd;

View File

@ -3870,12 +3870,12 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx)
if (!dir_emit_dots(file, ctx))
return 0;
/* f_version caches the tgid value that the last readdir call couldn't
* return. lseek aka telldir automagically resets f_version to 0.
/* We cache the tgid value that the last readdir call couldn't
* return and lseek resets it to 0.
*/
ns = proc_pid_ns(inode->i_sb);
tid = (int)file->f_version;
file->f_version = 0;
tid = (int)(intptr_t)file->private_data;
file->private_data = NULL;
for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns);
task;
task = next_tid(task), ctx->pos++) {
@ -3890,7 +3890,7 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx)
proc_task_instantiate, task, NULL)) {
/* returning this tgid failed, save it as the first
* pid for the next readdir call */
file->f_version = (u64)tid;
file->private_data = (void *)(intptr_t)tid;
put_task_struct(task);
break;
}
@ -3915,6 +3915,24 @@ static int proc_task_getattr(struct mnt_idmap *idmap,
return 0;
}
/*
 * proc_task_readdir() stores a positive integer in @file->private_data,
 * so widening it to u64 is safe. generic_llseek_cookie() only ever resets
 * the cookie to 0, so narrowing it back to an int-sized value is safe too.
 * The WARN_ON_ONCE() catches any unexpected change in behavior in either
 * proc_task_readdir() or generic_llseek_cookie().
 */
static loff_t proc_dir_llseek(struct file *file, loff_t offset, int whence)
{
	u64 tid_cookie = (u64)(intptr_t)file->private_data;
	loff_t pos = generic_llseek_cookie(file, offset, whence, &tid_cookie);

	WARN_ON_ONCE(tid_cookie > INT_MAX);
	/* serialized by f_pos_lock */
	file->private_data = (void *)(intptr_t)tid_cookie;
	return pos;
}
static const struct inode_operations proc_task_inode_operations = {
.lookup = proc_task_lookup,
.getattr = proc_task_getattr,
@ -3925,7 +3943,7 @@ static const struct inode_operations proc_task_inode_operations = {
static const struct file_operations proc_task_operations = {
.read = generic_read_dir,
.iterate_shared = proc_task_readdir,
.llseek = generic_file_llseek,
.llseek = proc_dir_llseek,
};
void __init set_proc_pid_nlink(void)

View File

@ -39,6 +39,35 @@ static inline bool unsigned_offsets(struct file *file)
return file->f_mode & FMODE_UNSIGNED_OFFSET;
}
/**
 * vfs_setpos_cookie - update the file offset for lseek and reset cookie
 * @file:	file structure in question
 * @offset:	file offset to seek to
 * @maxsize:	maximum file size
 * @cookie:	cookie to reset, may be NULL
 *
 * Validate @offset and, if it differs from the current file position,
 * store it in @file->f_pos and zero *@cookie (when @cookie is non-NULL)
 * to signal that a seek took place. A seek to the current position
 * leaves both untouched.
 *
 * Return: @offset on success, -EINVAL if @offset is negative on a file
 * without unsigned offsets or if it exceeds @maxsize.
 */
static loff_t vfs_setpos_cookie(struct file *file, loff_t offset,
				loff_t maxsize, u64 *cookie)
{
	if ((offset < 0 && !unsigned_offsets(file)) || offset > maxsize)
		return -EINVAL;

	/* Nothing to do when the position does not actually move. */
	if (offset == file->f_pos)
		return offset;

	file->f_pos = offset;
	if (cookie)
		*cookie = 0;

	return offset;
}
/**
* vfs_setpos - update the file offset for lseek
* @file: file structure in question
@ -53,19 +82,63 @@ static inline bool unsigned_offsets(struct file *file)
*/
loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize)
{
if (offset < 0 && !unsigned_offsets(file))
return -EINVAL;
if (offset > maxsize)
return -EINVAL;
if (offset != file->f_pos) {
file->f_pos = offset;
file->f_version = 0;
}
return offset;
return vfs_setpos_cookie(file, offset, maxsize, NULL);
}
EXPORT_SYMBOL(vfs_setpos);
/**
 * must_set_pos - check whether f_pos has to be updated
 * @file:	file to seek on
 * @offset:	offset to use, adjusted in place according to @whence
 * @whence:	type of seek operation
 * @eof:	end of file
 *
 * Translate @offset for the requested @whence and tell the caller
 * whether the file position still needs to be written.
 *
 * Return: 0 if f_pos doesn't need to be updated (@offset then holds the
 * current position), 1 if f_pos has to be updated, and a negative error
 * code on failure.
 */
static int must_set_pos(struct file *file, loff_t *offset, int whence, loff_t eof)
{
	if (whence == SEEK_END) {
		*offset += eof;
		return 1;
	}

	if (whence == SEEK_CUR) {
		/*
		 * lseek(fd, 0, SEEK_CUR) is a pure position query. Report
		 * f_pos without asking the caller to write the "same" value
		 * back, since a concurrent read(), write() or lseek() might
		 * have altered it in the meantime.
		 */
		if (*offset != 0)
			return 1;
		*offset = file->f_pos;
		return 0;
	}

	if (whence == SEEK_DATA || whence == SEEK_HOLE) {
		/*
		 * In the generic case the whole file is data followed by a
		 * virtual hole at the end, so any offset at or beyond the
		 * end of the file has neither data nor a hole after it.
		 */
		if ((unsigned long long)*offset >= eof)
			return -ENXIO;
		/* The only hole is the virtual one at the end of the file. */
		if (whence == SEEK_HOLE)
			*offset = eof;
		return 1;
	}

	/* Any other whence leaves the offset as given. */
	return 1;
}
/**
* generic_file_llseek_size - generic llseek implementation for regular files
* @file: file structure to seek on
@ -86,51 +159,73 @@ loff_t
generic_file_llseek_size(struct file *file, loff_t offset, int whence,
loff_t maxsize, loff_t eof)
{
switch (whence) {
case SEEK_END:
offset += eof;
break;
case SEEK_CUR:
/*
* Here we special-case the lseek(fd, 0, SEEK_CUR)
* position-querying operation. Avoid rewriting the "same"
* f_pos value back to the file because a concurrent read(),
* write() or lseek() might have altered it
*/
if (offset == 0)
return file->f_pos;
/*
* f_lock protects against read/modify/write race with other
* SEEK_CURs. Note that parallel writes and reads behave
* like SEEK_SET.
*/
spin_lock(&file->f_lock);
offset = vfs_setpos(file, file->f_pos + offset, maxsize);
spin_unlock(&file->f_lock);
int ret;
ret = must_set_pos(file, &offset, whence, eof);
if (ret < 0)
return ret;
if (ret == 0)
return offset;
case SEEK_DATA:
if (whence == SEEK_CUR) {
/*
* In the generic case the entire file is data, so as long as
* offset isn't at the end of the file then the offset is data.
* f_lock protects against read/modify/write race with
* other SEEK_CURs. Note that parallel writes and reads
* behave like SEEK_SET.
*/
if ((unsigned long long)offset >= eof)
return -ENXIO;
break;
case SEEK_HOLE:
/*
* There is a virtual hole at the end of the file, so as long as
* offset isn't i_size or larger, return i_size.
*/
if ((unsigned long long)offset >= eof)
return -ENXIO;
offset = eof;
break;
guard(spinlock)(&file->f_lock);
return vfs_setpos(file, file->f_pos + offset, maxsize);
}
return vfs_setpos(file, offset, maxsize);
}
EXPORT_SYMBOL(generic_file_llseek_size);
/**
 * generic_llseek_cookie - versioned llseek implementation
 * @file:	file structure to seek on
 * @offset:	file offset to seek to
 * @whence:	type of seek
 * @cookie:	cookie to update, must not be NULL
 *
 * See generic_file_llseek for a general description and locking assumptions.
 *
 * Unlike generic_file_llseek, this function also resets the caller's
 * cookie to 0 whenever the seek changes the file position.
 *
 * Return: the resulting offset on success, -EINVAL if @cookie is NULL,
 * if @file lacks FMODE_ATOMIC_POS or if the offset is invalid, or the
 * error reported by must_set_pos().
 */
loff_t generic_llseek_cookie(struct file *file, loff_t offset, int whence,
			     u64 *cookie)
{
	struct inode *inode = file->f_mapping->host;
	loff_t maxsize = inode->i_sb->s_maxbytes;
	loff_t eof = i_size_read(inode);
	loff_t target;
	int ret;

	if (WARN_ON_ONCE(!cookie))
		return -EINVAL;

	/*
	 * Only directories that guarantee synchronization between readdir
	 * and seek may use this helper, so that an update to @cookie is
	 * correctly ordered against a concurrent readdir.
	 */
	if (WARN_ON_ONCE(!(file->f_mode & FMODE_ATOMIC_POS)))
		return -EINVAL;

	ret = must_set_pos(file, &offset, whence, eof);
	if (ret <= 0)
		return ret < 0 ? ret : offset;

	/* No need to hold f_lock because we know that f_pos_lock is held. */
	target = offset;
	if (whence == SEEK_CUR)
		target = file->f_pos + offset;

	return vfs_setpos_cookie(file, target, maxsize, cookie);
}
EXPORT_SYMBOL(generic_llseek_cookie);
/**
* generic_file_llseek - generic llseek implementation for regular files
* @file: file structure to seek on
@ -270,10 +365,8 @@ loff_t default_llseek(struct file *file, loff_t offset, int whence)
}
retval = -EINVAL;
if (offset >= 0 || unsigned_offsets(file)) {
if (offset != file->f_pos) {
if (offset != file->f_pos)
file->f_pos = offset;
file->f_version = 0;
}
retval = offset;
}
out:

View File

@ -555,6 +555,11 @@ static unsigned int vfs_dent_type(uint8_t type)
return 0;
}
/* Per-open directory state kept in file->private_data. */
struct ubifs_dir_data {
/* directory entry saved between ubifs_readdir() calls; freed with kfree() */
struct ubifs_dent_node *dent;
/* 0 after a seek or before the first readdir; ubifs_readdir() sets it to 1 */
u64 cookie;
};
/*
* The classical Unix view for directory is that it is a linear array of
* (name, inode number) entries. Linux/VFS assumes this model as well.
@ -582,6 +587,7 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx)
struct inode *dir = file_inode(file);
struct ubifs_info *c = dir->i_sb->s_fs_info;
bool encrypted = IS_ENCRYPTED(dir);
struct ubifs_dir_data *data = file->private_data;
dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, ctx->pos);
@ -604,27 +610,27 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx)
fstr_real_len = fstr.len;
}
if (file->f_version == 0) {
if (data->cookie == 0) {
/*
* The file was seek'ed, which means that @file->private_data
* The file was seek'ed, which means that @data->dent
* is now invalid. This may also be just the first
* 'ubifs_readdir()' invocation, in which case
* @file->private_data is NULL, and the below code is
* @data->dent is NULL, and the below code is
* basically a no-op.
*/
kfree(file->private_data);
file->private_data = NULL;
kfree(data->dent);
data->dent = NULL;
}
/*
* 'generic_file_llseek()' unconditionally sets @file->f_version to
* zero, and we use this for detecting whether the file was seek'ed.
* 'ubifs_dir_llseek()' sets @data->cookie to zero, and we use this
* for detecting whether the file was seek'ed.
*/
file->f_version = 1;
data->cookie = 1;
/* File positions 0 and 1 correspond to "." and ".." */
if (ctx->pos < 2) {
ubifs_assert(c, !file->private_data);
ubifs_assert(c, !data->dent);
if (!dir_emit_dots(file, ctx)) {
if (encrypted)
fscrypt_fname_free_buffer(&fstr);
@ -641,10 +647,10 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx)
}
ctx->pos = key_hash_flash(c, &dent->key);
file->private_data = dent;
data->dent = dent;
}
dent = file->private_data;
dent = data->dent;
if (!dent) {
/*
* The directory was seek'ed to and is now readdir'ed.
@ -658,7 +664,7 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx)
goto out;
}
ctx->pos = key_hash_flash(c, &dent->key);
file->private_data = dent;
data->dent = dent;
}
while (1) {
@ -701,15 +707,15 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx)
goto out;
}
kfree(file->private_data);
kfree(data->dent);
ctx->pos = key_hash_flash(c, &dent->key);
file->private_data = dent;
data->dent = dent;
cond_resched();
}
out:
kfree(file->private_data);
file->private_data = NULL;
kfree(data->dent);
data->dent = NULL;
if (encrypted)
fscrypt_fname_free_buffer(&fstr);
@ -733,7 +739,10 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx)
/* Free saved readdir() state when the directory is closed */
static int ubifs_dir_release(struct inode *dir, struct file *file)
{
kfree(file->private_data);
struct ubifs_dir_data *data = file->private_data;
kfree(data->dent);
kfree(data);
file->private_data = NULL;
return 0;
}
@ -1712,6 +1721,24 @@ int ubifs_getattr(struct mnt_idmap *idmap, const struct path *path,
return 0;
}
static int ubifs_dir_open(struct inode *inode, struct file *file)
{
struct ubifs_dir_data *data;
data = kzalloc(sizeof(struct ubifs_dir_data), GFP_KERNEL);
if (!data)
return -ENOMEM;
file->private_data = data;
return 0;
}
/*
 * llseek for ubifs directories: generic_llseek_cookie() zeroes the per-open
 * cookie when the position changes, which ubifs_readdir() uses to detect
 * that the file was seeked.
 */
static loff_t ubifs_dir_llseek(struct file *file, loff_t offset, int whence)
{
	struct ubifs_dir_data *dir_data = file->private_data;
	u64 *cookie = &dir_data->cookie;

	return generic_llseek_cookie(file, offset, whence, cookie);
}
const struct inode_operations ubifs_dir_inode_operations = {
.lookup = ubifs_lookup,
.create = ubifs_create,
@ -1732,7 +1759,8 @@ const struct inode_operations ubifs_dir_inode_operations = {
};
const struct file_operations ubifs_dir_operations = {
.llseek = generic_file_llseek,
.open = ubifs_dir_open,
.llseek = ubifs_dir_llseek,
.release = ubifs_dir_release,
.read = generic_read_dir,
.iterate_shared = ubifs_readdir,

View File

@ -60,7 +60,7 @@ static int udf_readdir(struct file *file, struct dir_context *ctx)
* identifying beginning of dir entry (names are under user control),
* we need to scan the directory from the beginning.
*/
if (!inode_eq_iversion(dir, file->f_version)) {
if (!inode_eq_iversion(dir, *(u64 *)file->private_data)) {
emit_pos = nf_pos;
nf_pos = 0;
} else {
@ -122,15 +122,37 @@ static int udf_readdir(struct file *file, struct dir_context *ctx)
udf_fiiter_release(&iter);
out:
if (pos_valid)
file->f_version = inode_query_iversion(dir);
*(u64 *)file->private_data = inode_query_iversion(dir);
kfree(fname);
return ret;
}
static int udf_dir_open(struct inode *inode, struct file *file)
{
file->private_data = kzalloc(sizeof(u64), GFP_KERNEL);
if (!file->private_data)
return -ENOMEM;
return 0;
}
static int udf_dir_release(struct inode *inode, struct file *file)
{
kfree(file->private_data);
return 0;
}
/*
 * llseek for udf directories: file->private_data points at the u64
 * cookie, which generic_llseek_cookie() resets when the position changes.
 */
static loff_t udf_dir_llseek(struct file *file, loff_t offset, int whence)
{
	u64 *cookie = file->private_data;

	return generic_llseek_cookie(file, offset, whence, cookie);
}
/* readdir and lookup functions */
const struct file_operations udf_dir_operations = {
.llseek = generic_file_llseek,
.open = udf_dir_open,
.release = udf_dir_release,
.llseek = udf_dir_llseek,
.read = generic_read_dir,
.iterate_shared = udf_readdir,
.unlocked_ioctl = udf_ioctl,

View File

@ -435,7 +435,7 @@ ufs_readdir(struct file *file, struct dir_context *ctx)
unsigned long n = pos >> PAGE_SHIFT;
unsigned long npages = dir_pages(inode);
unsigned chunk_mask = ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1);
bool need_revalidate = !inode_eq_iversion(inode, file->f_version);
bool need_revalidate = !inode_eq_iversion(inode, *(u64 *)file->private_data);
unsigned flags = UFS_SB(sb)->s_flags;
UFSD("BEGIN\n");
@ -462,7 +462,7 @@ ufs_readdir(struct file *file, struct dir_context *ctx)
offset = ufs_validate_entry(sb, kaddr, offset, chunk_mask);
ctx->pos = (n<<PAGE_SHIFT) + offset;
}
file->f_version = inode_query_iversion(inode);
*(u64 *)file->private_data = inode_query_iversion(inode);
need_revalidate = false;
}
de = (struct ufs_dir_entry *)(kaddr+offset);
@ -646,9 +646,31 @@ int ufs_empty_dir(struct inode * inode)
return 0;
}
static int ufs_dir_open(struct inode *inode, struct file *file)
{
file->private_data = kzalloc(sizeof(u64), GFP_KERNEL);
if (!file->private_data)
return -ENOMEM;
return 0;
}
static int ufs_dir_release(struct inode *inode, struct file *file)
{
kfree(file->private_data);
return 0;
}
/*
 * llseek for ufs directories: file->private_data points at the u64
 * cookie, which generic_llseek_cookie() resets when the position changes.
 */
static loff_t ufs_dir_llseek(struct file *file, loff_t offset, int whence)
{
	u64 *cookie = file->private_data;

	return generic_llseek_cookie(file, offset, whence, cookie);
}
const struct file_operations ufs_dir_operations = {
.open = ufs_dir_open,
.release = ufs_dir_release,
.read = generic_read_dir,
.iterate_shared = ufs_readdir,
.fsync = generic_file_fsync,
.llseek = generic_file_llseek,
.llseek = ufs_dir_llseek,
};

View File

@ -1001,8 +1001,8 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
* @f_cred: stashed credentials of creator/opener
* @f_path: path of the file
* @f_pos_lock: lock protecting file position
* @f_pipe: specific to pipes
* @f_pos: file position
* @f_version: file version
* @f_security: LSM security context of this file
* @f_owner: file owner
* @f_wb_err: writeback error
@ -1026,13 +1026,17 @@ struct file {
const struct cred *f_cred;
/* --- cacheline 1 boundary (64 bytes) --- */
struct path f_path;
union {
/* regular files (with FMODE_ATOMIC_POS) and directories */
struct mutex f_pos_lock;
/* pipes */
u64 f_pipe;
};
loff_t f_pos;
u64 f_version;
/* --- cacheline 2 boundary (128 bytes) --- */
#ifdef CONFIG_SECURITY
void *f_security;
#endif
/* --- cacheline 2 boundary (128 bytes) --- */
struct fown_struct *f_owner;
errseq_t f_wb_err;
errseq_t f_sb_err;
@ -3202,6 +3206,8 @@ extern loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize);
extern loff_t generic_file_llseek(struct file *file, loff_t offset, int whence);
extern loff_t generic_file_llseek_size(struct file *file, loff_t offset,
int whence, loff_t maxsize, loff_t eof);
loff_t generic_llseek_cookie(struct file *file, loff_t offset, int whence,
u64 *cookie);
extern loff_t fixed_size_llseek(struct file *file, loff_t offset,
int whence, loff_t size);
extern loff_t no_seek_end_llseek_size(struct file *, loff_t, int, loff_t);