diff --git a/drivers/char/adi.c b/drivers/char/adi.c index 751d7cc0da1b..c091a0282ad0 100644 --- a/drivers/char/adi.c +++ b/drivers/char/adi.c @@ -196,7 +196,6 @@ static loff_t adi_llseek(struct file *file, loff_t offset, int whence) if (offset != file->f_pos) { file->f_pos = offset; - file->f_version = 0; ret = offset; } diff --git a/drivers/input/input.c b/drivers/input/input.c index 54c57b267b25..19ea1888da9f 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -1079,33 +1079,31 @@ static inline void input_wakeup_procfs_readers(void) wake_up(&input_devices_poll_wait); } +struct input_seq_state { + unsigned short pos; + bool mutex_acquired; + int input_devices_state; +}; + static __poll_t input_proc_devices_poll(struct file *file, poll_table *wait) { + struct seq_file *seq = file->private_data; + struct input_seq_state *state = seq->private; + poll_wait(file, &input_devices_poll_wait, wait); - if (file->f_version != input_devices_state) { - file->f_version = input_devices_state; + if (state->input_devices_state != input_devices_state) { + state->input_devices_state = input_devices_state; return EPOLLIN | EPOLLRDNORM; } return 0; } -union input_seq_state { - struct { - unsigned short pos; - bool mutex_acquired; - }; - void *p; -}; - static void *input_devices_seq_start(struct seq_file *seq, loff_t *pos) { - union input_seq_state *state = (union input_seq_state *)&seq->private; + struct input_seq_state *state = seq->private; int error; - /* We need to fit into seq->private pointer */ - BUILD_BUG_ON(sizeof(union input_seq_state) != sizeof(seq->private)); - error = mutex_lock_interruptible(&input_mutex); if (error) { state->mutex_acquired = false; @@ -1124,7 +1122,7 @@ static void *input_devices_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void input_seq_stop(struct seq_file *seq, void *v) { - union input_seq_state *state = (union input_seq_state *)&seq->private; + struct input_seq_state *state = seq->private; if (state->mutex_acquired) 
mutex_unlock(&input_mutex); @@ -1210,7 +1208,8 @@ static const struct seq_operations input_devices_seq_ops = { static int input_proc_devices_open(struct inode *inode, struct file *file) { - return seq_open(file, &input_devices_seq_ops); + return seq_open_private(file, &input_devices_seq_ops, + sizeof(struct input_seq_state)); } static const struct proc_ops input_devices_proc_ops = { @@ -1218,17 +1217,14 @@ static const struct proc_ops input_devices_proc_ops = { .proc_poll = input_proc_devices_poll, .proc_read = seq_read, .proc_lseek = seq_lseek, - .proc_release = seq_release, + .proc_release = seq_release_private, }; static void *input_handlers_seq_start(struct seq_file *seq, loff_t *pos) { - union input_seq_state *state = (union input_seq_state *)&seq->private; + struct input_seq_state *state = seq->private; int error; - /* We need to fit into seq->private pointer */ - BUILD_BUG_ON(sizeof(union input_seq_state) != sizeof(seq->private)); - error = mutex_lock_interruptible(&input_mutex); if (error) { state->mutex_acquired = false; @@ -1243,7 +1239,7 @@ static void *input_handlers_seq_start(struct seq_file *seq, loff_t *pos) static void *input_handlers_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - union input_seq_state *state = (union input_seq_state *)&seq->private; + struct input_seq_state *state = seq->private; state->pos = *pos + 1; return seq_list_next(v, &input_handler_list, pos); @@ -1252,7 +1248,7 @@ static void *input_handlers_seq_next(struct seq_file *seq, void *v, loff_t *pos) static int input_handlers_seq_show(struct seq_file *seq, void *v) { struct input_handler *handler = container_of(v, struct input_handler, node); - union input_seq_state *state = (union input_seq_state *)&seq->private; + struct input_seq_state *state = seq->private; seq_printf(seq, "N: Number=%u Name=%s", state->pos, handler->name); if (handler->filter) @@ -1273,14 +1269,15 @@ static const struct seq_operations input_handlers_seq_ops = { static int 
input_proc_handlers_open(struct inode *inode, struct file *file) { - return seq_open(file, &input_handlers_seq_ops); + return seq_open_private(file, &input_handlers_seq_ops, + sizeof(struct input_seq_state)); } static const struct proc_ops input_handlers_proc_ops = { .proc_open = input_proc_handlers_open, .proc_read = seq_read, .proc_lseek = seq_lseek, - .proc_release = seq_release, + .proc_release = seq_release_private, }; static int __init input_proc_init(void) diff --git a/drivers/s390/char/hmcdrv_dev.c b/drivers/s390/char/hmcdrv_dev.c index 8d50c894711f..e069dd685899 100644 --- a/drivers/s390/char/hmcdrv_dev.c +++ b/drivers/s390/char/hmcdrv_dev.c @@ -186,9 +186,6 @@ static loff_t hmcdrv_dev_seek(struct file *fp, loff_t pos, int whence) if (pos < 0) return -EINVAL; - if (fp->f_pos != pos) - ++fp->f_version; - fp->f_pos = pos; return pos; } diff --git a/fs/affs/dir.c b/fs/affs/dir.c index b2bf7016e1b3..bd40d5f08810 100644 --- a/fs/affs/dir.c +++ b/fs/affs/dir.c @@ -17,13 +17,44 @@ #include <linux/iversion.h> #include "affs.h" +struct affs_dir_data { + unsigned long ino; + u64 cookie; +}; + static int affs_readdir(struct file *, struct dir_context *); +static loff_t affs_dir_llseek(struct file *file, loff_t offset, int whence) +{ + struct affs_dir_data *data = file->private_data; + + return generic_llseek_cookie(file, offset, whence, &data->cookie); +} + +static int affs_dir_open(struct inode *inode, struct file *file) +{ + struct affs_dir_data *data; + + data = kzalloc(sizeof(struct affs_dir_data), GFP_KERNEL); + if (!data) + return -ENOMEM; + file->private_data = data; + return 0; +} + +static int affs_dir_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + const struct file_operations affs_dir_operations = { + .open = affs_dir_open, .read = generic_read_dir, - .llseek = generic_file_llseek, + .llseek = affs_dir_llseek, .iterate_shared = affs_readdir, .fsync = affs_file_fsync, + .release = affs_dir_release, }; /* @@ -45,6 +76,7 @@ 
static int affs_readdir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); + struct affs_dir_data *data = file->private_data; struct super_block *sb = inode->i_sb; struct buffer_head *dir_bh = NULL; struct buffer_head *fh_bh = NULL; @@ -59,7 +91,7 @@ affs_readdir(struct file *file, struct dir_context *ctx) pr_debug("%s(ino=%lu,f_pos=%llx)\n", __func__, inode->i_ino, ctx->pos); if (ctx->pos < 2) { - file->private_data = (void *)0; + data->ino = 0; if (!dir_emit_dots(file, ctx)) return 0; } @@ -80,8 +112,8 @@ affs_readdir(struct file *file, struct dir_context *ctx) /* If the directory hasn't changed since the last call to readdir(), * we can jump directly to where we left off. */ - ino = (u32)(long)file->private_data; - if (ino && inode_eq_iversion(inode, file->f_version)) { + ino = data->ino; + if (ino && inode_eq_iversion(inode, data->cookie)) { pr_debug("readdir() left off=%d\n", ino); goto inside; } @@ -131,8 +163,8 @@ affs_readdir(struct file *file, struct dir_context *ctx) } while (ino); } done: - file->f_version = inode_query_iversion(inode); - file->private_data = (void *)(long)ino; + data->cookie = inode_query_iversion(inode); + data->ino = ino; affs_brelse(fh_bh); out_brelse_dir: diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 18c72b305858..ddec8c9244ee 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -707,7 +707,6 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence) if (offset != file->f_pos) { file->f_pos = offset; - file->f_version = 0; dfi->file_info.flags &= ~CEPH_F_ATEND; } retval = offset; diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 087457061c6e..6622c582f550 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -263,7 +263,7 @@ ext2_readdir(struct file *file, struct dir_context *ctx) unsigned long n = pos >> PAGE_SHIFT; unsigned long npages = dir_pages(inode); unsigned chunk_mask = ~(ext2_chunk_size(inode)-1); - bool need_revalidate = !inode_eq_iversion(inode, file->f_version); + bool 
need_revalidate = !inode_eq_iversion(inode, *(u64 *)file->private_data); bool has_filetype; if (pos > inode->i_size - EXT2_DIR_REC_LEN(1)) @@ -290,7 +290,7 @@ ext2_readdir(struct file *file, struct dir_context *ctx) offset = ext2_validate_entry(kaddr, offset, chunk_mask); ctx->pos = (n<<PAGE_SHIFT) + offset; } - file->f_version = inode_query_iversion(inode); + *(u64 *)file->private_data = inode_query_iversion(inode); need_revalidate = false; } de = (ext2_dirent *)(kaddr+offset); @@ -703,8 +703,30 @@ int ext2_empty_dir(struct inode *inode) return 0; } +static int ext2_dir_open(struct inode *inode, struct file *file) +{ + file->private_data = kzalloc(sizeof(u64), GFP_KERNEL); + if (!file->private_data) + return -ENOMEM; + return 0; +} + +static int ext2_dir_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + +static loff_t ext2_dir_llseek(struct file *file, loff_t offset, int whence) +{ + return generic_llseek_cookie(file, offset, whence, + (u64 *)file->private_data); +} + const struct file_operations ext2_dir_operations = { - .llseek = generic_file_llseek, + .open = ext2_dir_open, + .release = ext2_dir_release, + .llseek = ext2_dir_llseek, .read = generic_read_dir, .iterate_shared = ext2_readdir, .unlocked_ioctl = ext2_ioctl, diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index ff4514e4626b..13196afe55ce 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -133,6 +133,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) struct super_block *sb = inode->i_sb; struct buffer_head *bh = NULL; struct fscrypt_str fstr = FSTR_INIT(NULL, 0); + struct dir_private_info *info = file->private_data; err = fscrypt_prepare_readdir(inode); if (err) @@ -229,7 +230,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) * readdir(2), then we might be pointing to an invalid * dirent right now. Scan from the start of the block * to make sure. 
*/ - if (!inode_eq_iversion(inode, file->f_version)) { + if (!inode_eq_iversion(inode, info->cookie)) { for (i = 0; i < sb->s_blocksize && i < offset; ) { de = (struct ext4_dir_entry_2 *) (bh->b_data + i); @@ -249,7 +250,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) offset = i; ctx->pos = (ctx->pos & ~(sb->s_blocksize - 1)) | offset; - file->f_version = inode_query_iversion(inode); + info->cookie = inode_query_iversion(inode); } while (ctx->pos < inode->i_size @@ -384,6 +385,7 @@ static inline loff_t ext4_get_htree_eof(struct file *filp) static loff_t ext4_dir_llseek(struct file *file, loff_t offset, int whence) { struct inode *inode = file->f_mapping->host; + struct dir_private_info *info = file->private_data; int dx_dir = is_dx_dir(inode); loff_t ret, htree_max = ext4_get_htree_eof(file); @@ -392,7 +394,7 @@ static loff_t ext4_dir_llseek(struct file *file, loff_t offset, int whence) htree_max, htree_max); else ret = ext4_llseek(file, offset, whence); - file->f_version = inode_peek_iversion(inode) - 1; + info->cookie = inode_peek_iversion(inode) - 1; return ret; } @@ -429,18 +431,15 @@ static void free_rb_tree_fname(struct rb_root *root) *root = RB_ROOT; } - -static struct dir_private_info *ext4_htree_create_dir_info(struct file *filp, - loff_t pos) +static void ext4_htree_init_dir_info(struct file *filp, loff_t pos) { - struct dir_private_info *p; + struct dir_private_info *p = filp->private_data; - p = kzalloc(sizeof(*p), GFP_KERNEL); - if (!p) - return NULL; - p->curr_hash = pos2maj_hash(filp, pos); - p->curr_minor_hash = pos2min_hash(filp, pos); - return p; + if (is_dx_dir(file_inode(filp)) && !p->initialized) { + p->curr_hash = pos2maj_hash(filp, pos); + p->curr_minor_hash = pos2min_hash(filp, pos); + p->initialized = true; + } } void ext4_htree_free_dir_info(struct dir_private_info *p) @@ -552,12 +551,7 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx) struct fname *fname; int ret = 0; - if (!info) { - info 
= ext4_htree_create_dir_info(file, ctx->pos); - if (!info) - return -ENOMEM; - file->private_data = info; - } + ext4_htree_init_dir_info(file, ctx->pos); if (ctx->pos == ext4_get_htree_eof(file)) return 0; /* EOF */ @@ -590,10 +584,10 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx) * cached entries. */ if ((!info->curr_node) || - !inode_eq_iversion(inode, file->f_version)) { + !inode_eq_iversion(inode, info->cookie)) { info->curr_node = NULL; free_rb_tree_fname(&info->root); - file->f_version = inode_query_iversion(inode); + info->cookie = inode_query_iversion(inode); ret = ext4_htree_fill_tree(file, info->curr_hash, info->curr_minor_hash, &info->next_hash); @@ -664,7 +658,19 @@ int ext4_check_all_de(struct inode *dir, struct buffer_head *bh, void *buf, return 0; } +static int ext4_dir_open(struct inode *inode, struct file *file) +{ + struct dir_private_info *info; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return -ENOMEM; + file->private_data = info; + return 0; +} + const struct file_operations ext4_dir_operations = { + .open = ext4_dir_open, .llseek = ext4_dir_llseek, .read = generic_read_dir, .iterate_shared = ext4_readdir, diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 08acd152261e..d62a4b9b26ce 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2553,6 +2553,8 @@ struct dir_private_info { __u32 curr_hash; __u32 curr_minor_hash; __u32 next_hash; + u64 cookie; + bool initialized; }; /* calculate the first block number of the group */ diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index e7a09a99837b..4282e12dc405 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1460,6 +1460,7 @@ int ext4_read_inline_dir(struct file *file, struct ext4_iloc iloc; void *dir_buf = NULL; int dotdot_offset, dotdot_size, extra_offset, extra_size; + struct dir_private_info *info = file->private_data; ret = ext4_get_inode_loc(inode, &iloc); if (ret) @@ -1503,12 +1504,12 @@ int ext4_read_inline_dir(struct file *file, extra_size = 
extra_offset + inline_size; /* - * If the version has changed since the last call to + * If the cookie has changed since the last call to * readdir(2), then we might be pointing to an invalid * dirent right now. Scan from the start of the inline * dir to make sure. */ - if (!inode_eq_iversion(inode, file->f_version)) { + if (!inode_eq_iversion(inode, info->cookie)) { for (i = 0; i < extra_size && i < offset;) { /* * "." is with offset 0 and @@ -1540,7 +1541,7 @@ int ext4_read_inline_dir(struct file *file, } offset = i; ctx->pos = offset; - file->f_version = inode_query_iversion(inode); + info->cookie = inode_query_iversion(inode); } while (ctx->pos < extra_size) { diff --git a/fs/file_table.c b/fs/file_table.c index 3ef558f27a1c..7ce4d5dac080 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -156,6 +156,13 @@ static int init_file(struct file *f, int flags, const struct cred *cred) } spin_lock_init(&f->f_lock); + /* + * Note that f_pos_lock is only used for files raising + * FMODE_ATOMIC_POS and directories. Other files such as pipes + * don't need it and since f_pos_lock is in a union may reuse + * the space for other purposes. They are expected to initialize + * the respective member when opening the file. 
+ */ mutex_init(&f->f_pos_lock); f->f_flags = flags; f->f_mode = OPEN_FMODE(flags); diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index f0beb173dbba..ccef3f42b333 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -1932,6 +1932,7 @@ int ocfs2_readdir(struct file *file, struct dir_context *ctx) { int error = 0; struct inode *inode = file_inode(file); + struct ocfs2_file_private *fp = file->private_data; int lock_level = 0; trace_ocfs2_readdir((unsigned long long)OCFS2_I(inode)->ip_blkno); @@ -1952,7 +1953,7 @@ int ocfs2_readdir(struct file *file, struct dir_context *ctx) goto bail_nolock; } - error = ocfs2_dir_foreach_blk(inode, &file->f_version, ctx, false); + error = ocfs2_dir_foreach_blk(inode, &fp->cookie, ctx, false); ocfs2_inode_unlock(inode, lock_level); if (error) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index ccc57038a977..115ab2172820 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2750,6 +2750,13 @@ static loff_t ocfs2_remap_file_range(struct file *file_in, loff_t pos_in, return remapped > 0 ? remapped : ret; } +static loff_t ocfs2_dir_llseek(struct file *file, loff_t offset, int whence) +{ + struct ocfs2_file_private *fp = file->private_data; + + return generic_llseek_cookie(file, offset, whence, &fp->cookie); +} + const struct inode_operations ocfs2_file_iops = { .setattr = ocfs2_setattr, .getattr = ocfs2_getattr, @@ -2797,7 +2804,7 @@ const struct file_operations ocfs2_fops = { WRAP_DIR_ITER(ocfs2_readdir) // FIXME! 
const struct file_operations ocfs2_dops = { - .llseek = generic_file_llseek, + .llseek = ocfs2_dir_llseek, .read = generic_read_dir, .iterate_shared = shared_ocfs2_readdir, .fsync = ocfs2_sync_file, @@ -2843,7 +2850,7 @@ const struct file_operations ocfs2_fops_no_plocks = { }; const struct file_operations ocfs2_dops_no_plocks = { - .llseek = generic_file_llseek, + .llseek = ocfs2_dir_llseek, .read = generic_read_dir, .iterate_shared = shared_ocfs2_readdir, .fsync = ocfs2_sync_file, diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h index 8e53e4ac1120..41e65e45a9f3 100644 --- a/fs/ocfs2/file.h +++ b/fs/ocfs2/file.h @@ -20,6 +20,7 @@ struct ocfs2_alloc_context; enum ocfs2_alloc_restarted; struct ocfs2_file_private { + u64 cookie; struct file *fp_file; struct mutex fp_mutex; struct ocfs2_lock_res fp_flock; diff --git a/fs/pipe.c b/fs/pipe.c index 7dff2aa50a6d..d3735f1d6f00 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -686,7 +686,7 @@ pipe_poll(struct file *filp, poll_table *wait) if (filp->f_mode & FMODE_READ) { if (!pipe_empty(head, tail)) mask |= EPOLLIN | EPOLLRDNORM; - if (!pipe->writers && filp->f_version != pipe->w_counter) + if (!pipe->writers && filp->f_pipe != pipe->w_counter) mask |= EPOLLHUP; } @@ -945,6 +945,7 @@ int create_pipe_files(struct file **res, int flags) } f->private_data = inode->i_pipe; + f->f_pipe = 0; res[0] = alloc_file_clone(f, O_RDONLY | (flags & O_NONBLOCK), &pipefifo_fops); @@ -954,6 +955,7 @@ int create_pipe_files(struct file **res, int flags) return PTR_ERR(res[0]); } res[0]->private_data = inode->i_pipe; + res[0]->f_pipe = 0; res[1] = f; stream_open(inode, res[0]); stream_open(inode, res[1]); @@ -1108,7 +1110,7 @@ static int fifo_open(struct inode *inode, struct file *filp) bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC; int ret; - filp->f_version = 0; + filp->f_pipe = 0; spin_lock(&inode->i_lock); if (inode->i_pipe) { @@ -1155,7 +1157,7 @@ static int fifo_open(struct inode *inode, struct file *filp) if ((filp->f_flags & O_NONBLOCK)) { 
/* suppress EPOLLHUP until we have * seen a writer */ - filp->f_version = pipe->w_counter; + filp->f_pipe = pipe->w_counter; } else { if (wait_for_partner(pipe, &pipe->w_counter)) goto err_rd; diff --git a/fs/proc/base.c b/fs/proc/base.c index 72a1acd03675..988f6bbfc4bd 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3870,12 +3870,12 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx) if (!dir_emit_dots(file, ctx)) return 0; - /* f_version caches the tgid value that the last readdir call couldn't - * return. lseek aka telldir automagically resets f_version to 0. + /* We cache the tgid value that the last readdir call couldn't + * return and lseek resets it to 0. */ ns = proc_pid_ns(inode->i_sb); - tid = (int)file->f_version; - file->f_version = 0; + tid = (int)(intptr_t)file->private_data; + file->private_data = NULL; for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns); task; task = next_tid(task), ctx->pos++) { @@ -3890,7 +3890,7 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx) proc_task_instantiate, task, NULL)) { /* returning this tgid failed, save it as the first * pid for the next readir call */ - file->f_version = (u64)tid; + file->private_data = (void *)(intptr_t)tid; put_task_struct(task); break; } @@ -3915,6 +3915,24 @@ static int proc_task_getattr(struct mnt_idmap *idmap, return 0; } +/* + * proc_task_readdir() set @file->private_data to a positive integer + * value, so casting that to u64 is safe. generic_llseek_cookie() will + * set @cookie to 0, so casting to an int is safe. The WARN_ON_ONCE() is + * here to catch any unexpected change in behavior either in + * proc_task_readdir() or generic_llseek_cookie(). 
+ */ +static loff_t proc_dir_llseek(struct file *file, loff_t offset, int whence) +{ + u64 cookie = (u64)(intptr_t)file->private_data; + loff_t off; + + off = generic_llseek_cookie(file, offset, whence, &cookie); + WARN_ON_ONCE(cookie > INT_MAX); + file->private_data = (void *)(intptr_t)cookie; /* serialized by f_pos_lock */ + return off; +} + static const struct inode_operations proc_task_inode_operations = { .lookup = proc_task_lookup, .getattr = proc_task_getattr, @@ -3925,7 +3943,7 @@ static const struct inode_operations proc_task_inode_operations = { static const struct file_operations proc_task_operations = { .read = generic_read_dir, .iterate_shared = proc_task_readdir, - .llseek = generic_file_llseek, + .llseek = proc_dir_llseek, }; void __init set_proc_pid_nlink(void) diff --git a/fs/read_write.c b/fs/read_write.c index 90e283b31ca1..b19cce8e55b9 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -39,6 +39,35 @@ static inline bool unsigned_offsets(struct file *file) return file->f_mode & FMODE_UNSIGNED_OFFSET; } +/** + * vfs_setpos_cookie - update the file offset for lseek and reset cookie + * @file: file structure in question + * @offset: file offset to seek to + * @maxsize: maximum file size + * @cookie: cookie to reset + * + * Update the file offset to the value specified by @offset if the given + * offset is valid and it is not equal to the current file offset and + * reset the specified cookie to indicate that a seek happened. + * + * Return the specified offset on success and -EINVAL on invalid offset. 
+ */ +static loff_t vfs_setpos_cookie(struct file *file, loff_t offset, + loff_t maxsize, u64 *cookie) +{ + if (offset < 0 && !unsigned_offsets(file)) + return -EINVAL; + if (offset > maxsize) + return -EINVAL; + + if (offset != file->f_pos) { + file->f_pos = offset; + if (cookie) + *cookie = 0; + } + return offset; +} + /** * vfs_setpos - update the file offset for lseek * @file: file structure in question @@ -53,19 +82,63 @@ static inline bool unsigned_offsets(struct file *file) */ loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize) { - if (offset < 0 && !unsigned_offsets(file)) - return -EINVAL; - if (offset > maxsize) - return -EINVAL; - - if (offset != file->f_pos) { - file->f_pos = offset; - file->f_version = 0; - } - return offset; + return vfs_setpos_cookie(file, offset, maxsize, NULL); } EXPORT_SYMBOL(vfs_setpos); +/** + * must_set_pos - check whether f_pos has to be updated + * @file: file to seek on + * @offset: offset to use + * @whence: type of seek operation + * @eof: end of file + * + * Check whether f_pos needs to be updated and update @offset according + * to @whence. + * + * Return: 0 if f_pos doesn't need to be updated, 1 if f_pos has to be + * updated, and negative error code on failure. + */ +static int must_set_pos(struct file *file, loff_t *offset, int whence, loff_t eof) +{ + switch (whence) { + case SEEK_END: + *offset += eof; + break; + case SEEK_CUR: + /* + * Here we special-case the lseek(fd, 0, SEEK_CUR) + * position-querying operation. Avoid rewriting the "same" + * f_pos value back to the file because a concurrent read(), + * write() or lseek() might have altered it + */ + if (*offset == 0) { + *offset = file->f_pos; + return 0; + } + break; + case SEEK_DATA: + /* + * In the generic case the entire file is data, so as long as + * offset isn't at the end of the file then the offset is data. 
+ */ + if ((unsigned long long)*offset >= eof) + return -ENXIO; + break; + case SEEK_HOLE: + /* + * There is a virtual hole at the end of the file, so as long as + * offset isn't i_size or larger, return i_size. + */ + if ((unsigned long long)*offset >= eof) + return -ENXIO; + *offset = eof; + break; + } + + return 1; +} + /** * generic_file_llseek_size - generic llseek implementation for regular files * @file: file structure to seek on @@ -86,51 +159,73 @@ loff_t generic_file_llseek_size(struct file *file, loff_t offset, int whence, loff_t maxsize, loff_t eof) { - switch (whence) { - case SEEK_END: - offset += eof; - break; - case SEEK_CUR: - /* - * Here we special-case the lseek(fd, 0, SEEK_CUR) - * position-querying operation. Avoid rewriting the "same" - * f_pos value back to the file because a concurrent read(), - * write() or lseek() might have altered it - */ - if (offset == 0) - return file->f_pos; - /* - * f_lock protects against read/modify/write race with other - * SEEK_CURs. Note that parallel writes and reads behave - * like SEEK_SET. - */ - spin_lock(&file->f_lock); - offset = vfs_setpos(file, file->f_pos + offset, maxsize); - spin_unlock(&file->f_lock); + int ret; + + ret = must_set_pos(file, &offset, whence, eof); + if (ret < 0) + return ret; + if (ret == 0) return offset; - case SEEK_DATA: + + if (whence == SEEK_CUR) { /* - * In the generic case the entire file is data, so as long as - * offset isn't at the end of the file then the offset is data. + * f_lock protects against read/modify/write race with + * other SEEK_CURs. Note that parallel writes and reads + * behave like SEEK_SET. */ - if ((unsigned long long)offset >= eof) - return -ENXIO; - break; - case SEEK_HOLE: - /* - * There is a virtual hole at the end of the file, so as long as - * offset isn't i_size or larger, return i_size. 
- */ - if ((unsigned long long)offset >= eof) - return -ENXIO; - offset = eof; - break; + guard(spinlock)(&file->f_lock); + return vfs_setpos(file, file->f_pos + offset, maxsize); } return vfs_setpos(file, offset, maxsize); } EXPORT_SYMBOL(generic_file_llseek_size); +/** + * generic_llseek_cookie - versioned llseek implementation + * @file: file structure to seek on + * @offset: file offset to seek to + * @whence: type of seek + * @cookie: cookie to update + * + * See generic_file_llseek for a general description and locking assumptions. + * + * In contrast to generic_file_llseek, this function also resets a + * specified cookie to indicate a seek took place. + */ +loff_t generic_llseek_cookie(struct file *file, loff_t offset, int whence, + u64 *cookie) +{ + struct inode *inode = file->f_mapping->host; + loff_t maxsize = inode->i_sb->s_maxbytes; + loff_t eof = i_size_read(inode); + int ret; + + if (WARN_ON_ONCE(!cookie)) + return -EINVAL; + + /* + * Require that this is only used for directories that guarantee + * synchronization between readdir and seek so that an update to + * @cookie is correctly synchronized with concurrent readdir. + */ + if (WARN_ON_ONCE(!(file->f_mode & FMODE_ATOMIC_POS))) + return -EINVAL; + + ret = must_set_pos(file, &offset, whence, eof); + if (ret < 0) + return ret; + if (ret == 0) + return offset; + + /* No need to hold f_lock because we know that f_pos_lock is held. 
*/ + if (whence == SEEK_CUR) + return vfs_setpos_cookie(file, file->f_pos + offset, maxsize, cookie); + + return vfs_setpos_cookie(file, offset, maxsize, cookie); +} +EXPORT_SYMBOL(generic_llseek_cookie); + /** * generic_file_llseek - generic llseek implementation for regular files * @file: file structure to seek on @@ -270,10 +365,8 @@ loff_t default_llseek(struct file *file, loff_t offset, int whence) } retval = -EINVAL; if (offset >= 0 || unsigned_offsets(file)) { - if (offset != file->f_pos) { + if (offset != file->f_pos) file->f_pos = offset; - file->f_version = 0; - } retval = offset; } out: diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index c77ea57fe696..fda82f3e16e8 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -555,6 +555,11 @@ static unsigned int vfs_dent_type(uint8_t type) return 0; } +struct ubifs_dir_data { + struct ubifs_dent_node *dent; + u64 cookie; +}; + /* * The classical Unix view for directory is that it is a linear array of * (name, inode number) entries. Linux/VFS assumes this model as well. @@ -582,6 +587,7 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) struct inode *dir = file_inode(file); struct ubifs_info *c = dir->i_sb->s_fs_info; bool encrypted = IS_ENCRYPTED(dir); + struct ubifs_dir_data *data = file->private_data; dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, ctx->pos); @@ -604,27 +610,27 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) fstr_real_len = fstr.len; } - if (file->f_version == 0) { + if (data->cookie == 0) { /* - * The file was seek'ed, which means that @file->private_data + * The file was seek'ed, which means that @data->dent * is now invalid. This may also be just the first * 'ubifs_readdir()' invocation, in which case - * @file->private_data is NULL, and the below code is + * @data->dent is NULL, and the below code is * basically a no-op. 
*/ - kfree(file->private_data); - file->private_data = NULL; + kfree(data->dent); + data->dent = NULL; } /* - * 'generic_file_llseek()' unconditionally sets @file->f_version to - * zero, and we use this for detecting whether the file was seek'ed. + * 'ubifs_dir_llseek()' sets @data->cookie to zero, and we use this + * for detecting whether the file was seek'ed. */ - file->f_version = 1; + data->cookie = 1; /* File positions 0 and 1 correspond to "." and ".." */ if (ctx->pos < 2) { - ubifs_assert(c, !file->private_data); + ubifs_assert(c, !data->dent); if (!dir_emit_dots(file, ctx)) { if (encrypted) fscrypt_fname_free_buffer(&fstr); @@ -641,10 +647,10 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) } ctx->pos = key_hash_flash(c, &dent->key); - file->private_data = dent; + data->dent = dent; } - dent = file->private_data; + dent = data->dent; if (!dent) { /* * The directory was seek'ed to and is now readdir'ed. @@ -658,7 +664,7 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) goto out; } ctx->pos = key_hash_flash(c, &dent->key); - file->private_data = dent; + data->dent = dent; } while (1) { @@ -701,15 +707,15 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) goto out; } - kfree(file->private_data); + kfree(data->dent); ctx->pos = key_hash_flash(c, &dent->key); - file->private_data = dent; + data->dent = dent; cond_resched(); } out: - kfree(file->private_data); - file->private_data = NULL; + kfree(data->dent); + data->dent = NULL; if (encrypted) fscrypt_fname_free_buffer(&fstr); @@ -733,7 +739,10 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) /* Free saved readdir() state when the directory is closed */ static int ubifs_dir_release(struct inode *dir, struct file *file) { - kfree(file->private_data); + struct ubifs_dir_data *data = file->private_data; + + kfree(data->dent); + kfree(data); file->private_data = NULL; return 0; } @@ -1712,6 +1721,24 @@ int ubifs_getattr(struct 
mnt_idmap *idmap, const struct path *path, return 0; } +static int ubifs_dir_open(struct inode *inode, struct file *file) +{ + struct ubifs_dir_data *data; + + data = kzalloc(sizeof(struct ubifs_dir_data), GFP_KERNEL); + if (!data) + return -ENOMEM; + file->private_data = data; + return 0; +} + +static loff_t ubifs_dir_llseek(struct file *file, loff_t offset, int whence) +{ + struct ubifs_dir_data *data = file->private_data; + + return generic_llseek_cookie(file, offset, whence, &data->cookie); +} + const struct inode_operations ubifs_dir_inode_operations = { .lookup = ubifs_lookup, .create = ubifs_create, @@ -1732,7 +1759,8 @@ const struct inode_operations ubifs_dir_inode_operations = { }; const struct file_operations ubifs_dir_operations = { - .llseek = generic_file_llseek, + .open = ubifs_dir_open, + .llseek = ubifs_dir_llseek, .release = ubifs_dir_release, .read = generic_read_dir, .iterate_shared = ubifs_readdir, diff --git a/fs/udf/dir.c b/fs/udf/dir.c index f94f45fe2c91..5023dfe191e8 100644 --- a/fs/udf/dir.c +++ b/fs/udf/dir.c @@ -60,7 +60,7 @@ static int udf_readdir(struct file *file, struct dir_context *ctx) * identifying beginning of dir entry (names are under user control), * we need to scan the directory from the beginning. 
*/ - if (!inode_eq_iversion(dir, file->f_version)) { + if (!inode_eq_iversion(dir, *(u64 *)file->private_data)) { emit_pos = nf_pos; nf_pos = 0; } else { @@ -122,15 +122,37 @@ static int udf_readdir(struct file *file, struct dir_context *ctx) udf_fiiter_release(&iter); out: if (pos_valid) - file->f_version = inode_query_iversion(dir); + *(u64 *)file->private_data = inode_query_iversion(dir); kfree(fname); return ret; } +static int udf_dir_open(struct inode *inode, struct file *file) +{ + file->private_data = kzalloc(sizeof(u64), GFP_KERNEL); + if (!file->private_data) + return -ENOMEM; + return 0; +} + +static int udf_dir_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + +static loff_t udf_dir_llseek(struct file *file, loff_t offset, int whence) +{ + return generic_llseek_cookie(file, offset, whence, + (u64 *)file->private_data); +} + /* readdir and lookup functions */ const struct file_operations udf_dir_operations = { - .llseek = generic_file_llseek, + .open = udf_dir_open, + .release = udf_dir_release, + .llseek = udf_dir_llseek, .read = generic_read_dir, .iterate_shared = udf_readdir, .unlocked_ioctl = udf_ioctl, diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index 61f25d3cf3f7..335f0ae529b4 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c @@ -435,7 +435,7 @@ ufs_readdir(struct file *file, struct dir_context *ctx) unsigned long n = pos >> PAGE_SHIFT; unsigned long npages = dir_pages(inode); unsigned chunk_mask = ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1); - bool need_revalidate = !inode_eq_iversion(inode, file->f_version); + bool need_revalidate = !inode_eq_iversion(inode, *(u64 *)file->private_data); unsigned flags = UFS_SB(sb)->s_flags; UFSD("BEGIN\n"); @@ -462,7 +462,7 @@ ufs_readdir(struct file *file, struct dir_context *ctx) offset = ufs_validate_entry(sb, kaddr, offset, chunk_mask); ctx->pos = (n<<PAGE_SHIFT) + offset; } - file->f_version = inode_query_iversion(inode); + *(u64 *)file->private_data = inode_query_iversion(inode); need_revalidate = false; }
de = (struct ufs_dir_entry *)(kaddr+offset); @@ -646,9 +646,31 @@ int ufs_empty_dir(struct inode * inode) return 0; } +static int ufs_dir_open(struct inode *inode, struct file *file) +{ + file->private_data = kzalloc(sizeof(u64), GFP_KERNEL); + if (!file->private_data) + return -ENOMEM; + return 0; +} + +static int ufs_dir_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + +static loff_t ufs_dir_llseek(struct file *file, loff_t offset, int whence) +{ + return generic_llseek_cookie(file, offset, whence, + (u64 *)file->private_data); +} + const struct file_operations ufs_dir_operations = { + .open = ufs_dir_open, + .release = ufs_dir_release, .read = generic_read_dir, .iterate_shared = ufs_readdir, .fsync = generic_file_fsync, - .llseek = generic_file_llseek, + .llseek = ufs_dir_llseek, }; diff --git a/include/linux/fs.h b/include/linux/fs.h index 58c91a52cad1..7e11ce172140 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1001,8 +1001,8 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) * @f_cred: stashed credentials of creator/opener * @f_path: path of the file * @f_pos_lock: lock protecting file position + * @f_pipe: specific to pipes * @f_pos: file position - * @f_version: file version * @f_security: LSM security context of this file * @f_owner: file owner * @f_wb_err: writeback error @@ -1026,13 +1026,17 @@ struct file { const struct cred *f_cred; /* --- cacheline 1 boundary (64 bytes) --- */ struct path f_path; - struct mutex f_pos_lock; + union { + /* regular files (with FMODE_ATOMIC_POS) and directories */ + struct mutex f_pos_lock; + /* pipes */ + u64 f_pipe; + }; loff_t f_pos; - u64 f_version; - /* --- cacheline 2 boundary (128 bytes) --- */ #ifdef CONFIG_SECURITY void *f_security; #endif + /* --- cacheline 2 boundary (128 bytes) --- */ struct fown_struct *f_owner; errseq_t f_wb_err; errseq_t f_sb_err; @@ -3202,6 +3206,8 @@ extern loff_t vfs_setpos(struct file *file, loff_t 
offset, loff_t maxsize); extern loff_t generic_file_llseek(struct file *file, loff_t offset, int whence); extern loff_t generic_file_llseek_size(struct file *file, loff_t offset, int whence, loff_t maxsize, loff_t eof); +loff_t generic_llseek_cookie(struct file *file, loff_t offset, int whence, + u64 *cookie); extern loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size); extern loff_t no_seek_end_llseek_size(struct file *, loff_t, int, loff_t);