2018-09-28 14:43:23 +00:00
|
|
|
/*
|
|
|
|
FUSE: Filesystem in Userspace
|
|
|
|
Copyright (C) 2001-2018 Miklos Szeredi <miklos@szeredi.hu>
|
|
|
|
|
|
|
|
This program can be distributed under the terms of the GNU GPL.
|
|
|
|
See the file COPYING.
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
#include "fuse_i.h"
|
2018-10-01 08:07:05 +00:00
|
|
|
#include <linux/iversion.h>
|
2018-09-28 14:43:23 +00:00
|
|
|
#include <linux/posix_acl.h>
|
2018-10-01 08:07:04 +00:00
|
|
|
#include <linux/pagemap.h>
|
|
|
|
#include <linux/highmem.h>
|
2018-09-28 14:43:23 +00:00
|
|
|
|
|
|
|
static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
|
|
|
|
{
|
|
|
|
struct fuse_conn *fc = get_fuse_conn(dir);
|
|
|
|
struct fuse_inode *fi = get_fuse_inode(dir);
|
|
|
|
|
|
|
|
if (!fc->do_readdirplus)
|
|
|
|
return false;
|
|
|
|
if (!fc->readdirplus_auto)
|
|
|
|
return true;
|
|
|
|
if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
|
|
|
|
return true;
|
|
|
|
if (ctx->pos == 0)
|
|
|
|
return true;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2018-10-01 08:07:04 +00:00
|
|
|
/*
 * Append one directory entry to the readdir cache kept in the directory's
 * page cache.
 *
 * @file:   open directory; its f_mapping holds the cache pages
 * @dirent: entry to copy into the cache
 * @pos:    directory position the entry was read at
 *
 * Nothing is added when the cache is already marked complete, when @pos is
 * not the current end position of the cache, when no page could be obtained,
 * or when the cache changed while rdc.lock was dropped for the page lookup.
 */
static void fuse_add_dirent_to_cache(struct file *file,
				     struct fuse_dirent *dirent, loff_t pos)
{
	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
	struct address_space *mapping = file->f_mapping;
	size_t entsize = FUSE_DIRENT_SIZE(dirent);
	unsigned int pg_off;
	pgoff_t pg_idx;
	loff_t snap_size;
	u64 snap_version;
	struct page *page;
	void *kaddr;

	/* Snapshot the cache state under the lock. */
	spin_lock(&fi->rdc.lock);
	if (fi->rdc.cached || pos != fi->rdc.pos) {
		/* Cache already complete, or entry does not go at its end. */
		spin_unlock(&fi->rdc.lock);
		return;
	}
	snap_version = fi->rdc.version;
	snap_size = fi->rdc.size;
	spin_unlock(&fi->rdc.lock);

	pg_off = snap_size & ~PAGE_MASK;
	pg_idx = snap_size >> PAGE_SHIFT;
	if (pg_off + entsize > PAGE_SIZE) {
		/* Entry would straddle the page end: start the next page. */
		pg_idx++;
		pg_off = 0;
	}

	/*
	 * A partially filled page is only looked up; a page about to receive
	 * its first entry is created if it does not exist.
	 */
	if (pg_off)
		page = find_lock_page(mapping, pg_idx);
	else
		page = find_or_create_page(mapping, pg_idx,
					   mapping_gfp_mask(mapping));
	if (!page)
		return;

	spin_lock(&fi->rdc.lock);
	/* Only proceed if no other readdir changed the cache meanwhile. */
	if (fi->rdc.version == snap_version && fi->rdc.size == snap_size &&
	    !WARN_ON(fi->rdc.pos != pos)) {
		kaddr = kmap_local_page(page);
		if (!pg_off) {
			/* First entry in this page: zero it, mark uptodate. */
			clear_page(kaddr);
			SetPageUptodate(page);
		}
		memcpy(kaddr + pg_off, dirent, entsize);
		kunmap_local(kaddr);

		fi->rdc.size = (pg_idx << PAGE_SHIFT) + pg_off + entsize;
		fi->rdc.pos = dirent->off;
	}
	spin_unlock(&fi->rdc.lock);

	unlock_page(page);
	put_page(page);
}
|
|
|
|
|
|
|
|
/*
 * Mark the readdir cache of @file's directory as complete.
 *
 * Does nothing unless @pos equals the cache's current end position.
 * Otherwise sets rdc.cached and truncates the mapping from the page-aligned
 * end of the cached data onwards.
 */
static void fuse_readdir_cache_end(struct file *file, loff_t pos)
{
	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
	bool at_end;
	loff_t tail = 0;

	spin_lock(&fi->rdc.lock);
	at_end = (fi->rdc.pos == pos);
	if (at_end) {
		fi->rdc.cached = true;
		tail = ALIGN(fi->rdc.size, PAGE_SIZE);
	}
	spin_unlock(&fi->rdc.lock);

	/* Drop the unused tail of the cache, outside the spinlock. */
	if (at_end)
		truncate_inode_pages(file->f_mapping, tail);
}
|
|
|
|
|
2018-09-28 14:43:23 +00:00
|
|
|
static bool fuse_emit(struct file *file, struct dir_context *ctx,
|
|
|
|
struct fuse_dirent *dirent)
|
|
|
|
{
|
2018-10-01 08:07:04 +00:00
|
|
|
struct fuse_file *ff = file->private_data;
|
|
|
|
|
|
|
|
if (ff->open_flags & FOPEN_CACHE_DIR)
|
|
|
|
fuse_add_dirent_to_cache(file, dirent, ctx->pos);
|
|
|
|
|
2018-09-28 14:43:23 +00:00
|
|
|
return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino,
|
|
|
|
dirent->type);
|
|
|
|
}
|
|
|
|
|
2018-09-28 14:43:23 +00:00
|
|
|
static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
|
|
|
|
struct dir_context *ctx)
|
|
|
|
{
|
|
|
|
while (nbytes >= FUSE_NAME_OFFSET) {
|
|
|
|
struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
|
|
|
|
size_t reclen = FUSE_DIRENT_SIZE(dirent);
|
|
|
|
if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
|
|
|
|
return -EIO;
|
|
|
|
if (reclen > nbytes)
|
|
|
|
break;
|
|
|
|
if (memchr(dirent->name, '/', dirent->namelen) != NULL)
|
|
|
|
return -EIO;
|
|
|
|
|
2018-09-28 14:43:23 +00:00
|
|
|
if (!fuse_emit(file, ctx, dirent))
|
2018-09-28 14:43:23 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
buf += reclen;
|
|
|
|
nbytes -= reclen;
|
|
|
|
ctx->pos = dirent->off;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Instantiate or refresh the dentry and inode for one READDIRPLUS entry.
 *
 * @file:         open directory the entry was read from
 * @direntplus:   entry as returned by the server (dirent + entry_out)
 * @attr_version: attribute version passed on to fuse_change_attributes() and
 *                fuse_iget()
 * @evict_ctr:    eviction counter passed on to fuse_iget()
 *
 * Both counters are passed to fuse_iget() so that a newly initialised inode
 * still goes through the attr_version staleness check: fc->evict_ctr records
 * whether an inode eviction happened while the readdirplus request was being
 * processed, in which case the returned attributes may be stale.
 *
 * Returns 0 on success and also when there is nothing to link (zero nodeid,
 * "." or ".."); -EIO for an invalid nodeid, invalid attributes or a bad
 * inode; otherwise the error from d_alloc_parallel() or d_splice_alias().
 */
static int fuse_direntplus_link(struct file *file,
				struct fuse_direntplus *direntplus,
				u64 attr_version, u64 evict_ctr)
{
	struct fuse_entry_out *eout = &direntplus->entry_out;
	struct fuse_dirent *ent = &direntplus->dirent;
	struct dentry *parent = file->f_path.dentry;
	struct inode *dir = d_inode(parent);
	struct qstr name = QSTR_INIT(ent->name, ent->namelen);
	struct fuse_conn *conn;
	struct fuse_inode *fi;
	struct inode *inode;
	struct dentry *dentry;
	struct dentry *spliced;
	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);

	/*
	 * Unlike in the case of fuse_lookup, zero nodeid does not mean
	 * ENOENT.  Instead, it only means the userspace filesystem did
	 * not want to return attributes/handle for this entry.
	 *
	 * So do nothing.
	 */
	if (!eout->nodeid)
		return 0;

	/*
	 * "." and "..": we could potentially refresh the attributes of the
	 * directory and its parent, but for now they are skipped.
	 */
	if (name.name[0] == '.') {
		if (name.len == 1)
			return 0;
		if (name.len == 2 && name.name[1] == '.')
			return 0;
	}

	if (invalid_nodeid(eout->nodeid) || fuse_invalid_attr(&eout->attr))
		return -EIO;

	conn = get_fuse_conn(dir);
	name.hash = full_name_hash(parent, name.name, name.len);

	dentry = d_lookup(parent, &name);
	if (!dentry) {
retry:
		dentry = d_alloc_parallel(parent, &name, &wq);
		if (IS_ERR(dentry))
			return PTR_ERR(dentry);
	}

	if (d_in_lookup(dentry)) {
		/* Fresh in-lookup dentry: get the inode; fuse_iget() bumps nlookup. */
		inode = fuse_iget(dir->i_sb, eout->nodeid, eout->generation,
				  &eout->attr, ATTR_TIMEOUT(eout),
				  attr_version, evict_ctr);
		if (!inode)
			inode = ERR_PTR(-ENOMEM);

		spliced = d_splice_alias(inode, dentry);
		d_lookup_done(dentry);
		if (spliced) {
			dput(dentry);
			dentry = spliced;
		}
		if (IS_ERR(dentry)) {
			/* Splice failed: undo the lookup count on a valid inode. */
			if (!IS_ERR(inode)) {
				fi = get_fuse_inode(inode);
				spin_lock(&fi->lock);
				fi->nlookup--;
				spin_unlock(&fi->lock);
			}
			return PTR_ERR(dentry);
		}
	} else {
		/* Existing dentry: check it still refers to the same node. */
		inode = d_inode(dentry);
		if (inode && get_node_id(inode) != eout->nodeid)
			inode = NULL;

		if (!inode ||
		    fuse_stale_inode(inode, eout->generation, &eout->attr)) {
			if (inode)
				fuse_make_bad(inode);
			d_invalidate(dentry);
			dput(dentry);
			goto retry;
		}

		if (fuse_is_bad(inode)) {
			dput(dentry);
			return -EIO;
		}

		/* This path does not go through fuse_iget(): bump nlookup here. */
		fi = get_fuse_inode(inode);
		spin_lock(&fi->lock);
		fi->nlookup++;
		spin_unlock(&fi->lock);

		forget_all_cached_acls(inode);
		fuse_change_attributes(inode, &eout->attr, NULL,
				       ATTR_TIMEOUT(eout), attr_version);
	}

	if (conn->readdirplus_auto)
		set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
	fuse_change_entry_timeout(dentry, eout);

	dput(dentry);
	return 0;
}
|
|
|
|
|
2019-09-10 13:04:08 +00:00
|
|
|
/*
 * Send a FUSE_FORGET for @nodeid with a lookup count of one.
 *
 * The request is marked force and noreply; the return value of
 * fuse_simple_request() is deliberately ignored.
 */
static void fuse_force_forget(struct file *file, u64 nodeid)
{
	struct fuse_mount *fm = get_fuse_mount(file_inode(file));
	struct fuse_forget_in forget_in;
	FUSE_ARGS(args);

	memset(&forget_in, 0, sizeof(forget_in));
	forget_in.nlookup = 1;

	args.opcode = FUSE_FORGET;
	args.nodeid = nodeid;
	args.force = true;
	args.noreply = true;
	args.in_numargs = 1;
	args.in_args[0].value = &forget_in;
	args.in_args[0].size = sizeof(forget_in);

	/* ignore errors */
	fuse_simple_request(fm, &args);
}
|
|
|
|
|
2018-09-28 14:43:23 +00:00
|
|
|
/*
 * Walk a buffer of struct fuse_direntplus records, emitting and linking them.
 *
 * @buf:          start of the records
 * @nbytes:       number of valid bytes in @buf
 * @file:         open directory being read
 * @ctx:          readdir context; ctx->pos follows the emitted entries
 * @attr_version: passed through to fuse_direntplus_link()
 * @evict_ctr:    passed through to fuse_direntplus_link()
 *
 * Entries are emitted only until fuse_emit() first returns false, but every
 * remaining record is still linked.  An entry whose linking fails gets a
 * forced FORGET instead.
 *
 * Returns 0, or -EIO if a record has a zero or over-long name length or a
 * name containing '/'.
 */
static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
			     struct dir_context *ctx, u64 attr_version,
			     u64 evict_ctr)
{
	bool ctx_full = false;

	while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
		struct fuse_direntplus *plus = (struct fuse_direntplus *)buf;
		struct fuse_dirent *ent = &plus->dirent;
		size_t entsize = FUSE_DIRENTPLUS_SIZE(plus);

		if (ent->namelen == 0 || ent->namelen > FUSE_NAME_MAX)
			return -EIO;
		/* Truncated trailing record: ignore it. */
		if (entsize > nbytes)
			break;
		if (memchr(ent->name, '/', ent->namelen))
			return -EIO;

		/*
		 * We fill entries into dstbuf only as much as it can hold.
		 * But we still continue iterating over remaining entries to
		 * link them.  If not, we need to send a FORGET for each of
		 * those which we did not link.
		 */
		if (!ctx_full) {
			if (fuse_emit(file, ctx, ent))
				ctx->pos = ent->off;
			else
				ctx_full = true;
		}

		buf += entsize;
		nbytes -= entsize;

		if (fuse_direntplus_link(file, plus, attr_version, evict_ctr))
			fuse_force_forget(file, plus->entry_out.nodeid);
	}

	return 0;
}
|
|
|
|
|
2018-10-01 08:07:04 +00:00
|
|
|
static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
|
2018-09-28 14:43:23 +00:00
|
|
|
{
|
2019-09-10 13:04:10 +00:00
|
|
|
int plus;
|
|
|
|
ssize_t res;
|
2024-10-24 17:18:01 +00:00
|
|
|
struct folio *folio;
|
2018-09-28 14:43:23 +00:00
|
|
|
struct inode *inode = file_inode(file);
|
2020-05-06 15:44:12 +00:00
|
|
|
struct fuse_mount *fm = get_fuse_mount(inode);
|
2019-09-10 13:04:10 +00:00
|
|
|
struct fuse_io_args ia = {};
|
|
|
|
struct fuse_args_pages *ap = &ia.ap;
|
2024-10-24 17:18:01 +00:00
|
|
|
struct fuse_folio_desc desc = { .length = PAGE_SIZE };
|
fuse: check attributes staleness on fuse_iget()
Function fuse_direntplus_link() might call fuse_iget() to initialize a new
fuse_inode and change its attributes. If fi->attr_version is always
initialized with 0, even if the attributes returned by the FUSE_READDIR
request is staled, as the new fi->attr_version is 0, fuse_change_attributes
will still set the staled attributes to inode. This wrong behaviour may
cause file size inconsistency even when there is no changes from
server-side.
To reproduce the issue, consider the following 2 programs (A and B) are
running concurrently,
A B
---------------------------------- --------------------------------
{ /fusemnt/dir/f is a file path in a fuse mount, the size of f is 0. }
readdir(/fusemnt/dir) start
//Daemon set size 0 to f direntry
fallocate(f, 1024)
stat(f) // B see size 1024
echo 2 > /proc/sys/vm/drop_caches
readdir(/fusemnt/dir) reply to kernel
Kernel set 0 to the I_NEW inode
stat(f) // B see size 0
In the above case, only program B is modifying the file size, however, B
observes file size changing between the 2 'readonly' stat() calls. To fix
this issue, we should make sure readdirplus still follows the rule of
attr_version staleness checking even if the fi->attr_version is lost due to
inode eviction.
To identify this situation, the new fc->evict_ctr is used to record whether
the eviction of inodes occurs during the readdirplus request processing.
If it does, the result of readdirplus may be inaccurate; otherwise, the
result of readdirplus can be trusted. Although this may still lead to
incorrect invalidation, considering the relatively low frequency of
evict occurrences, it should be acceptable.
Link: https://lore.kernel.org/lkml/20230711043405.66256-2-zhangjiachen.jaycee@bytedance.com/
Link: https://lore.kernel.org/lkml/20241114070905.48901-1-zhangtianci.1997@bytedance.com/
Reported-by: Jiachen Zhang <zhangjiachen.jaycee@bytedance.com>
Suggested-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Zhang Tianci <zhangtianci.1997@bytedance.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
2024-11-18 10:16:00 +00:00
|
|
|
u64 attr_version = 0, evict_ctr = 0;
|
2018-09-28 14:43:23 +00:00
|
|
|
bool locked;
|
|
|
|
|
2024-10-24 17:18:01 +00:00
|
|
|
folio = folio_alloc(GFP_KERNEL, 0);
|
|
|
|
if (!folio)
|
2018-09-28 14:43:23 +00:00
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
plus = fuse_use_readdirplus(inode, ctx);
|
2020-01-14 12:39:45 +00:00
|
|
|
ap->args.out_pages = true;
|
2024-10-24 17:18:01 +00:00
|
|
|
ap->num_folios = 1;
|
|
|
|
ap->folios = &folio;
|
2024-10-24 17:18:09 +00:00
|
|
|
ap->descs = &desc;
|
2018-09-28 14:43:23 +00:00
|
|
|
if (plus) {
|
2020-05-06 15:44:12 +00:00
|
|
|
attr_version = fuse_get_attr_version(fm->fc);
|
fuse: check attributes staleness on fuse_iget()
Function fuse_direntplus_link() might call fuse_iget() to initialize a new
fuse_inode and change its attributes. If fi->attr_version is always
initialized with 0, even if the attributes returned by the FUSE_READDIR
request is staled, as the new fi->attr_version is 0, fuse_change_attributes
will still set the staled attributes to inode. This wrong behaviour may
cause file size inconsistency even when there is no changes from
server-side.
To reproduce the issue, consider the following 2 programs (A and B) are
running concurrently,
A B
---------------------------------- --------------------------------
{ /fusemnt/dir/f is a file path in a fuse mount, the size of f is 0. }
readdir(/fusemnt/dir) start
//Daemon set size 0 to f direntry
fallocate(f, 1024)
stat(f) // B see size 1024
echo 2 > /proc/sys/vm/drop_caches
readdir(/fusemnt/dir) reply to kernel
Kernel set 0 to the I_NEW inode
stat(f) // B see size 0
In the above case, only program B is modifying the file size, however, B
observes file size changing between the 2 'readonly' stat() calls. To fix
this issue, we should make sure readdirplus still follows the rule of
attr_version staleness checking even if the fi->attr_version is lost due to
inode eviction.
To identify this situation, the new fc->evict_ctr is used to record whether
the eviction of inodes occurs during the readdirplus request processing.
If it does, the result of readdirplus may be inaccurate; otherwise, the
result of readdirplus can be trusted. Although this may still lead to
incorrect invalidation, considering the relatively low frequency of
evict occurrences, it should be acceptable.
Link: https://lore.kernel.org/lkml/20230711043405.66256-2-zhangjiachen.jaycee@bytedance.com/
Link: https://lore.kernel.org/lkml/20241114070905.48901-1-zhangtianci.1997@bytedance.com/
Reported-by: Jiachen Zhang <zhangjiachen.jaycee@bytedance.com>
Suggested-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Zhang Tianci <zhangtianci.1997@bytedance.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
2024-11-18 10:16:00 +00:00
|
|
|
evict_ctr = fuse_get_evict_ctr(fm->fc);
|
2019-09-10 13:04:10 +00:00
|
|
|
fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
|
|
|
|
FUSE_READDIRPLUS);
|
2018-09-28 14:43:23 +00:00
|
|
|
} else {
|
2019-09-10 13:04:10 +00:00
|
|
|
fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
|
|
|
|
FUSE_READDIR);
|
2018-09-28 14:43:23 +00:00
|
|
|
}
|
|
|
|
locked = fuse_lock_inode(inode);
|
2020-05-06 15:44:12 +00:00
|
|
|
res = fuse_simple_request(fm, &ap->args);
|
2018-09-28 14:43:23 +00:00
|
|
|
fuse_unlock_inode(inode, locked);
|
2019-09-10 13:04:10 +00:00
|
|
|
if (res >= 0) {
|
|
|
|
if (!res) {
|
2018-10-01 08:07:04 +00:00
|
|
|
struct fuse_file *ff = file->private_data;
|
|
|
|
|
|
|
|
if (ff->open_flags & FOPEN_CACHE_DIR)
|
|
|
|
fuse_readdir_cache_end(file, ctx->pos);
|
|
|
|
} else if (plus) {
|
2024-10-24 17:18:01 +00:00
|
|
|
res = parse_dirplusfile(folio_address(folio), res,
|
fuse: check attributes staleness on fuse_iget()
Function fuse_direntplus_link() might call fuse_iget() to initialize a new
fuse_inode and change its attributes. If fi->attr_version is always
initialized with 0, even if the attributes returned by the FUSE_READDIR
request is staled, as the new fi->attr_version is 0, fuse_change_attributes
will still set the staled attributes to inode. This wrong behaviour may
cause file size inconsistency even when there is no changes from
server-side.
To reproduce the issue, consider the following 2 programs (A and B) are
running concurrently,
A B
---------------------------------- --------------------------------
{ /fusemnt/dir/f is a file path in a fuse mount, the size of f is 0. }
readdir(/fusemnt/dir) start
//Daemon set size 0 to f direntry
fallocate(f, 1024)
stat(f) // B see size 1024
echo 2 > /proc/sys/vm/drop_caches
readdir(/fusemnt/dir) reply to kernel
Kernel set 0 to the I_NEW inode
stat(f) // B see size 0
In the above case, only program B is modifying the file size, however, B
observes file size changing between the 2 'readonly' stat() calls. To fix
this issue, we should make sure readdirplus still follows the rule of
attr_version staleness checking even if the fi->attr_version is lost due to
inode eviction.
To identify this situation, the new fc->evict_ctr is used to record whether
the eviction of inodes occurs during the readdirplus request processing.
If it does, the result of readdirplus may be inaccurate; otherwise, the
result of readdirplus can be trusted. Although this may still lead to
incorrect invalidation, considering the relatively low frequency of
evict occurrences, it should be acceptable.
Link: https://lore.kernel.org/lkml/20230711043405.66256-2-zhangjiachen.jaycee@bytedance.com/
Link: https://lore.kernel.org/lkml/20241114070905.48901-1-zhangtianci.1997@bytedance.com/
Reported-by: Jiachen Zhang <zhangjiachen.jaycee@bytedance.com>
Suggested-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Zhang Tianci <zhangtianci.1997@bytedance.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
2024-11-18 10:16:00 +00:00
|
|
|
file, ctx, attr_version,
|
|
|
|
evict_ctr);
|
2018-09-28 14:43:23 +00:00
|
|
|
} else {
|
2024-10-24 17:18:01 +00:00
|
|
|
res = parse_dirfile(folio_address(folio), res, file,
|
2018-09-28 14:43:23 +00:00
|
|
|
ctx);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-10-24 17:18:01 +00:00
|
|
|
folio_put(folio);
|
2018-09-28 14:43:23 +00:00
|
|
|
fuse_invalidate_atime(inode);
|
2019-09-10 13:04:10 +00:00
|
|
|
return res;
|
2018-09-28 14:43:23 +00:00
|
|
|
}
|
2018-10-01 08:07:04 +00:00
|
|
|
|
|
|
|
/* Outcome of scanning one page of the readdir cache. */
enum fuse_parse_result {
	/* a cached entry failed validation */
	FOUND_ERR = -1,
	/* no entry at the requested position was seen */
	FOUND_NONE = 0,
	/* at least one entry at the requested position was seen */
	FOUND_SOME = 1,
	/* dir_emit() refused an entry; stop reading */
	FOUND_ALL = 2,
};
|
|
|
|
|
|
|
|
static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
|
|
|
|
void *addr, unsigned int size,
|
|
|
|
struct dir_context *ctx)
|
|
|
|
{
|
|
|
|
unsigned int offset = ff->readdir.cache_off & ~PAGE_MASK;
|
|
|
|
enum fuse_parse_result res = FOUND_NONE;
|
|
|
|
|
|
|
|
WARN_ON(offset >= size);
|
|
|
|
|
|
|
|
for (;;) {
|
|
|
|
struct fuse_dirent *dirent = addr + offset;
|
|
|
|
unsigned int nbytes = size - offset;
|
2019-09-22 13:19:36 +00:00
|
|
|
size_t reclen;
|
2018-10-01 08:07:04 +00:00
|
|
|
|
|
|
|
if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen)
|
|
|
|
break;
|
|
|
|
|
2019-09-22 13:19:36 +00:00
|
|
|
reclen = FUSE_DIRENT_SIZE(dirent); /* derefs ->namelen */
|
|
|
|
|
2018-10-01 08:07:04 +00:00
|
|
|
if (WARN_ON(dirent->namelen > FUSE_NAME_MAX))
|
|
|
|
return FOUND_ERR;
|
|
|
|
if (WARN_ON(reclen > nbytes))
|
|
|
|
return FOUND_ERR;
|
|
|
|
if (WARN_ON(memchr(dirent->name, '/', dirent->namelen) != NULL))
|
|
|
|
return FOUND_ERR;
|
|
|
|
|
|
|
|
if (ff->readdir.pos == ctx->pos) {
|
|
|
|
res = FOUND_SOME;
|
|
|
|
if (!dir_emit(ctx, dirent->name, dirent->namelen,
|
|
|
|
dirent->ino, dirent->type))
|
|
|
|
return FOUND_ALL;
|
|
|
|
ctx->pos = dirent->off;
|
|
|
|
}
|
|
|
|
ff->readdir.pos = dirent->off;
|
|
|
|
ff->readdir.cache_off += reclen;
|
|
|
|
|
|
|
|
offset += reclen;
|
|
|
|
}
|
|
|
|
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
2018-10-01 08:07:04 +00:00
|
|
|
static void fuse_rdc_reset(struct inode *inode)
|
2018-10-01 08:07:04 +00:00
|
|
|
{
|
2018-10-01 08:07:04 +00:00
|
|
|
struct fuse_inode *fi = get_fuse_inode(inode);
|
|
|
|
|
2018-10-01 08:07:04 +00:00
|
|
|
fi->rdc.cached = false;
|
|
|
|
fi->rdc.version++;
|
|
|
|
fi->rdc.size = 0;
|
|
|
|
fi->rdc.pos = 0;
|
|
|
|
}
|
|
|
|
|
2018-10-01 08:07:04 +00:00
|
|
|
/* Returned by fuse_readdir_cached() when the readdir cache is not marked complete. */
#define UNCACHED 1
|
|
|
|
|
|
|
|
static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
|
|
|
|
{
|
|
|
|
struct fuse_file *ff = file->private_data;
|
|
|
|
struct inode *inode = file_inode(file);
|
2018-10-01 08:07:04 +00:00
|
|
|
struct fuse_conn *fc = get_fuse_conn(inode);
|
2018-10-01 08:07:04 +00:00
|
|
|
struct fuse_inode *fi = get_fuse_inode(inode);
|
|
|
|
enum fuse_parse_result res;
|
|
|
|
pgoff_t index;
|
|
|
|
unsigned int size;
|
|
|
|
struct page *page;
|
|
|
|
void *addr;
|
|
|
|
|
|
|
|
/* Seeked? If so, reset the cache stream */
|
|
|
|
if (ff->readdir.pos != ctx->pos) {
|
|
|
|
ff->readdir.pos = 0;
|
|
|
|
ff->readdir.cache_off = 0;
|
|
|
|
}
|
|
|
|
|
2018-10-01 08:07:04 +00:00
|
|
|
/*
|
|
|
|
* We're just about to start reading into the cache or reading the
|
|
|
|
* cache; both cases require an up-to-date mtime value.
|
|
|
|
*/
|
|
|
|
if (!ctx->pos && fc->auto_inval_data) {
|
2021-10-22 15:03:03 +00:00
|
|
|
int err = fuse_update_attributes(inode, file, STATX_MTIME);
|
2018-10-01 08:07:04 +00:00
|
|
|
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2018-10-01 08:07:04 +00:00
|
|
|
retry:
|
|
|
|
spin_lock(&fi->rdc.lock);
|
2018-10-01 08:07:04 +00:00
|
|
|
retry_locked:
|
2018-10-01 08:07:04 +00:00
|
|
|
if (!fi->rdc.cached) {
|
2018-10-01 08:07:04 +00:00
|
|
|
/* Starting cache? Set cache mtime. */
|
|
|
|
if (!ctx->pos && !fi->rdc.size) {
|
2023-10-04 18:52:24 +00:00
|
|
|
fi->rdc.mtime = inode_get_mtime(inode);
|
2018-10-01 08:07:05 +00:00
|
|
|
fi->rdc.iversion = inode_query_iversion(inode);
|
2018-10-01 08:07:04 +00:00
|
|
|
}
|
2018-10-01 08:07:04 +00:00
|
|
|
spin_unlock(&fi->rdc.lock);
|
|
|
|
return UNCACHED;
|
|
|
|
}
|
2018-10-01 08:07:04 +00:00
|
|
|
/*
|
|
|
|
* When at the beginning of the directory (i.e. just after opendir(3) or
|
|
|
|
* rewinddir(3)), then need to check whether directory contents have
|
|
|
|
* changed, and reset the cache if so.
|
|
|
|
*/
|
|
|
|
if (!ctx->pos) {
|
2023-10-04 18:52:24 +00:00
|
|
|
struct timespec64 mtime = inode_get_mtime(inode);
|
|
|
|
|
2018-10-01 08:07:05 +00:00
|
|
|
if (inode_peek_iversion(inode) != fi->rdc.iversion ||
|
2023-10-04 18:52:24 +00:00
|
|
|
!timespec64_equal(&fi->rdc.mtime, &mtime)) {
|
2018-10-01 08:07:04 +00:00
|
|
|
fuse_rdc_reset(inode);
|
|
|
|
goto retry_locked;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-10-01 08:07:04 +00:00
|
|
|
/*
|
|
|
|
* If cache version changed since the last getdents() call, then reset
|
|
|
|
* the cache stream.
|
|
|
|
*/
|
|
|
|
if (ff->readdir.version != fi->rdc.version) {
|
|
|
|
ff->readdir.pos = 0;
|
|
|
|
ff->readdir.cache_off = 0;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* If at the beginning of the cache, than reset version to
|
|
|
|
* current.
|
|
|
|
*/
|
|
|
|
if (ff->readdir.pos == 0)
|
|
|
|
ff->readdir.version = fi->rdc.version;
|
|
|
|
|
2018-10-01 08:07:04 +00:00
|
|
|
WARN_ON(fi->rdc.size < ff->readdir.cache_off);
|
|
|
|
|
|
|
|
index = ff->readdir.cache_off >> PAGE_SHIFT;
|
|
|
|
|
|
|
|
if (index == (fi->rdc.size >> PAGE_SHIFT))
|
|
|
|
size = fi->rdc.size & ~PAGE_MASK;
|
|
|
|
else
|
|
|
|
size = PAGE_SIZE;
|
|
|
|
spin_unlock(&fi->rdc.lock);
|
|
|
|
|
|
|
|
/* EOF? */
|
|
|
|
if ((ff->readdir.cache_off & ~PAGE_MASK) == size)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
page = find_get_page_flags(file->f_mapping, index,
|
|
|
|
FGP_ACCESSED | FGP_LOCK);
|
2022-10-20 15:18:58 +00:00
|
|
|
/* Page gone missing, then re-added to cache, but not initialized? */
|
|
|
|
if (page && !PageUptodate(page)) {
|
|
|
|
unlock_page(page);
|
|
|
|
put_page(page);
|
|
|
|
page = NULL;
|
|
|
|
}
|
2018-10-01 08:07:04 +00:00
|
|
|
spin_lock(&fi->rdc.lock);
|
2018-10-01 08:07:04 +00:00
|
|
|
if (!page) {
|
|
|
|
/*
|
|
|
|
* Uh-oh: page gone missing, cache is useless
|
|
|
|
*/
|
2018-10-01 08:07:04 +00:00
|
|
|
if (fi->rdc.version == ff->readdir.version)
|
2018-10-01 08:07:04 +00:00
|
|
|
fuse_rdc_reset(inode);
|
|
|
|
goto retry_locked;
|
2018-10-01 08:07:04 +00:00
|
|
|
}
|
|
|
|
|
2018-10-01 08:07:04 +00:00
|
|
|
/* Make sure it's still the same version after getting the page. */
|
|
|
|
if (ff->readdir.version != fi->rdc.version) {
|
|
|
|
spin_unlock(&fi->rdc.lock);
|
|
|
|
unlock_page(page);
|
|
|
|
put_page(page);
|
|
|
|
goto retry;
|
|
|
|
}
|
|
|
|
spin_unlock(&fi->rdc.lock);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Contents of the page are now protected against changing by holding
|
|
|
|
* the page lock.
|
|
|
|
*/
|
2022-10-12 11:23:23 +00:00
|
|
|
addr = kmap_local_page(page);
|
2018-10-01 08:07:04 +00:00
|
|
|
res = fuse_parse_cache(ff, addr, size, ctx);
|
2022-10-12 11:23:23 +00:00
|
|
|
kunmap_local(addr);
|
2018-10-01 08:07:04 +00:00
|
|
|
unlock_page(page);
|
|
|
|
put_page(page);
|
|
|
|
|
|
|
|
if (res == FOUND_ERR)
|
|
|
|
return -EIO;
|
|
|
|
|
|
|
|
if (res == FOUND_ALL)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (size == PAGE_SIZE) {
|
|
|
|
/* We hit end of page: skip to next page. */
|
|
|
|
ff->readdir.cache_off = ALIGN(ff->readdir.cache_off, PAGE_SIZE);
|
|
|
|
goto retry;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* End of cache reached. If found position, then we are done, otherwise
|
|
|
|
* need to fall back to uncached, since the position we were looking for
|
|
|
|
* wasn't in the cache.
|
|
|
|
*/
|
|
|
|
return res == FOUND_SOME ? 0 : UNCACHED;
|
|
|
|
}
|
|
|
|
|
|
|
|
int fuse_readdir(struct file *file, struct dir_context *ctx)
|
|
|
|
{
|
|
|
|
struct fuse_file *ff = file->private_data;
|
|
|
|
struct inode *inode = file_inode(file);
|
|
|
|
int err;
|
|
|
|
|
2020-12-10 14:33:14 +00:00
|
|
|
if (fuse_is_bad(inode))
|
2018-10-01 08:07:04 +00:00
|
|
|
return -EIO;
|
|
|
|
|
|
|
|
err = UNCACHED;
|
|
|
|
if (ff->open_flags & FOPEN_CACHE_DIR)
|
|
|
|
err = fuse_readdir_cached(file, ctx);
|
|
|
|
if (err == UNCACHED)
|
|
|
|
err = fuse_readdir_uncached(file, ctx);
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|