mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2024-12-28 16:52:18 +00:00
be2ca38253
This reverts commit 724a08450f
. This code simplification introduced significant regressions on servers that do not remap inode numbers when exporting multiple underlying filesystems with colliding inodes, as can be illustrated with simple tmpfs exports in qemu with remapping disabled:
```
# host side
cd /tmp/linux-test
mkdir m1 m2
mount -t tmpfs tmpfs m1
mount -t tmpfs tmpfs m2
mkdir m1/dir m2/dir
echo foo > m1/dir/foo
echo bar > m2/dir/bar

# guest side
# started with -virtfs local,path=/tmp/linux-test,mount_tag=tmp,security_model=mapped-file
mount -t 9p -o trans=virtio,debug=1 tmp /mnt/t
ls /mnt/t/m1/dir
# foo
ls /mnt/t/m2/dir
# bar (works ok if directory isn't open)

# cd to keep first dir's inode alive
cd /mnt/t/m1/dir
ls /mnt/t/m2/dir
# foo (should be bar)
```
Other examples can be crafted with regular files with fscache enabled, in which case I/Os just happen to the wrong file leading to corruptions, or guest failing to boot with:
| VFS: Lookup of 'com.android.runtime' in 9p 9p would have caused loop
In theory, we'd want the servers to be smart enough and ensure they never send us two different files with the same 'qid.path', but while qemu has an option to remap that is recommended (and qemu prints a warning if this case happens), there are many other servers which do not (kvmtool, nfs-ganesha, probably diod...), we should at least ensure we don't cause regressions on this:
- assume servers can't be trusted and operations that should get a 'new' inode properly do so. commit d05dcfdf5e
(" fs/9p: mitigate inode collisions") attempted to do this, but v9fs_fid_iget_dotl() was not called so some higher level of caching got in the way; this needs to be fixed properly before we can re-apply the patches.
- if we ever want to really simplify this code, we will need to add some negotiation with the server at mount time where the server could claim they handle this properly, at which point we could optimize this out. (but that might not be needed at all if we properly handle the 'new' check?)
Fixes: 724a08450f
("fs/9p: simplify iget to remove unnecessary paths") Reported-by: Will Deacon <will@kernel.org> Link: https://lore.kernel.org/all/20240408141436.GA17022@redhat.com/ Link: https://lkml.kernel.org/r/20240923100508.GA32066@willie-the-truck Cc: stable@vger.kernel.org # v6.9+ Message-ID: <20241024-revert_iget-v1-4-4cac63d25f72@codewreck.org> Signed-off-by: Dominique Martinet <asmadeus@codewreck.org>
92 lines
3.3 KiB
C
92 lines
3.3 KiB
C
/* SPDX-License-Identifier: GPL-2.0-only */
|
|
/*
|
|
* V9FS VFS extensions.
|
|
*
|
|
* Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
|
|
* Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
|
|
*/
|
|
#ifndef FS_9P_V9FS_VFS_H
|
|
#define FS_9P_V9FS_VFS_H
|
|
|
|
/* plan9 semantics are that created files are implicitly opened.
|
|
* But linux semantics are that you call create, then open.
|
|
* the plan9 approach is superior as it provides an atomic
|
|
* open.
|
|
* we track the create fid here. When the file is opened, if fidopen is
|
|
* non-zero, we use the fid and can skip some steps.
|
|
* there may be a better way to do this, but I don't know it.
|
|
* one BAD way is to clunk the fid on create, then open it again:
|
|
* you lose the atomicity of file open
|
|
*/
|
|
|
|
/* special case:
|
|
* unlink calls remove, which is an implicit clunk. So we have to track
|
|
* that kind of thing so that we don't try to clunk a dead fid.
|
|
*/
|
|
/*
 * Lock timeout, expressed as 30*HZ.
 * NOTE(review): presumably 30 seconds in jiffies and used by the locking
 * code -- confirm against the users of this macro.
 */
#define P9_LOCK_TIMEOUT (30*HZ)
|
|
|
|
/* flags for v9fs_stat2inode() & v9fs_stat2inode_dotl() */
|
|
/*
 * Value 1 of the 'flags' argument described above.
 * NOTE(review): the name suggests the inode's i_size is left untouched
 * when this is set -- confirm in the stat2inode implementations.
 */
#define V9FS_STAT2INODE_KEEP_ISIZE 1
|
|
|
|
/*
 * Objects shared through this header; each is only declared here and is
 * defined in another translation unit: the filesystem type, the address
 * space / file / directory / dentry operation tables, and the kmem cache
 * for inodes.
 * NOTE(review): the "_dotl" tables presumably serve the 9P2000.L protocol
 * variant -- confirm.
 */
extern struct file_system_type v9fs_fs_type;
|
|
extern const struct address_space_operations v9fs_addr_operations;
|
|
extern const struct file_operations v9fs_file_operations;
|
|
extern const struct file_operations v9fs_file_operations_dotl;
|
|
extern const struct file_operations v9fs_dir_operations;
|
|
extern const struct file_operations v9fs_dir_operations_dotl;
|
|
extern const struct dentry_operations v9fs_dentry_operations;
|
|
extern const struct dentry_operations v9fs_cached_dentry_operations;
|
|
extern struct kmem_cache *v9fs_inode_cache;
|
|
|
|
/*
 * Inode helpers implemented outside this header.
 * NOTE(review): judging by the names these cover allocation, freeing,
 * netfs context setup, initialisation and eviction of a v9fs inode --
 * confirm the exact contracts at the definitions.
 */
struct inode *v9fs_alloc_inode(struct super_block *sb);
|
|
void v9fs_free_inode(struct inode *inode);
|
|
void v9fs_set_netfs_context(struct inode *inode);
|
|
/* Returns an int; NOTE(review): presumably 0 or a negative errno -- confirm. */
int v9fs_init_inode(struct v9fs_session_info *v9ses,
|
|
struct inode *inode, umode_t mode, dev_t rdev);
|
|
void v9fs_evict_inode(struct inode *inode);
|
|
/*
 * QID2INO(q) - derive an ino_t from the 'path' member of the qid that
 * q points to.
 *
 * BITS_PER_LONG == 32: (path + 2) is XORed with (path >> 32) before the
 * cast to ino_t.
 * NOTE(review): this assumes 'path' is a 64-bit quantity so that its upper
 * half still influences the narrower result -- confirm.
 *
 * Otherwise: (path + 2) is cast to ino_t directly.
 *
 * The 32-bit variant expands q twice, so do not pass an expression with
 * side effects.
 */
#if (BITS_PER_LONG == 32)
|
|
#define QID2INO(q) ((ino_t) (((q)->path+2) ^ (((q)->path) >> 32)))
|
|
#else
|
|
#define QID2INO(q) ((ino_t) ((q)->path+2))
|
|
#endif
|
|
|
|
/*
 * Stat-to-inode helpers; the 'flags' argument takes the V9FS_STAT2INODE_*
 * values defined in this header.
 * NOTE(review): presumably these copy the fields of the received stat
 * structure into the inode -- confirm at the definitions.
 */
void v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
|
|
struct super_block *sb, unsigned int flags);
|
|
void v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode,
|
|
unsigned int flags);
|
|
/* File/directory entry points implemented outside this header. */
int v9fs_dir_release(struct inode *inode, struct file *filp);
|
|
int v9fs_file_open(struct inode *inode, struct file *file);
|
|
/*
 * NOTE(review): presumably maps userspace open flags to a 9P open mode,
 * with 'extended' selecting a protocol extension -- confirm.
 */
int v9fs_uflags2omode(int uflags, int extended);
|
|
|
|
/*
 * NOTE(review): presumably resets every field of *wstat to its
 * "don't touch" value -- confirm at the definition.
 */
void v9fs_blank_wstat(struct p9_wstat *wstat);
|
|
/* "_dotl" setattr and fsync entry points implemented outside this header. */
int v9fs_vfs_setattr_dotl(struct mnt_idmap *idmap,
|
|
struct dentry *dentry, struct iattr *iattr);
|
|
int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end,
|
|
int datasync);
|
|
/*
 * Refresh helpers taking a fid and the inode to update.
 * NOTE(review): presumably re-read attributes from the server via 'fid'
 * and return 0 or a negative errno -- confirm.
 */
int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode);
|
|
int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode);
|
|
static inline void v9fs_invalidate_inode_attr(struct inode *inode)
|
|
{
|
|
struct v9fs_inode *v9inode;
|
|
|
|
v9inode = V9FS_I(inode);
|
|
v9inode->cache_validity |= V9FS_INO_INVALID_ATTR;
|
|
}
|
|
|
|
/*
 * NOTE(review): presumably converts Linux open(2) flags into their
 * 9P2000.L wire representation -- confirm at the definition.
 */
int v9fs_open_to_dotl_flags(int flags);
|
|
|
|
/**
 * v9fs_i_size_write - update an inode's size, taking i_lock when required
 * @inode: inode to update
 * @i_size: new size to store
 *
 * When loff_t is wider than long (32-bit builds) the store is wrapped in
 * inode->i_lock: concurrent updates could otherwise break the sequence
 * and leave i_size_read() looping forever.  When loff_t fits in a long
 * the update is atomic and no lock is taken.
 */
static inline void v9fs_i_size_write(struct inode *inode, loff_t i_size)
{
	/* 64-bit: the store is atomic, so skip the locking entirely. */
	if (sizeof(i_size) <= sizeof(long)) {
		i_size_write(inode, i_size);
		return;
	}

	/* 32-bit: serialise writers so the sequence stays consistent. */
	spin_lock(&inode->i_lock);
	i_size_write(inode, i_size);
	spin_unlock(&inode->i_lock);
}
|
|
#endif
|