mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-16 09:56:46 +00:00
8c9440fea7
-----BEGIN PGP SIGNATURE----- iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCZZU0CgAKCRCRxhvAZXjc osncAQDSJK0frJL+72NqXxa4YNzivrnuw6fhp5iaDAEqxdm8ygEAoJWyh7Rmkt8G drAXWGyGnCYqv7UgC6axLyciid7TxQg= =vJuv -----END PGP SIGNATURE----- Merge tag 'vfs-6.8.mount' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs Pull vfs mount updates from Christian Brauner: "This contains the work to retrieve detailed information about mounts via two new system calls. This is hopefully the beginning of the end of the saga that started with fsinfo() years ago. The LWN articles in [1] and [2] can serve as a summary so we can avoid rehashing everything here. At LSFMM in May 2022 we got into a room and agreed on what we want to do about fsinfo(). Basically, split it into pieces. This is the first part of that agreement. Specifically, it is concerned with retrieving information about mounts. So this only concerns the mount information retrieval, not the mount table change notification, or the extended filesystem specific mount option work. That is separate work. Currently mounts have a 32bit id. Mount ids are already in heavy use by libmount and other low-level userspace but they can't be relied upon because they're recycled very quickly. We agreed that mounts should carry a unique 64bit id by which they can be referenced directly. This is now implemented as part of this work. The new 64bit mount id is exposed in statx() through the new STATX_MNT_ID_UNIQUE flag. If the flag isn't raised the old mount id is returned. If it is raised and the kernel supports the new 64bit mount id the flag is raised in the result mask and the new 64bit mount id is returned. New and old mount ids do not overlap so they cannot be conflated. Two new system calls are introduced that operate on the 64bit mount id: statmount() and listmount(). A summary of the api and usage can be found on LWN as well (cf. [3]) but of course, I'll provide a summary here as well. Both system calls rely on struct mnt_id_req. 
This is the request struct used to pass the 64bit mount id identifying the mount to operate on. It is extensible to allow for the addition of new parameters and for future use in other APIs that make use of mount ids. statmount() mimics the semantics of statx() and exposes a set of flags that userspace may raise in mnt_id_req to request specific information to be retrieved. A statmount() call returns a struct statmount filled in with information about the requested mount. Supported requests are indicated by raising the request flag passed in struct mnt_id_req in the @mask argument in struct statmount. Currently we do support: - STATMOUNT_SB_BASIC: Basic filesystem info - STATMOUNT_MNT_BASIC: Mount information (mount id, parent mount id, mount attributes etc) - STATMOUNT_PROPAGATE_FROM: Propagation from what mount in current namespace - STATMOUNT_MNT_ROOT: Path of the root of the mount (e.g., mount --bind /bla /mnt returns /bla) - STATMOUNT_MNT_POINT: Path of the mount point (e.g., mount --bind /bla /mnt returns /mnt) - STATMOUNT_FS_TYPE: Name of the filesystem type as the magic number isn't enough due to submounts The string options STATMOUNT_MNT_{ROOT,POINT} and STATMOUNT_FS_TYPE are appended to the end of the struct. Userspace can use the offsets in @fs_type, @mnt_root, and @mnt_point to reference those strings easily. The struct statmount reserves quite a bit of space currently for future extensibility. This isn't really a problem and if this bothers us we can just send a follow-up pull request during this cycle. listmount() is given a 64bit mount id via mnt_id_req just as statmount(). It takes a buffer and a size to return an array of the 64bit ids of the child mounts of the requested mount. Userspace can thus choose to either retrieve child mounts for a mount in batches or iterate through the child mounts. For most use-cases it will be sufficient to just leave space for a few child mounts. But for big mount tables having an iterator is really helpful. 
Iterating through a mount table works by setting @param in mnt_id_req to the mount id of the last child mount retrieved in the previous listmount() call" Link: https://lwn.net/Articles/934469 [1] Link: https://lwn.net/Articles/829212 [2] Link: https://lwn.net/Articles/950569 [3] * tag 'vfs-6.8.mount' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: add selftest for statmount/listmount fs: keep struct mnt_id_req extensible wire up syscalls for statmount/listmount add listmount(2) syscall statmount: simplify string option retrieval statmount: simplify numeric option retrieval add statmount(2) syscall namespace: extract show_path() helper mounts: keep list of mounts in an rbtree add unique mount ID
310 lines
8.9 KiB
C
310 lines
8.9 KiB
C
/* SPDX-License-Identifier: GPL-2.0-or-later */
/* fs/ internal definitions
 *
 * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

/*
 * Forward declarations.  The prototypes visible in this header use these
 * types only through pointers, so no full definition is needed here.
 */
struct super_block;
struct file_system_type;
struct iomap;
struct iomap_ops;
struct linux_binprm;
struct path;
struct mount;
struct shrink_control;
struct fs_context;
struct pipe_inode_info;
struct iov_iter;
struct mnt_idmap;

/*
 * block/bdev.c
 */
#ifdef CONFIG_BLOCK
extern void __init bdev_cache_init(void);
#else
/* No-op stand-in used when CONFIG_BLOCK is not defined. */
static inline void bdev_cache_init(void)
{
}
#endif /* CONFIG_BLOCK */

/*
|
|
* buffer.c
|
|
*/
|
|
int __block_write_begin_int(struct folio *folio, loff_t pos, unsigned len,
|
|
get_block_t *get_block, const struct iomap *iomap);
|
|
|
|
/*
|
|
* char_dev.c
|
|
*/
|
|
extern void __init chrdev_init(void);
|
|
|
|
/*
 * fs_context.c
 */
extern const struct fs_context_operations legacy_fs_context_ops;
extern int parse_monolithic_mount_data(struct fs_context *, void *);
extern void vfs_clean_context(struct fs_context *fc);
extern int finish_clean_context(struct fs_context *fc);

/*
|
|
* namei.c
|
|
*/
|
|
extern int filename_lookup(int dfd, struct filename *name, unsigned flags,
|
|
struct path *path, struct path *root);
|
|
int do_rmdir(int dfd, struct filename *name);
|
|
int do_unlinkat(int dfd, struct filename *name);
|
|
int may_linkat(struct mnt_idmap *idmap, const struct path *link);
|
|
int do_renameat2(int olddfd, struct filename *oldname, int newdfd,
|
|
struct filename *newname, unsigned int flags);
|
|
int do_mkdirat(int dfd, struct filename *name, umode_t mode);
|
|
int do_symlinkat(struct filename *from, int newdfd, struct filename *to);
|
|
int do_linkat(int olddfd, struct filename *old, int newdfd,
|
|
struct filename *new, int flags);
|
|
|
|
/*
|
|
* namespace.c
|
|
*/
|
|
extern struct vfsmount *lookup_mnt(const struct path *);
|
|
extern int finish_automount(struct vfsmount *, const struct path *);
|
|
|
|
extern int sb_prepare_remount_readonly(struct super_block *);
|
|
|
|
extern void __init mnt_init(void);
|
|
|
|
int mnt_get_write_access_file(struct file *file);
|
|
void mnt_put_write_access_file(struct file *file);
|
|
|
|
extern void dissolve_on_fput(struct vfsmount *);
|
|
extern bool may_mount(void);
|
|
|
|
int path_mount(const char *dev_name, struct path *path,
|
|
const char *type_page, unsigned long flags, void *data_page);
|
|
int path_umount(struct path *path, int flags);
|
|
|
|
int show_path(struct seq_file *m, struct dentry *root);
|
|
|
|
/*
 * fs_struct.c
 */
extern void chroot_fs_refs(const struct path *, const struct path *);

/*
 * file_table.c
 */
struct file *alloc_empty_file(int flags, const struct cred *cred);
struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred);
struct file *alloc_empty_backing_file(int flags, const struct cred *cred);

static inline void file_put_write_access(struct file *file)
|
|
{
|
|
put_write_access(file->f_inode);
|
|
mnt_put_write_access(file->f_path.mnt);
|
|
if (unlikely(file->f_mode & FMODE_BACKING))
|
|
mnt_put_write_access(backing_file_user_path(file)->mnt);
|
|
}
|
|
|
|
static inline void put_file_access(struct file *file)
|
|
{
|
|
if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) {
|
|
i_readcount_dec(file->f_inode);
|
|
} else if (file->f_mode & FMODE_WRITER) {
|
|
file_put_write_access(file);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* super.c
|
|
*/
|
|
extern int reconfigure_super(struct fs_context *);
|
|
extern bool super_trylock_shared(struct super_block *sb);
|
|
struct super_block *user_get_super(dev_t, bool excl);
|
|
void put_super(struct super_block *sb);
|
|
extern bool mount_capable(struct fs_context *);
|
|
int sb_init_dio_done_wq(struct super_block *sb);
|
|
|
|
/*
|
|
* Prepare superblock for changing its read-only state (i.e., either remount
|
|
* read-write superblock read-only or vice versa). After this function returns
|
|
* mnt_is_readonly() will return true for any mount of the superblock if its
|
|
* caller is able to observe any changes done by the remount. This holds until
|
|
* sb_end_ro_state_change() is called.
|
|
*/
|
|
static inline void sb_start_ro_state_change(struct super_block *sb)
|
|
{
|
|
WRITE_ONCE(sb->s_readonly_remount, 1);
|
|
/*
|
|
* For RO->RW transition, the barrier pairs with the barrier in
|
|
* mnt_is_readonly() making sure if mnt_is_readonly() sees SB_RDONLY
|
|
* cleared, it will see s_readonly_remount set.
|
|
* For RW->RO transition, the barrier pairs with the barrier in
|
|
* mnt_get_write_access() before the mnt_is_readonly() check.
|
|
* The barrier makes sure if mnt_get_write_access() sees MNT_WRITE_HOLD
|
|
* already cleared, it will see s_readonly_remount set.
|
|
*/
|
|
smp_wmb();
|
|
}
|
|
|
|
/*
|
|
* Ends section changing read-only state of the superblock. After this function
|
|
* returns if mnt_is_readonly() returns false, the caller will be able to
|
|
* observe all the changes remount did to the superblock.
|
|
*/
|
|
static inline void sb_end_ro_state_change(struct super_block *sb)
|
|
{
|
|
/*
|
|
* This barrier provides release semantics that pairs with
|
|
* the smp_rmb() acquire semantics in mnt_is_readonly().
|
|
* This barrier pair ensure that when mnt_is_readonly() sees
|
|
* 0 for sb->s_readonly_remount, it will also see all the
|
|
* preceding flag changes that were made during the RO state
|
|
* change.
|
|
*/
|
|
smp_wmb();
|
|
WRITE_ONCE(sb->s_readonly_remount, 0);
|
|
}
|
|
|
|
/*
|
|
* open.c
|
|
*/
|
|
struct open_flags {
|
|
int open_flag;
|
|
umode_t mode;
|
|
int acc_mode;
|
|
int intent;
|
|
int lookup_flags;
|
|
};
|
|
extern struct file *do_filp_open(int dfd, struct filename *pathname,
|
|
const struct open_flags *op);
|
|
extern struct file *do_file_open_root(const struct path *,
|
|
const char *, const struct open_flags *);
|
|
extern struct open_how build_open_how(int flags, umode_t mode);
|
|
extern int build_open_flags(const struct open_how *how, struct open_flags *op);
|
|
struct file *file_close_fd_locked(struct files_struct *files, unsigned fd);
|
|
|
|
long do_sys_ftruncate(unsigned int fd, loff_t length, int small);
|
|
int chmod_common(const struct path *path, umode_t mode);
|
|
int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
|
|
int flag);
|
|
int chown_common(const struct path *path, uid_t user, gid_t group);
|
|
extern int vfs_open(const struct path *, struct file *);
|
|
|
|
/*
|
|
* inode.c
|
|
*/
|
|
extern long prune_icache_sb(struct super_block *sb, struct shrink_control *sc);
|
|
int dentry_needs_remove_privs(struct mnt_idmap *, struct dentry *dentry);
|
|
bool in_group_or_capable(struct mnt_idmap *idmap,
|
|
const struct inode *inode, vfsgid_t vfsgid);
|
|
void lock_two_inodes(struct inode *inode1, struct inode *inode2,
|
|
unsigned subclass1, unsigned subclass2);
|
|
|
|
/*
 * fs-writeback.c
 */
extern long get_nr_dirty_inodes(void);
void invalidate_inodes(struct super_block *sb);

/*
 * dcache.c
 */
extern int d_set_mounted(struct dentry *dentry);
extern long prune_dcache_sb(struct super_block *sb, struct shrink_control *sc);
extern struct dentry *d_alloc_cursor(struct dentry *);
extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *);
extern char *simple_dname(struct dentry *, char *, int);
extern void dput_to_list(struct dentry *, struct list_head *);
extern void shrink_dentry_list(struct list_head *);

/*
 * pipe.c
 */
extern const struct file_operations pipefifo_fops;

/*
 * fs_pin.c
 */
extern void group_pin_kill(struct hlist_head *p);
extern void mnt_pin_kill(struct mount *m);

/*
 * fs/nsfs.c
 */
extern const struct dentry_operations ns_dentry_operations;

/*
|
|
* fs/stat.c:
|
|
*/
|
|
|
|
int getname_statx_lookup_flags(int flags);
|
|
int do_statx(int dfd, struct filename *filename, unsigned int flags,
|
|
unsigned int mask, struct statx __user *buffer);
|
|
|
|
/*
|
|
* fs/splice.c:
|
|
*/
|
|
long splice_file_to_pipe(struct file *in,
|
|
struct pipe_inode_info *opipe,
|
|
loff_t *offset,
|
|
size_t len, unsigned int flags);
|
|
|
|
/*
|
|
* fs/xattr.c:
|
|
*/
|
|
struct xattr_name {
|
|
char name[XATTR_NAME_MAX + 1];
|
|
};
|
|
|
|
struct xattr_ctx {
|
|
/* Value of attribute */
|
|
union {
|
|
const void __user *cvalue;
|
|
void __user *value;
|
|
};
|
|
void *kvalue;
|
|
size_t size;
|
|
/* Attribute name */
|
|
struct xattr_name *kname;
|
|
unsigned int flags;
|
|
};
|
|
|
|
|
|
ssize_t do_getxattr(struct mnt_idmap *idmap,
|
|
struct dentry *d,
|
|
struct xattr_ctx *ctx);
|
|
|
|
int setxattr_copy(const char __user *name, struct xattr_ctx *ctx);
|
|
int do_setxattr(struct mnt_idmap *idmap, struct dentry *dentry,
|
|
struct xattr_ctx *ctx);
|
|
int may_write_xattr(struct mnt_idmap *idmap, struct inode *inode);
|
|
|
|
#ifdef CONFIG_FS_POSIX_ACL
int do_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
		const char *acl_name, const void *kvalue, size_t size);
ssize_t do_get_acl(struct mnt_idmap *idmap, struct dentry *dentry,
		const char *acl_name, void *kvalue, size_t size);
#else
/*
 * Stand-ins used when CONFIG_FS_POSIX_ACL is not defined: both ignore
 * their arguments and return -EOPNOTSUPP.
 */
static inline int do_set_acl(struct mnt_idmap *idmap,
		struct dentry *dentry, const char *acl_name,
		const void *kvalue, size_t size)
{
	return -EOPNOTSUPP;
}
static inline ssize_t do_get_acl(struct mnt_idmap *idmap,
		struct dentry *dentry, const char *acl_name,
		void *kvalue, size_t size)
{
	return -EOPNOTSUPP;
}
#endif

ssize_t __kernel_write_iter(struct file *file, struct iov_iter *from, loff_t *pos);

/*
 * fs/attr.c
 *
 * NOTE(review): the mnt_idmap helpers below may be defined in a different
 * file than this section label says - confirm and relabel if so.
 */
struct mnt_idmap *alloc_mnt_idmap(struct user_namespace *mnt_userns);
struct mnt_idmap *mnt_idmap_get(struct mnt_idmap *idmap);
void mnt_idmap_put(struct mnt_idmap *idmap);