mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-15 13:15:57 +00:00
344bac8f0d
Move mnt->mnt_node into the union with mnt->mnt_rcu and mnt->mnt_llist
instead of keeping it with mnt->mnt_list. This allows us to use
RB_CLEAR_NODE(&mnt->mnt_node) in umount_tree() as well as
list_empty(&mnt->mnt_node). That in turn allows us to remove MNT_ONRB.

This also fixes the bug reported in [1] where seemingly MNT_ONRB wasn't
set in @mnt->mnt_flags even though the mount was present in the mount
rbtree of the mount namespace.

The root cause is the following race. When a btrfs subvolume is mounted
a temporary mount is created:

    btrfs_get_tree_subvol()
    {
            mnt = fc_mount()
            // Register the newly allocated mount with sb->mounts:
            lock_mount_hash();
            list_add_tail(&mnt->mnt_instance, &mnt->mnt.mnt_sb->s_mounts);
            unlock_mount_hash();
    }

and registered on sb->s_mounts. Later it is added to an anonymous mount
namespace via mount_subvol():

    -> mount_subvol()
       -> mount_subtree()
          -> alloc_mnt_ns()
             mnt_add_to_ns()
             vfs_path_lookup()
             put_mnt_ns()

The mnt_add_to_ns() call raises MNT_ONRB in @mnt->mnt_flags. If someone
concurrently does a ro remount:

    reconfigure_super()
    -> sb_prepare_remount_readonly()
       {
               list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
               }

all mounts registered in sb->s_mounts are visited and first
MNT_WRITE_HOLD is raised, then MNT_READONLY is raised, and finally
MNT_WRITE_HOLD is removed again.

The flag modification for MNT_WRITE_HOLD/MNT_READONLY and MNT_ONRB race
so MNT_ONRB might be lost.

Fixes: 2eea9ce4310d ("mounts: keep list of mounts in an rbtree")
Cc: <stable@kernel.org> # v6.8+
Link: https://lore.kernel.org/r/20241215-vfs-6-14-mount-work-v1-1-fd55922c4af8@kernel.org
Link: https://lore.kernel.org/r/ec6784ed-8722-4695-980a-4400d4e7bd1a@gmx.com [1]
Signed-off-by: Christian Brauner <brauner@kernel.org>
173 lines
4.8 KiB
C
173 lines
4.8 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#include <linux/mount.h>
|
|
#include <linux/seq_file.h>
|
|
#include <linux/poll.h>
|
|
#include <linux/ns_common.h>
|
|
#include <linux/fs_pin.h>
|
|
|
|
struct mnt_namespace {
|
|
struct ns_common ns;
|
|
struct mount * root;
|
|
struct rb_root mounts; /* Protected by namespace_sem */
|
|
struct user_namespace *user_ns;
|
|
struct ucounts *ucounts;
|
|
u64 seq; /* Sequence number to prevent loops */
|
|
wait_queue_head_t poll;
|
|
u64 event;
|
|
unsigned int nr_mounts; /* # of mounts in the namespace */
|
|
unsigned int pending_mounts;
|
|
struct rb_node mnt_ns_tree_node; /* node in the mnt_ns_tree */
|
|
refcount_t passive; /* number references not pinning @mounts */
|
|
} __randomize_layout;
|
|
|
|
/*
 * Counter pair referenced through the __percpu pointer mount.mnt_pcp when
 * CONFIG_SMP is set; without CONFIG_SMP struct mount carries the same two
 * counters as plain members instead.
 */
struct mnt_pcp {
	int	mnt_count;
	int	mnt_writers;
};
|
|
|
|
struct mountpoint {
|
|
struct hlist_node m_hash;
|
|
struct dentry *m_dentry;
|
|
struct hlist_head m_list;
|
|
int m_count;
|
|
};
|
|
|
|
struct mount {
|
|
struct hlist_node mnt_hash;
|
|
struct mount *mnt_parent;
|
|
struct dentry *mnt_mountpoint;
|
|
struct vfsmount mnt;
|
|
union {
|
|
struct rb_node mnt_node; /* node in the ns->mounts rbtree */
|
|
struct rcu_head mnt_rcu;
|
|
struct llist_node mnt_llist;
|
|
};
|
|
#ifdef CONFIG_SMP
|
|
struct mnt_pcp __percpu *mnt_pcp;
|
|
#else
|
|
int mnt_count;
|
|
int mnt_writers;
|
|
#endif
|
|
struct list_head mnt_mounts; /* list of children, anchored here */
|
|
struct list_head mnt_child; /* and going through their mnt_child */
|
|
struct list_head mnt_instance; /* mount instance on sb->s_mounts */
|
|
const char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */
|
|
struct list_head mnt_list;
|
|
struct list_head mnt_expire; /* link in fs-specific expiry list */
|
|
struct list_head mnt_share; /* circular list of shared mounts */
|
|
struct list_head mnt_slave_list;/* list of slave mounts */
|
|
struct list_head mnt_slave; /* slave list entry */
|
|
struct mount *mnt_master; /* slave is on master->mnt_slave_list */
|
|
struct mnt_namespace *mnt_ns; /* containing namespace */
|
|
struct mountpoint *mnt_mp; /* where is it mounted */
|
|
union {
|
|
struct hlist_node mnt_mp_list; /* list mounts with the same mountpoint */
|
|
struct hlist_node mnt_umount;
|
|
};
|
|
struct list_head mnt_umounting; /* list entry for umount propagation */
|
|
#ifdef CONFIG_FSNOTIFY
|
|
struct fsnotify_mark_connector __rcu *mnt_fsnotify_marks;
|
|
__u32 mnt_fsnotify_mask;
|
|
#endif
|
|
int mnt_id; /* mount identifier, reused */
|
|
u64 mnt_id_unique; /* mount ID unique until reboot */
|
|
int mnt_group_id; /* peer group identifier */
|
|
int mnt_expiry_mark; /* true if marked for expiry */
|
|
struct hlist_head mnt_pins;
|
|
struct hlist_head mnt_stuck_children;
|
|
} __randomize_layout;
|
|
|
|
/*
 * Error-pointer sentinel for a namespace pointer.  Because it is an ERR_PTR
 * value, is_mounted() (which tests mount.mnt_ns with IS_ERR_OR_NULL())
 * reports a mount carrying it as not mounted.
 * NOTE(review): presumably stored in mount.mnt_ns for kernel-internal
 * mounts -- confirm against the users in namespace.c.
 */
#define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */
|
|
|
|
static inline struct mount *real_mount(struct vfsmount *mnt)
|
|
{
|
|
return container_of(mnt, struct mount, mnt);
|
|
}
|
|
|
|
static inline int mnt_has_parent(struct mount *mnt)
|
|
{
|
|
return mnt != mnt->mnt_parent;
|
|
}
|
|
|
|
static inline int is_mounted(struct vfsmount *mnt)
|
|
{
|
|
/* neither detached nor internal? */
|
|
return !IS_ERR_OR_NULL(real_mount(mnt)->mnt_ns);
|
|
}
|
|
|
|
extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *);
|
|
|
|
extern int __legitimize_mnt(struct vfsmount *, unsigned);
|
|
|
|
static inline bool __path_is_mountpoint(const struct path *path)
|
|
{
|
|
struct mount *m = __lookup_mnt(path->mnt, path->dentry);
|
|
return m && likely(!(m->mnt.mnt_flags & MNT_SYNC_UMOUNT));
|
|
}
|
|
|
|
extern void __detach_mounts(struct dentry *dentry);
|
|
|
|
/*
 * Call __detach_mounts() for @dentry, but only when d_mountpoint() says the
 * dentry is a mountpoint; otherwise do nothing.
 */
static inline void detach_mounts(struct dentry *dentry)
{
	if (d_mountpoint(dentry))
		__detach_mounts(dentry);
}
|
|
|
|
static inline void get_mnt_ns(struct mnt_namespace *ns)
|
|
{
|
|
refcount_inc(&ns->ns.count);
|
|
}
|
|
|
|
extern seqlock_t mount_lock;
|
|
|
|
struct proc_mounts {
|
|
struct mnt_namespace *ns;
|
|
struct path root;
|
|
int (*show)(struct seq_file *, struct vfsmount *);
|
|
};
|
|
|
|
extern const struct seq_operations mounts_op;
|
|
|
|
extern bool __is_local_mountpoint(struct dentry *dentry);
|
|
/*
 * Return the result of __is_local_mountpoint() for @dentry, short-circuiting
 * to false when d_mountpoint() reports that @dentry is not a mountpoint at
 * all.
 */
static inline bool is_local_mountpoint(struct dentry *dentry)
{
	return d_mountpoint(dentry) && __is_local_mountpoint(dentry);
}
|
|
|
|
static inline bool is_anon_ns(struct mnt_namespace *ns)
|
|
{
|
|
return ns->seq == 0;
|
|
}
|
|
|
|
static inline bool mnt_ns_attached(const struct mount *mnt)
|
|
{
|
|
return !RB_EMPTY_NODE(&mnt->mnt_node);
|
|
}
|
|
|
|
static inline void move_from_ns(struct mount *mnt, struct list_head *dt_list)
|
|
{
|
|
WARN_ON(!mnt_ns_attached(mnt));
|
|
rb_erase(&mnt->mnt_node, &mnt->mnt_ns->mounts);
|
|
RB_CLEAR_NODE(&mnt->mnt_node);
|
|
list_add_tail(&mnt->mnt_list, dt_list);
|
|
}
|
|
|
|
bool has_locked_children(struct mount *mnt, struct dentry *dentry);
|
|
struct mnt_namespace *__lookup_next_mnt_ns(struct mnt_namespace *mnt_ns, bool previous);
|
|
static inline struct mnt_namespace *lookup_next_mnt_ns(struct mnt_namespace *mntns)
|
|
{
|
|
return __lookup_next_mnt_ns(mntns, false);
|
|
}
|
|
static inline struct mnt_namespace *lookup_prev_mnt_ns(struct mnt_namespace *mntns)
|
|
{
|
|
return __lookup_next_mnt_ns(mntns, true);
|
|
}
|
|
static inline struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
|
|
{
|
|
return container_of(ns, struct mnt_namespace, ns);
|
|
}
|