Merge branch 'vfs-6.14.mount' into vfs.all

This commit is contained in:
Christian Brauner 2024-12-17 21:42:07 +01:00
commit ef5bbd2a28
No known key found for this signature in database
GPG Key ID: 91C61BC06578DCA2
17 changed files with 657 additions and 112 deletions

View File

@ -8,15 +8,23 @@
struct mnt_namespace {
struct ns_common ns;
struct mount * root;
struct rb_root mounts; /* Protected by namespace_sem */
struct {
struct rb_root mounts; /* Protected by namespace_sem */
struct rb_node *mnt_last_node; /* last (rightmost) mount in the rbtree */
struct rb_node *mnt_first_node; /* first (leftmost) mount in the rbtree */
};
struct user_namespace *user_ns;
struct ucounts *ucounts;
u64 seq; /* Sequence number to prevent loops */
wait_queue_head_t poll;
union {
wait_queue_head_t poll;
struct rcu_head mnt_ns_rcu;
};
u64 event;
unsigned int nr_mounts; /* # of mounts in the namespace */
unsigned int pending_mounts;
struct rb_node mnt_ns_tree_node; /* node in the mnt_ns_tree */
struct list_head mnt_ns_list; /* entry in the sequential list of mounts namespace */
refcount_t passive; /* number references not pinning @mounts */
} __randomize_layout;
@ -38,6 +46,7 @@ struct mount {
struct dentry *mnt_mountpoint;
struct vfsmount mnt;
union {
struct rb_node mnt_node; /* node in the ns->mounts rbtree */
struct rcu_head mnt_rcu;
struct llist_node mnt_llist;
};
@ -51,10 +60,7 @@ struct mount {
struct list_head mnt_child; /* and going through their mnt_child */
struct list_head mnt_instance; /* mount instance on sb->s_mounts */
const char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */
union {
struct rb_node mnt_node; /* Under ns->mounts */
struct list_head mnt_list;
};
struct list_head mnt_list;
struct list_head mnt_expire; /* link in fs-specific expiry list */
struct list_head mnt_share; /* circular list of shared mounts */
struct list_head mnt_slave_list;/* list of slave mounts */
@ -145,24 +151,28 @@ static inline bool is_anon_ns(struct mnt_namespace *ns)
return ns->seq == 0;
}
/*
 * mnt_ns_attached - test whether @mnt sits in a namespace's mount rbtree
 * @mnt: mount to inspect
 *
 * Returns true when @mnt->mnt_node is not in the cleared (RB_EMPTY_NODE)
 * state, false otherwise.
 */
static inline bool mnt_ns_attached(const struct mount *mnt)
{
	const struct rb_node *node = &mnt->mnt_node;

	if (RB_EMPTY_NODE(node))
		return false;
	return true;
}
static inline void move_from_ns(struct mount *mnt, struct list_head *dt_list)
{
WARN_ON(!(mnt->mnt.mnt_flags & MNT_ONRB));
mnt->mnt.mnt_flags &= ~MNT_ONRB;
rb_erase(&mnt->mnt_node, &mnt->mnt_ns->mounts);
struct mnt_namespace *ns = mnt->mnt_ns;
WARN_ON(!mnt_ns_attached(mnt));
if (ns->mnt_last_node == &mnt->mnt_node)
ns->mnt_last_node = rb_prev(&mnt->mnt_node);
if (ns->mnt_first_node == &mnt->mnt_node)
ns->mnt_first_node = rb_next(&mnt->mnt_node);
rb_erase(&mnt->mnt_node, &ns->mounts);
RB_CLEAR_NODE(&mnt->mnt_node);
list_add_tail(&mnt->mnt_list, dt_list);
}
bool has_locked_children(struct mount *mnt, struct dentry *dentry);
struct mnt_namespace *__lookup_next_mnt_ns(struct mnt_namespace *mnt_ns, bool previous);
/*
* Convenience wrapper: forwards @mntns to __lookup_next_mnt_ns() with
* @previous set to false and returns its result unchanged.
*/
static inline struct mnt_namespace *lookup_next_mnt_ns(struct mnt_namespace *mntns)
{
return __lookup_next_mnt_ns(mntns, false);
}
/*
* Convenience wrapper: forwards @mntns to __lookup_next_mnt_ns() with
* @previous set to true and returns its result unchanged.
*/
static inline struct mnt_namespace *lookup_prev_mnt_ns(struct mnt_namespace *mntns)
{
return __lookup_next_mnt_ns(mntns, true);
}
struct mnt_namespace *get_sequential_mnt_ns(struct mnt_namespace *mnt_ns,
bool previous);
static inline struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
{
return container_of(ns, struct mnt_namespace, ns);

View File

@ -32,7 +32,6 @@
#include <linux/fs_context.h>
#include <linux/shmem_fs.h>
#include <linux/mnt_idmapping.h>
#include <linux/nospec.h>
#include "pnode.h"
#include "internal.h"
@ -66,12 +65,12 @@ static int __init set_mphash_entries(char *str)
__setup("mphash_entries=", set_mphash_entries);
static u64 event;
static DEFINE_IDA(mnt_id_ida);
static DEFINE_XARRAY_FLAGS(mnt_id_xa, XA_FLAGS_ALLOC);
static DEFINE_IDA(mnt_group_ida);
/* Don't allow confusion with old 32bit mount ID */
#define MNT_UNIQUE_ID_OFFSET (1ULL << 31)
static atomic64_t mnt_id_ctr = ATOMIC64_INIT(MNT_UNIQUE_ID_OFFSET);
static u64 mnt_id_ctr = MNT_UNIQUE_ID_OFFSET;
static struct hlist_head *mount_hashtable __ro_after_init;
static struct hlist_head *mountpoint_hashtable __ro_after_init;
@ -79,8 +78,10 @@ static struct kmem_cache *mnt_cache __ro_after_init;
static DECLARE_RWSEM(namespace_sem);
static HLIST_HEAD(unmounted); /* protected by namespace_sem */
static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */
static DEFINE_RWLOCK(mnt_ns_tree_lock);
static DEFINE_SEQLOCK(mnt_ns_tree_lock);
static struct rb_root mnt_ns_tree = RB_ROOT; /* protected by mnt_ns_tree_lock */
static LIST_HEAD(mnt_ns_list); /* protected by mnt_ns_tree_lock */
struct mount_kattr {
unsigned int attr_set;
@ -106,17 +107,6 @@ EXPORT_SYMBOL_GPL(fs_kobj);
*/
__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);
/*
 * Three-way comparison of a sequence number against a namespace.
 *
 * Returns -1 if @seq is smaller than @ns->seq, 1 if it is larger and
 * 0 if both are equal.
 */
static int mnt_ns_cmp(u64 seq, const struct mnt_namespace *ns)
{
	const u64 other = ns->seq;

	if (seq == other)
		return 0;
	return seq < other ? -1 : 1;
}
static inline struct mnt_namespace *node_to_mnt_ns(const struct rb_node *node)
{
if (!node)
@ -124,24 +114,53 @@ static inline struct mnt_namespace *node_to_mnt_ns(const struct rb_node *node)
return rb_entry(node, struct mnt_namespace, mnt_ns_tree_node);
}
static bool mnt_ns_less(struct rb_node *a, const struct rb_node *b)
static int mnt_ns_cmp(struct rb_node *a, const struct rb_node *b)
{
struct mnt_namespace *ns_a = node_to_mnt_ns(a);
struct mnt_namespace *ns_b = node_to_mnt_ns(b);
u64 seq_a = ns_a->seq;
u64 seq_b = ns_b->seq;
return mnt_ns_cmp(seq_a, ns_b) < 0;
if (seq_a < seq_b)
return -1;
if (seq_a > seq_b)
return 1;
return 0;
}
/* Enter the write side of the mnt_ns_tree_lock seqlock. */
static inline void mnt_ns_tree_write_lock(void)
{
write_seqlock(&mnt_ns_tree_lock);
}
/* Leave the write side of the mnt_ns_tree_lock seqlock. */
static inline void mnt_ns_tree_write_unlock(void)
{
write_sequnlock(&mnt_ns_tree_lock);
}
static void mnt_ns_tree_add(struct mnt_namespace *ns)
{
guard(write_lock)(&mnt_ns_tree_lock);
rb_add(&ns->mnt_ns_tree_node, &mnt_ns_tree, mnt_ns_less);
struct rb_node *node, *prev;
mnt_ns_tree_write_lock();
node = rb_find_add_rcu(&ns->mnt_ns_tree_node, &mnt_ns_tree, mnt_ns_cmp);
/*
* If there's no previous entry simply add it after the
* head and if there is add it after the previous entry.
*/
prev = rb_prev(&ns->mnt_ns_tree_node);
if (!prev)
list_add_rcu(&ns->mnt_ns_list, &mnt_ns_list);
else
list_add_rcu(&ns->mnt_ns_list, &node_to_mnt_ns(prev)->mnt_ns_list);
mnt_ns_tree_write_unlock();
WARN_ON_ONCE(node);
}
static void mnt_ns_release(struct mnt_namespace *ns)
{
lockdep_assert_not_held(&mnt_ns_tree_lock);
lockdep_assert_not_held(&mnt_ns_tree_lock.lock);
/* keep alive for {list,stat}mount() */
if (refcount_dec_and_test(&ns->passive)) {
@ -151,41 +170,34 @@ static void mnt_ns_release(struct mnt_namespace *ns)
}
DEFINE_FREE(mnt_ns_release, struct mnt_namespace *, if (_T) mnt_ns_release(_T))
/*
 * RCU callback: map @rcu back to the mount namespace that embeds it as
 * its mnt_ns_rcu member and pass that namespace to mnt_ns_release().
 */
static void mnt_ns_release_rcu(struct rcu_head *rcu)
{
	struct mnt_namespace *ns;

	ns = container_of(rcu, struct mnt_namespace, mnt_ns_rcu);
	mnt_ns_release(ns);
}
static void mnt_ns_tree_remove(struct mnt_namespace *ns)
{
/* remove from global mount namespace list */
if (!is_anon_ns(ns)) {
guard(write_lock)(&mnt_ns_tree_lock);
mnt_ns_tree_write_lock();
rb_erase(&ns->mnt_ns_tree_node, &mnt_ns_tree);
list_bidir_del_rcu(&ns->mnt_ns_list);
mnt_ns_tree_write_unlock();
}
mnt_ns_release(ns);
call_rcu(&ns->mnt_ns_rcu, mnt_ns_release_rcu);
}
/*
* Returns the mount namespace which either has the specified id, or has the
* next smallest id after the specified one.
*/
static struct mnt_namespace *mnt_ns_find_id_at(u64 mnt_ns_id)
static int mnt_ns_find(const void *key, const struct rb_node *node)
{
struct rb_node *node = mnt_ns_tree.rb_node;
struct mnt_namespace *ret = NULL;
const u64 mnt_ns_id = *(u64 *)key;
const struct mnt_namespace *ns = node_to_mnt_ns(node);
lockdep_assert_held(&mnt_ns_tree_lock);
while (node) {
struct mnt_namespace *n = node_to_mnt_ns(node);
if (mnt_ns_id <= n->seq) {
ret = node_to_mnt_ns(node);
if (mnt_ns_id == n->seq)
break;
node = node->rb_left;
} else {
node = node->rb_right;
}
}
return ret;
if (mnt_ns_id < ns->seq)
return -1;
if (mnt_ns_id > ns->seq)
return 1;
return 0;
}
/*
@ -195,18 +207,37 @@ static struct mnt_namespace *mnt_ns_find_id_at(u64 mnt_ns_id)
* namespace the @namespace_sem must first be acquired. If the namespace has
* already shut down before acquiring @namespace_sem, {list,stat}mount() will
* see that the mount rbtree of the namespace is empty.
*
* Note the lookup is lockless protected by a sequence counter. We only
* need to guard against false negatives as false positives aren't
* possible. So if we didn't find a mount namespace and the sequence
* counter has changed we need to retry. If the sequence counter is
* still the same we know the search actually failed.
*/
static struct mnt_namespace *lookup_mnt_ns(u64 mnt_ns_id)
{
struct mnt_namespace *ns;
struct mnt_namespace *ns;
struct rb_node *node;
unsigned int seq;
guard(read_lock)(&mnt_ns_tree_lock);
ns = mnt_ns_find_id_at(mnt_ns_id);
if (!ns || ns->seq != mnt_ns_id)
return NULL;
guard(rcu)();
do {
seq = read_seqbegin(&mnt_ns_tree_lock);
node = rb_find_rcu(&mnt_ns_id, &mnt_ns_tree, mnt_ns_find);
if (node)
break;
} while (read_seqretry(&mnt_ns_tree_lock, seq));
refcount_inc(&ns->passive);
return ns;
if (!node)
return NULL;
/*
* The last reference count is put with RCU delay so we can
* unconditionally acquire a reference here.
*/
ns = node_to_mnt_ns(node);
refcount_inc(&ns->passive);
return ns;
}
static inline void lock_mount_hash(void)
@ -236,18 +267,19 @@ static inline struct hlist_head *mp_hash(struct dentry *dentry)
static int mnt_alloc_id(struct mount *mnt)
{
int res = ida_alloc(&mnt_id_ida, GFP_KERNEL);
int res;
if (res < 0)
return res;
mnt->mnt_id = res;
mnt->mnt_id_unique = atomic64_inc_return(&mnt_id_ctr);
return 0;
xa_lock(&mnt_id_xa);
res = __xa_alloc(&mnt_id_xa, &mnt->mnt_id, mnt, XA_LIMIT(1, INT_MAX), GFP_KERNEL);
if (!res)
mnt->mnt_id_unique = ++mnt_id_ctr;
xa_unlock(&mnt_id_xa);
return res;
}
static void mnt_free_id(struct mount *mnt)
{
ida_free(&mnt_id_ida, mnt->mnt_id);
xa_erase(&mnt_id_xa, mnt->mnt_id);
}
/*
@ -344,6 +376,7 @@ static struct mount *alloc_vfsmnt(const char *name)
INIT_HLIST_NODE(&mnt->mnt_mp_list);
INIT_LIST_HEAD(&mnt->mnt_umounting);
INIT_HLIST_HEAD(&mnt->mnt_stuck_children);
RB_CLEAR_NODE(&mnt->mnt_node);
mnt->mnt.mnt_idmap = &nop_mnt_idmap;
}
return mnt;
@ -1123,19 +1156,27 @@ static void mnt_add_to_ns(struct mnt_namespace *ns, struct mount *mnt)
{
struct rb_node **link = &ns->mounts.rb_node;
struct rb_node *parent = NULL;
bool mnt_first_node = true, mnt_last_node = true;
WARN_ON(mnt->mnt.mnt_flags & MNT_ONRB);
WARN_ON(mnt_ns_attached(mnt));
mnt->mnt_ns = ns;
while (*link) {
parent = *link;
if (mnt->mnt_id_unique < node_to_mount(parent)->mnt_id_unique)
if (mnt->mnt_id_unique < node_to_mount(parent)->mnt_id_unique) {
link = &parent->rb_left;
else
mnt_last_node = false;
} else {
link = &parent->rb_right;
mnt_first_node = false;
}
}
if (mnt_last_node)
ns->mnt_last_node = &mnt->mnt_node;
if (mnt_first_node)
ns->mnt_first_node = &mnt->mnt_node;
rb_link_node(&mnt->mnt_node, parent, link);
rb_insert_color(&mnt->mnt_node, &ns->mounts);
mnt->mnt.mnt_flags |= MNT_ONRB;
}
/*
@ -1305,7 +1346,7 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
}
mnt->mnt.mnt_flags = old->mnt.mnt_flags;
mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL|MNT_ONRB);
mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL);
atomic_inc(&sb->s_active);
mnt->mnt.mnt_idmap = mnt_idmap_get(mnt_idmap(&old->mnt));
@ -1763,7 +1804,7 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
/* Gather the mounts to umount */
for (p = mnt; p; p = next_mnt(p, mnt)) {
p->mnt.mnt_flags |= MNT_UMOUNT;
if (p->mnt.mnt_flags & MNT_ONRB)
if (mnt_ns_attached(p))
move_from_ns(p, &tmp_list);
else
list_move(&p->mnt_list, &tmp_list);
@ -1912,16 +1953,14 @@ static int do_umount(struct mount *mnt, int flags)
event++;
if (flags & MNT_DETACH) {
if (mnt->mnt.mnt_flags & MNT_ONRB ||
!list_empty(&mnt->mnt_list))
if (mnt_ns_attached(mnt) || !list_empty(&mnt->mnt_list))
umount_tree(mnt, UMOUNT_PROPAGATE);
retval = 0;
} else {
shrink_submounts(mnt);
retval = -EBUSY;
if (!propagate_mount_busy(mnt, 2)) {
if (mnt->mnt.mnt_flags & MNT_ONRB ||
!list_empty(&mnt->mnt_list))
if (mnt_ns_attached(mnt) || !list_empty(&mnt->mnt_list))
umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
retval = 0;
}
@ -2071,30 +2110,34 @@ struct ns_common *from_mnt_ns(struct mnt_namespace *mnt)
return &mnt->ns;
}
struct mnt_namespace *__lookup_next_mnt_ns(struct mnt_namespace *mntns, bool previous)
struct mnt_namespace *get_sequential_mnt_ns(struct mnt_namespace *mntns, bool previous)
{
guard(read_lock)(&mnt_ns_tree_lock);
guard(rcu)();
for (;;) {
struct rb_node *node;
struct list_head *list;
if (previous)
node = rb_prev(&mntns->mnt_ns_tree_node);
list = rcu_dereference(list_bidir_prev_rcu(&mntns->mnt_ns_list));
else
node = rb_next(&mntns->mnt_ns_tree_node);
if (!node)
list = rcu_dereference(list_next_rcu(&mntns->mnt_ns_list));
if (list_is_head(list, &mnt_ns_list))
return ERR_PTR(-ENOENT);
mntns = node_to_mnt_ns(node);
node = &mntns->mnt_ns_tree_node;
mntns = list_entry_rcu(list, struct mnt_namespace, mnt_ns_list);
/*
* The last passive reference count is put with RCU
* delay so accessing the mount namespace is not just
* safe but all relevant members are still valid.
*/
if (!ns_capable_noaudit(mntns->user_ns, CAP_SYS_ADMIN))
continue;
/*
* Holding mnt_ns_tree_lock prevents the mount namespace from
* being freed but it may well be on its deathbed. We want an
* active reference, not just a passive one here as we're
* persisting the mount namespace.
* We need an active reference count as we're persisting
* the mount namespace and it might already be on its
* deathbed.
*/
if (!refcount_inc_not_zero(&mntns->ns.count))
continue;
@ -3911,6 +3954,7 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a
refcount_set(&new_ns->ns.count, 1);
refcount_set(&new_ns->passive, 1);
new_ns->mounts = RB_ROOT;
INIT_LIST_HEAD(&new_ns->mnt_ns_list);
RB_CLEAR_NODE(&new_ns->mnt_ns_tree_node);
init_waitqueue_head(&new_ns->poll);
new_ns->user_ns = get_user_ns(user_ns);
@ -3990,7 +4034,6 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
while (p->mnt.mnt_root != q->mnt.mnt_root)
p = next_mnt(skip_mnt_tree(p), old);
}
mnt_ns_tree_add(new_ns);
namespace_unlock();
if (rootmnt)
@ -3998,6 +4041,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
if (pwdmnt)
mntput(pwdmnt);
mnt_ns_tree_add(new_ns);
return new_ns;
}
@ -5535,9 +5579,9 @@ static ssize_t do_listmount(struct mnt_namespace *ns, u64 mnt_parent_id,
if (!last_mnt_id) {
if (reverse)
first = node_to_mount(rb_last(&ns->mounts));
first = node_to_mount(ns->mnt_last_node);
else
first = node_to_mount(rb_first(&ns->mounts));
first = node_to_mount(ns->mnt_first_node);
} else {
if (reverse)
first = mnt_find_id_at_reverse(ns, last_mnt_id - 1);

View File

@ -274,10 +274,7 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl,
if (usize < MNT_NS_INFO_SIZE_VER0)
return -EINVAL;
if (previous)
mnt_ns = lookup_prev_mnt_ns(to_mnt_ns(ns));
else
mnt_ns = lookup_next_mnt_ns(to_mnt_ns(ns));
mnt_ns = get_sequential_mnt_ns(to_mnt_ns(ns), previous);
if (IS_ERR(mnt_ns))
return PTR_ERR(mnt_ns);

View File

@ -50,7 +50,7 @@ struct path;
#define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME )
#define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \
MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED | MNT_ONRB)
MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED)
#define MNT_INTERNAL 0x4000
@ -64,7 +64,6 @@ struct path;
#define MNT_SYNC_UMOUNT 0x2000000
#define MNT_MARKED 0x4000000
#define MNT_UMOUNT 0x8000000
#define MNT_ONRB 0x10000000
struct vfsmount {
struct dentry *mnt_root; /* root of the mounted tree */

View File

@ -30,6 +30,17 @@ static inline void INIT_LIST_HEAD_RCU(struct list_head *list)
* way, we must not access it directly
*/
#define list_next_rcu(list) (*((struct list_head __rcu **)(&(list)->next)))
/*
* Return the ->prev pointer of a list_head in an rcu safe way. Don't
* access it directly.
*
* The macro yields the ->prev member itself, viewed as a
* struct list_head __rcu pointer, mirroring list_next_rcu() for the
* ->next member.
*
* Any list traversed with list_bidir_prev_rcu() must never use
* list_del_rcu(). Doing so will poison the ->prev pointer that
* list_bidir_prev_rcu() relies on, which will result in segfaults.
* To prevent these segfaults, use list_bidir_del_rcu() instead
* of list_del_rcu().
*/
#define list_bidir_prev_rcu(list) (*((struct list_head __rcu **)(&(list)->prev)))
/**
* list_tail_rcu - returns the prev pointer of the head of the list
@ -158,6 +169,39 @@ static inline void list_del_rcu(struct list_head *entry)
entry->prev = LIST_POISON2;
}
/**
* list_bidir_del_rcu - deletes entry from list without re-initialization
* @entry: the element to delete from the list.
*
* In contrast to list_del_rcu() doesn't poison the prev pointer thus
* allowing backwards traversal via list_bidir_prev_rcu().
*
* Note: list_empty() on entry does not return true after this because
* the entry is in a special undefined state that permits RCU-based
* lockfree reverse traversal. In particular this means that we can not
* poison the forward and backwards pointers that may still be used for
* walking the list.
*
* The caller must take whatever precautions are necessary (such as
* holding appropriate locks) to avoid racing with another list-mutation
* primitive, such as list_bidir_del_rcu() or list_add_rcu(), running on
* this same list. However, it is perfectly legal to run concurrently
* with the _rcu list-traversal primitives, such as
* list_for_each_entry_rcu().
*
* Note that list_del_rcu() and list_bidir_del_rcu() must not be used on
* the same list.
*
* Note that the caller is not permitted to immediately free
* the newly deleted entry. Instead, either synchronize_rcu()
* or call_rcu() must be used to defer freeing until an RCU
* grace period has elapsed.
*/
static inline void list_bidir_del_rcu(struct list_head *entry)
{
/*
* Only unlink the entry. Unlike list_del_rcu(), nothing is written to
* entry->prev afterwards (no LIST_POISON2), so a reader that is still
* positioned on @entry can keep walking backwards.
*/
__list_del_entry(entry);
}
/**
* hlist_del_init_rcu - deletes entry from hash list with re-initialization
* @n: the element to delete from the hash list.

View File

@ -1,4 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
/test-fsmount
/test-list-all-mounts
/test-statx
/mountinfo

View File

@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
userprogs-always-y += test-fsmount test-statx mountinfo
userprogs-always-y += test-fsmount test-statx mountinfo test-list-all-mounts
userccflags += -I usr/include

View File

@ -0,0 +1,235 @@
// SPDX-License-Identifier: GPL-2.0-or-later
// Copyright (c) 2024 Christian Brauner <brauner@kernel.org>
#define _GNU_SOURCE
#include <errno.h>
#include <limits.h>
#include <linux/types.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include "../../tools/testing/selftests/pidfd/pidfd.h"
/*
* Report a fatal error and exit.
*
* Prints the errno description (via the "%m" conversion), the source file,
* line and function, followed by the printf-style message built from
* @format and its arguments, to stderr; then exits with EXIT_FAILURE.
*/
#define die_errno(format, ...) \
do { \
fprintf(stderr, "%m | %s: %d: %s: " format "\n", __FILE__, \
__LINE__, __func__, ##__VA_ARGS__); \
exit(EXIT_FAILURE); \
} while (0)
/* Get the id for a mount namespace */
#define NS_GET_MNTNS_ID _IO(0xb7, 0x5)
/* Get next mount namespace. */
/*
* Argument block for the NS_MNT_GET_INFO / NS_MNT_GET_NEXT /
* NS_MNT_GET_PREV ioctls. Its 16 bytes match MNT_NS_INFO_SIZE_VER0.
*/
struct mnt_ns_info {
__u32 size; /* presumably the size of this structure - TODO confirm against the uapi header */
__u32 nr_mounts; /* mount count; main() prints it as the number of mounts in the namespace */
__u64 mnt_ns_id; /* mount namespace id; main() passes it on to listmount()/statmount() */
};
#define MNT_NS_INFO_SIZE_VER0 16 /* size of first published struct */
/* Get information about namespace. */
#define NS_MNT_GET_INFO _IOR(0xb7, 10, struct mnt_ns_info)
/* Get next namespace. */
#define NS_MNT_GET_NEXT _IOR(0xb7, 11, struct mnt_ns_info)
/* Get previous namespace. */
#define NS_MNT_GET_PREV _IOR(0xb7, 12, struct mnt_ns_info)
#define PIDFD_GET_MNT_NAMESPACE _IO(0xFF, 3)
#ifndef __NR_listmount
#define __NR_listmount 458
#endif
#ifndef __NR_statmount
#define __NR_statmount 457
#endif
/* @mask bits for statmount(2) */
#define STATMOUNT_SB_BASIC 0x00000001U /* Want/got sb_... */
#define STATMOUNT_MNT_BASIC 0x00000002U /* Want/got mnt_... */
#define STATMOUNT_PROPAGATE_FROM 0x00000004U /* Want/got propagate_from */
#define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */
#define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */
#define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */
#define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */
#define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */
#define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */
/*
* Local copy of the buffer layout used with the statmount system call
* (see __statmount()).
*
* main() prints fs_type, mnt_root, mnt_point and mnt_opts as
* "str + <field>", i.e. it treats those four members as byte offsets of
* NUL-terminated strings inside the trailing @str area. The meaning of the
* remaining members is not exercised by this program; consult the uapi
* definition before relying on them.
*/
struct statmount {
__u32 size;
__u32 mnt_opts; /* used by main() as an offset into str[] */
__u64 mask;
__u32 sb_dev_major;
__u32 sb_dev_minor;
__u64 sb_magic;
__u32 sb_flags;
__u32 fs_type; /* used by main() as an offset into str[] */
__u64 mnt_id;
__u64 mnt_parent_id;
__u32 mnt_id_old;
__u32 mnt_parent_id_old;
__u64 mnt_attr;
__u64 mnt_propagation;
__u64 mnt_peer_group;
__u64 mnt_master;
__u64 propagate_from;
__u32 mnt_root; /* used by main() as an offset into str[] */
__u32 mnt_point; /* used by main() as an offset into str[] */
__u64 mnt_ns_id;
__u64 __spare2[49];
char str[]; /* variable-length string area following the fixed part */
};
/*
* Request block handed to the statmount and listmount system calls.
* Its 32 bytes match MNT_ID_REQ_SIZE_VER1.
*/
struct mnt_id_req {
__u32 size; /* set to MNT_ID_REQ_SIZE_VER1 by both callers in this file */
__u32 spare; /* never set by this program, so it stays zero */
__u64 mnt_id; /* mount id the request is about (LSMT_ROOT when listing from main()) */
__u64 param; /* request mask for statmount; last seen mount id for listmount */
__u64 mnt_ns_id; /* id of the mount namespace to operate on */
};
#define MNT_ID_REQ_SIZE_VER1 32 /* sizeof second published struct */
#define LSMT_ROOT 0xffffffffffffffff /* root mount */
/*
 * Raw statmount system call wrapper.
 *
 * Packs @mnt_id, @mnt_ns_id and @mask (as the request's param field) into a
 * struct mnt_id_req of size MNT_ID_REQ_SIZE_VER1 and invokes __NR_statmount
 * with the caller's buffer @stmnt of @bufsize bytes and @flags.
 *
 * Returns the value returned by syscall().
 */
static int __statmount(__u64 mnt_id, __u64 mnt_ns_id, __u64 mask,
		       struct statmount *stmnt, size_t bufsize,
		       unsigned int flags)
{
	struct mnt_id_req req = { .size = MNT_ID_REQ_SIZE_VER1 };

	req.mnt_id = mnt_id;
	req.param = mask;
	req.mnt_ns_id = mnt_ns_id;

	return syscall(__NR_statmount, &req, stmnt, bufsize, flags);
}
/*
 * Call statmount with a heap buffer that grows until the result fits.
 *
 * Starts with a 32 KiB buffer and doubles it every time the call fails with
 * EOVERFLOW, giving up once the size would reach UINT_MAX / 2.
 *
 * Returns the filled buffer, which the caller must free(), or NULL if the
 * allocation fails, the call fails for any other reason, or the size limit
 * is hit.
 */
static struct statmount *sys_statmount(__u64 mnt_id, __u64 mnt_ns_id,
				       __u64 mask, unsigned int flags)
{
	struct statmount *buf = NULL;
	size_t bufsize;

	for (bufsize = 1 << 15; bufsize < UINT_MAX / 2; bufsize <<= 1) {
		struct statmount *grown = realloc(buf, bufsize);

		/* On failure realloc() leaves the old buffer valid; free it below. */
		if (!grown)
			break;
		buf = grown;

		if (__statmount(mnt_id, mnt_ns_id, mask, buf, bufsize, flags) == 0)
			return buf;

		/* Only "buffer too small" is worth another, larger attempt. */
		if (errno != EOVERFLOW)
			break;
	}

	free(buf);
	return NULL;
}
/*
 * Raw listmount system call wrapper.
 *
 * Packs @mnt_id, @last_mnt_id (as the request's param field) and @mnt_ns_id
 * into a struct mnt_id_req of size MNT_ID_REQ_SIZE_VER1 and invokes
 * __NR_listmount with the output array @list holding room for @num entries
 * and @flags.
 *
 * Returns the value returned by syscall().
 */
static ssize_t sys_listmount(__u64 mnt_id, __u64 last_mnt_id, __u64 mnt_ns_id,
			     __u64 list[], size_t num, unsigned int flags)
{
	struct mnt_id_req req = { .size = MNT_ID_REQ_SIZE_VER1 };

	req.mnt_id = mnt_id;
	req.param = last_mnt_id;
	req.mnt_ns_id = mnt_ns_id;

	return syscall(__NR_listmount, &req, list, num, flags);
}
/*
 * List the mounts of the caller's mount namespace and of every mount
 * namespace that NS_MNT_GET_NEXT hands out after it.
 *
 * The namespace of the current process is obtained through a pidfd. Mounts
 * are fetched in batches of LISTMNT_BUFFER ids with listmount and described
 * with statmount. When a batch comes back empty (or listmount fails) the
 * program advances to the next namespace; ENOENT from NS_MNT_GET_NEXT ends
 * the walk with exit status 0. Any other setup or iteration failure
 * terminates the program through die_errno().
 */
int main(int argc, char *argv[])
{
#define LISTMNT_BUFFER 10
	__u64 list[LISTMNT_BUFFER], last_mnt_id = 0;
	int ret, pidfd, fd_mntns;
	struct mnt_ns_info info = {};

	pidfd = sys_pidfd_open(getpid(), 0);
	if (pidfd < 0)
		die_errno("pidfd_open failed");

	fd_mntns = ioctl(pidfd, PIDFD_GET_MNT_NAMESPACE, 0);
	if (fd_mntns < 0)
		die_errno("ioctl(PIDFD_GET_MNT_NAMESPACE) failed");

	ret = ioctl(fd_mntns, NS_MNT_GET_INFO, &info);
	if (ret < 0)
		die_errno("ioctl(NS_MNT_GET_INFO) failed");

	printf("Listing %u mounts for mount namespace %llu\n",
	       info.nr_mounts, info.mnt_ns_id);

	for (;;) {
		ssize_t nr_mounts;

		nr_mounts = sys_listmount(LSMT_ROOT, last_mnt_id,
					  info.mnt_ns_id, list, LISTMNT_BUFFER,
					  0);
		if (nr_mounts <= 0) {
			int fd_mntns_next;

			/* Nothing (more) to list here: move on to the next namespace. */
			printf("Finished listing %u mounts for mount namespace %llu\n\n",
			       info.nr_mounts, info.mnt_ns_id);

			/* On success the ioctl also refreshes @info for the new namespace. */
			fd_mntns_next = ioctl(fd_mntns, NS_MNT_GET_NEXT, &info);
			if (fd_mntns_next < 0) {
				if (errno == ENOENT) {
					printf("Finished listing all mount namespaces\n");
					exit(0);
				}
				die_errno("ioctl(NS_MNT_GET_NEXT) failed");
			}

			close(fd_mntns);
			fd_mntns = fd_mntns_next;
			/* Restart the listing cursor for the new namespace. */
			last_mnt_id = 0;
			printf("Listing %u mounts for mount namespace %llu\n",
			       info.nr_mounts, info.mnt_ns_id);
			continue;
		}

		for (size_t cur = 0; cur < (size_t)nr_mounts; cur++) {
			struct statmount *stmnt;

			/* Remember the id so the next listmount call resumes after it. */
			last_mnt_id = list[cur];

			stmnt = sys_statmount(last_mnt_id, info.mnt_ns_id,
					      STATMOUNT_SB_BASIC |
					      STATMOUNT_MNT_BASIC |
					      STATMOUNT_MNT_ROOT |
					      STATMOUNT_MNT_POINT |
					      STATMOUNT_MNT_NS_ID |
					      STATMOUNT_MNT_OPTS |
					      STATMOUNT_FS_TYPE, 0);
			if (!stmnt) {
				printf("Failed to statmount(%llu) in mount namespace(%llu)\n",
				       last_mnt_id, info.mnt_ns_id);
				continue;
			}

			/* The string members are offsets into stmnt->str. */
			printf("mnt_id:\t\t%llu\nmnt_parent_id:\t%llu\nfs_type:\t%s\nmnt_root:\t%s\nmnt_point:\t%s\nmnt_opts:\t%s\n\n",
			       stmnt->mnt_id,
			       stmnt->mnt_parent_id,
			       stmnt->str + stmnt->fs_type,
			       stmnt->str + stmnt->mnt_root,
			       stmnt->str + stmnt->mnt_point,
			       stmnt->str + stmnt->mnt_opts);
			free(stmnt);
		}
	}

	exit(0);
}

View File

@ -1,3 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
owner
pidns
iterate_mntns

View File

@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
TEST_GEN_PROGS := owner pidns
TEST_GEN_PROGS := owner pidns iterate_mntns
CFLAGS := -Wall -Werror
include ../lib.mk
include ../../lib.mk

View File

@ -0,0 +1,149 @@
// SPDX-License-Identifier: GPL-2.0-or-later
// Copyright (c) 2024 Christian Brauner <brauner@kernel.org>
#define _GNU_SOURCE
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <unistd.h>
#include "../../kselftest_harness.h"
#define MNT_NS_COUNT 11
#define MNT_NS_LAST_INDEX 10
/*
* Argument block for the NS_MNT_GET_INFO / NS_MNT_GET_NEXT /
* NS_MNT_GET_PREV ioctls. Its 16 bytes match MNT_NS_INFO_SIZE_VER0.
*/
struct mnt_ns_info {
__u32 size; /* presumably the size of this structure - TODO confirm against the uapi header */
__u32 nr_mounts; /* not inspected by the tests in this file */
__u64 mnt_ns_id; /* mount namespace id; the tests compare it with the ids recorded at setup */
};
#define MNT_NS_INFO_SIZE_VER0 16 /* size of first published struct */
/* Get information about namespace. */
#define NS_MNT_GET_INFO _IOR(0xb7, 10, struct mnt_ns_info)
/* Get next namespace. */
#define NS_MNT_GET_NEXT _IOR(0xb7, 11, struct mnt_ns_info)
/* Get previous namespace. */
#define NS_MNT_GET_PREV _IOR(0xb7, 12, struct mnt_ns_info)
/*
* Per-test state: one open descriptor and one id for each of the
* MNT_NS_COUNT mount namespaces created by the fixture setup. Both arrays
* are indexed in creation order.
*/
FIXTURE(iterate_mount_namespaces) {
int fd_mnt_ns[MNT_NS_COUNT];
__u64 mnt_ns_id[MNT_NS_COUNT];
};
/*
* Create MNT_NS_COUNT mount namespaces inside a fresh user namespace and
* record, for each one, an open /proc/self/ns/mnt descriptor and the id
* reported by NS_MNT_GET_INFO.
*/
FIXTURE_SETUP(iterate_mount_namespaces)
{
/* Mark every slot unused first so teardown can skip slots never opened. */
for (int i = 0; i < MNT_NS_COUNT; i++)
self->fd_mnt_ns[i] = -EBADF;
/*
* Creating a new user namespace lets us guarantee that we only see
* mount namespaces that we did actually create.
*/
ASSERT_EQ(unshare(CLONE_NEWUSER), 0);
for (int i = 0; i < MNT_NS_COUNT; i++) {
struct mnt_ns_info info = {};
/* Each unshare() moves the process into a brand-new mount namespace. */
ASSERT_EQ(unshare(CLONE_NEWNS), 0);
self->fd_mnt_ns[i] = open("/proc/self/ns/mnt", O_RDONLY | O_CLOEXEC);
ASSERT_GE(self->fd_mnt_ns[i], 0);
ASSERT_EQ(ioctl(self->fd_mnt_ns[i], NS_MNT_GET_INFO, &info), 0);
self->mnt_ns_id[i] = info.mnt_ns_id;
}
}
/* Close every namespace descriptor that the setup managed to open. */
FIXTURE_TEARDOWN(iterate_mount_namespaces)
{
for (int i = 0; i < MNT_NS_COUNT; i++) {
/* Negative slots still hold the -EBADF marker written by the setup. */
if (self->fd_mnt_ns[i] < 0)
continue;
ASSERT_EQ(close(self->fd_mnt_ns[i]), 0);
}
}
/*
 * Starting at the first namespace created by the fixture, follow
 * NS_MNT_GET_NEXT until it reports ENOENT and check that exactly
 * MNT_NS_LAST_INDEX steps were possible.
 */
TEST_F(iterate_mount_namespaces, iterate_all_forward)
{
	int fd_mnt_ns_cur, count = 0;

	/*
	 * Iterate on a duplicate so the fixture keeps ownership of its own
	 * descriptor. F_DUPFD_CLOEXEC requires an int argument: the lowest
	 * descriptor number acceptable for the duplicate.
	 */
	fd_mnt_ns_cur = fcntl(self->fd_mnt_ns[0], F_DUPFD_CLOEXEC, 0);
	ASSERT_GE(fd_mnt_ns_cur, 0);

	for (;; count++) {
		struct mnt_ns_info info = {};
		int fd_mnt_ns_next;

		fd_mnt_ns_next = ioctl(fd_mnt_ns_cur, NS_MNT_GET_NEXT, &info);
		/* ENOENT marks the end of the sequence. */
		if (fd_mnt_ns_next < 0 && errno == ENOENT)
			break;
		ASSERT_GE(fd_mnt_ns_next, 0);
		ASSERT_EQ(close(fd_mnt_ns_cur), 0);
		fd_mnt_ns_cur = fd_mnt_ns_next;
	}

	/* Release the descriptor of the last namespace that was reached. */
	ASSERT_EQ(close(fd_mnt_ns_cur), 0);
	ASSERT_EQ(count, MNT_NS_LAST_INDEX);
}
/*
 * Starting at the last namespace created by the fixture, follow
 * NS_MNT_GET_PREV until it reports ENOENT and check that exactly
 * MNT_NS_LAST_INDEX steps were possible.
 */
TEST_F(iterate_mount_namespaces, iterate_all_backwards)
{
	int fd_mnt_ns_cur, count = 0;

	/*
	 * Iterate on a duplicate so the fixture keeps ownership of its own
	 * descriptor. F_DUPFD_CLOEXEC requires an int argument: the lowest
	 * descriptor number acceptable for the duplicate.
	 */
	fd_mnt_ns_cur = fcntl(self->fd_mnt_ns[MNT_NS_LAST_INDEX], F_DUPFD_CLOEXEC, 0);
	ASSERT_GE(fd_mnt_ns_cur, 0);

	for (;; count++) {
		struct mnt_ns_info info = {};
		int fd_mnt_ns_prev;

		fd_mnt_ns_prev = ioctl(fd_mnt_ns_cur, NS_MNT_GET_PREV, &info);
		/* ENOENT marks the end of the sequence. */
		if (fd_mnt_ns_prev < 0 && errno == ENOENT)
			break;
		ASSERT_GE(fd_mnt_ns_prev, 0);
		ASSERT_EQ(close(fd_mnt_ns_cur), 0);
		fd_mnt_ns_cur = fd_mnt_ns_prev;
	}

	/* Release the descriptor of the last namespace that was reached. */
	ASSERT_EQ(close(fd_mnt_ns_cur), 0);
	ASSERT_EQ(count, MNT_NS_LAST_INDEX);
}
TEST_F(iterate_mount_namespaces, iterate_forward)
{
int fd_mnt_ns_cur;
ASSERT_EQ(setns(self->fd_mnt_ns[0], CLONE_NEWNS), 0);
fd_mnt_ns_cur = self->fd_mnt_ns[0];
for (int i = 1; i < MNT_NS_COUNT; i++) {
struct mnt_ns_info info = {};
int fd_mnt_ns_next;
fd_mnt_ns_next = ioctl(fd_mnt_ns_cur, NS_MNT_GET_NEXT, &info);
ASSERT_GE(fd_mnt_ns_next, 0);
ASSERT_EQ(close(fd_mnt_ns_cur), 0);
fd_mnt_ns_cur = fd_mnt_ns_next;
ASSERT_EQ(info.mnt_ns_id, self->mnt_ns_id[i]);
}
}
TEST_F(iterate_mount_namespaces, iterate_backward)
{
int fd_mnt_ns_cur;
ASSERT_EQ(setns(self->fd_mnt_ns[MNT_NS_LAST_INDEX], CLONE_NEWNS), 0);
fd_mnt_ns_cur = self->fd_mnt_ns[MNT_NS_LAST_INDEX];
for (int i = MNT_NS_LAST_INDEX - 1; i >= 0; i--) {
struct mnt_ns_info info = {};
int fd_mnt_ns_prev;
fd_mnt_ns_prev = ioctl(fd_mnt_ns_cur, NS_MNT_GET_PREV, &info);
ASSERT_GE(fd_mnt_ns_prev, 0);
ASSERT_EQ(close(fd_mnt_ns_cur), 0);
fd_mnt_ns_cur = fd_mnt_ns_prev;
ASSERT_EQ(info.mnt_ns_id, self->mnt_ns_id[i]);
}
}
TEST_HARNESS_MAIN

View File

@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-or-later
CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES)
TEST_GEN_PROGS := statmount_test statmount_test_ns
TEST_GEN_PROGS := statmount_test statmount_test_ns listmount_test
include ../../lib.mk

View File

@ -0,0 +1,66 @@
// SPDX-License-Identifier: GPL-2.0-or-later
// Copyright (c) 2024 Christian Brauner <brauner@kernel.org>
#define _GNU_SOURCE
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <unistd.h>
#include "statmount.h"
#include "../../kselftest_harness.h"
#ifndef LISTMOUNT_REVERSE
#define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */
#endif
#define LISTMNT_BUFFER 10
/*
 * Page through listmount() in batches of LISTMNT_BUFFER ids and verify that
 * the ids inside every batch are strictly increasing.
 */
TEST(listmount_forward)
{
	uint64_t list[LISTMNT_BUFFER];
	uint64_t last_mnt_id = 0;
	ssize_t nr_mounts;
	size_t cur;

	while (1) {
		nr_mounts = listmount(LSMT_ROOT, 0, last_mnt_id,
				      list, LISTMNT_BUFFER, 0);
		ASSERT_GE(nr_mounts, 0);
		/* An empty batch means the listing is complete. */
		if (!nr_mounts)
			break;

		/* Compare each id with its right-hand neighbour. */
		for (cur = 0; cur + 1 < nr_mounts; cur++)
			ASSERT_LT(list[cur], list[cur + 1]);

		/* The next call resumes after the final id of this batch. */
		last_mnt_id = list[nr_mounts - 1];
	}
}
/*
 * Page through listmount() with LISTMOUNT_REVERSE in batches of
 * LISTMNT_BUFFER ids and verify that the ids inside every batch are
 * strictly decreasing.
 */
TEST(listmount_backward)
{
	uint64_t list[LISTMNT_BUFFER];
	uint64_t last_mnt_id = 0;
	ssize_t nr_mounts;
	size_t cur;

	while (1) {
		nr_mounts = listmount(LSMT_ROOT, 0, last_mnt_id,
				      list, LISTMNT_BUFFER, LISTMOUNT_REVERSE);
		ASSERT_GE(nr_mounts, 0);
		/* An empty batch means the listing is complete. */
		if (!nr_mounts)
			break;

		/* Compare each id with its right-hand neighbour. */
		for (cur = 0; cur + 1 < nr_mounts; cur++)
			ASSERT_GT(list[cur], list[cur + 1]);

		/* The next call resumes after the final id of this batch. */
		last_mnt_id = list[nr_mounts - 1];
	}
}
TEST_HARNESS_MAIN

View File

@ -12,7 +12,6 @@
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
#include <sys/mount.h>
#include <sys/types.h>
#include <sys/wait.h>