Merge patch series "Support foreign mount namespace with statmount/listmount"

Josef Bacik <josef@toxicpanda.com> says:

Currently the only way to iterate over mount entries in mount namespaces that
aren't your own is to trawl through /proc in order to find /proc/$PID/mountinfo
for the mount namespace that you want.  This is hugely inefficient, so extend
both statmount() and listmount() to allow specifying a mount namespace id in
order to get to mounts in other mount namespaces.

There are a few components to this

1. Having a global index of the mount namespace based on the ->seq value in the
   mount namespace.  This gives us a unique identifier that isn't re-used.
2. Support looking up mount namespaces based on that unique identifier, and
   validating the user has permission to access the given mount namespace.
3. Provide a new ioctl() on nsfs in order to extract the unique identifier we
   can use for statmount() and listmount().

The code is relatively straightforward, and there is a selftest provided to
validate everything works properly.

This is based on vfs.all as of last week, so must be applied onto a tree that
has Christians error handling rework in this area.  If you wish you can pull the
tree directly here

https://github.com/josefbacik/linux/tree/listmount.combined

Christian and I collaborated on this series, which is why there's patches from
both of us in this series.

Christian Brauner (4):
  fs: relax permissions for listmount()
  fs: relax permissions for statmount()
  fs: Allow listmount() in foreign mount namespace
  fs: Allow statmount() in foreign mount namespace

Josef Bacik (4):
  fs: keep an index of current mount namespaces
  fs: export the mount ns id via statmount
  fs: add an ioctl to get the mnt ns id from nsfs
  selftests: add a test for the foreign mnt ns extensions

fs/mount.h                                    |   2 +
 fs/namespace.c                                | 240 ++++++++++--
 fs/nsfs.c                                     |  14 +
 include/uapi/linux/mount.h                    |   6 +-
 include/uapi/linux/nsfs.h                     |   2 +
 .../selftests/filesystems/statmount/Makefile  |   2 +-
 .../filesystems/statmount/statmount.h         |  46 +++
 .../filesystems/statmount/statmount_test.c    |  53 +--
 .../filesystems/statmount/statmount_test_ns.c | 360 ++++++++++++++++++
 9 files changed, 659 insertions(+), 66 deletions(-)
 create mode 100644 tools/testing/selftests/filesystems/statmount/statmount.h
 create mode 100644 tools/testing/selftests/filesystems/statmount/statmount_test_ns.c

Link: https://lore.kernel.org/r/cover.1719243756.git.josef@toxicpanda.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
This commit is contained in:
Christian Brauner 2024-06-25 11:19:24 +02:00
commit a7ebb0fe43
No known key found for this signature in database
GPG Key ID: 91C61BC06578DCA2
9 changed files with 663 additions and 66 deletions

View File

@ -16,6 +16,8 @@ struct mnt_namespace {
u64 event; u64 event;
unsigned int nr_mounts; /* # of mounts in the namespace */ unsigned int nr_mounts; /* # of mounts in the namespace */
unsigned int pending_mounts; unsigned int pending_mounts;
struct rb_node mnt_ns_tree_node; /* node in the mnt_ns_tree */
refcount_t passive; /* number references not pinning @mounts */
} __randomize_layout; } __randomize_layout;
struct mnt_pcp { struct mnt_pcp {

View File

@ -78,6 +78,8 @@ static struct kmem_cache *mnt_cache __ro_after_init;
static DECLARE_RWSEM(namespace_sem); static DECLARE_RWSEM(namespace_sem);
static HLIST_HEAD(unmounted); /* protected by namespace_sem */ static HLIST_HEAD(unmounted); /* protected by namespace_sem */
static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */ static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */
static DEFINE_RWLOCK(mnt_ns_tree_lock);
static struct rb_root mnt_ns_tree = RB_ROOT; /* protected by mnt_ns_tree_lock */
struct mount_kattr { struct mount_kattr {
unsigned int attr_set; unsigned int attr_set;
@ -103,6 +105,109 @@ EXPORT_SYMBOL_GPL(fs_kobj);
*/ */
__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock); __cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);
static int mnt_ns_cmp(u64 seq, const struct mnt_namespace *ns)
{
u64 seq_b = ns->seq;
if (seq < seq_b)
return -1;
if (seq > seq_b)
return 1;
return 0;
}
static inline struct mnt_namespace *node_to_mnt_ns(const struct rb_node *node)
{
if (!node)
return NULL;
return rb_entry(node, struct mnt_namespace, mnt_ns_tree_node);
}
static bool mnt_ns_less(struct rb_node *a, const struct rb_node *b)
{
struct mnt_namespace *ns_a = node_to_mnt_ns(a);
struct mnt_namespace *ns_b = node_to_mnt_ns(b);
u64 seq_a = ns_a->seq;
return mnt_ns_cmp(seq_a, ns_b) < 0;
}
static void mnt_ns_tree_add(struct mnt_namespace *ns)
{
guard(write_lock)(&mnt_ns_tree_lock);
rb_add(&ns->mnt_ns_tree_node, &mnt_ns_tree, mnt_ns_less);
}
static void mnt_ns_release(struct mnt_namespace *ns)
{
lockdep_assert_not_held(&mnt_ns_tree_lock);
/* keep alive for {list,stat}mount() */
if (refcount_dec_and_test(&ns->passive)) {
put_user_ns(ns->user_ns);
kfree(ns);
}
}
DEFINE_FREE(mnt_ns_release, struct mnt_namespace *, if (_T) mnt_ns_release(_T))
static void mnt_ns_tree_remove(struct mnt_namespace *ns)
{
/* remove from global mount namespace list */
if (!is_anon_ns(ns)) {
guard(write_lock)(&mnt_ns_tree_lock);
rb_erase(&ns->mnt_ns_tree_node, &mnt_ns_tree);
}
mnt_ns_release(ns);
}
/*
* Returns the mount namespace which either has the specified id, or has the
* next smallest id afer the specified one.
*/
static struct mnt_namespace *mnt_ns_find_id_at(u64 mnt_ns_id)
{
struct rb_node *node = mnt_ns_tree.rb_node;
struct mnt_namespace *ret = NULL;
lockdep_assert_held(&mnt_ns_tree_lock);
while (node) {
struct mnt_namespace *n = node_to_mnt_ns(node);
if (mnt_ns_id <= n->seq) {
ret = node_to_mnt_ns(node);
if (mnt_ns_id == n->seq)
break;
node = node->rb_left;
} else {
node = node->rb_right;
}
}
return ret;
}
/*
* Lookup a mount namespace by id and take a passive reference count. Taking a
* passive reference means the mount namespace can be emptied if e.g., the last
* task holding an active reference exits. To access the mounts of the
* namespace the @namespace_sem must first be acquired. If the namespace has
* already shut down before acquiring @namespace_sem, {list,stat}mount() will
* see that the mount rbtree of the namespace is empty.
*/
static struct mnt_namespace *lookup_mnt_ns(u64 mnt_ns_id)
{
struct mnt_namespace *ns;
guard(read_lock)(&mnt_ns_tree_lock);
ns = mnt_ns_find_id_at(mnt_ns_id);
if (!ns || ns->seq != mnt_ns_id)
return NULL;
refcount_inc(&ns->passive);
return ns;
}
static inline void lock_mount_hash(void) static inline void lock_mount_hash(void)
{ {
write_seqlock(&mount_lock); write_seqlock(&mount_lock);
@ -3733,8 +3838,7 @@ static void free_mnt_ns(struct mnt_namespace *ns)
if (!is_anon_ns(ns)) if (!is_anon_ns(ns))
ns_free_inum(&ns->ns); ns_free_inum(&ns->ns);
dec_mnt_namespaces(ns->ucounts); dec_mnt_namespaces(ns->ucounts);
put_user_ns(ns->user_ns); mnt_ns_tree_remove(ns);
kfree(ns);
} }
/* /*
@ -3773,7 +3877,9 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a
if (!anon) if (!anon)
new_ns->seq = atomic64_add_return(1, &mnt_ns_seq); new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
refcount_set(&new_ns->ns.count, 1); refcount_set(&new_ns->ns.count, 1);
refcount_set(&new_ns->passive, 1);
new_ns->mounts = RB_ROOT; new_ns->mounts = RB_ROOT;
RB_CLEAR_NODE(&new_ns->mnt_ns_tree_node);
init_waitqueue_head(&new_ns->poll); init_waitqueue_head(&new_ns->poll);
new_ns->user_ns = get_user_ns(user_ns); new_ns->user_ns = get_user_ns(user_ns);
new_ns->ucounts = ucounts; new_ns->ucounts = ucounts;
@ -3850,6 +3956,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
while (p->mnt.mnt_root != q->mnt.mnt_root) while (p->mnt.mnt_root != q->mnt.mnt_root)
p = next_mnt(skip_mnt_tree(p), old); p = next_mnt(skip_mnt_tree(p), old);
} }
mnt_ns_tree_add(new_ns);
namespace_unlock(); namespace_unlock();
if (rootmnt) if (rootmnt)
@ -4867,6 +4974,12 @@ static int statmount_fs_type(struct kstatmount *s, struct seq_file *seq)
return 0; return 0;
} }
static void statmount_mnt_ns_id(struct kstatmount *s, struct mnt_namespace *ns)
{
s->sm.mask |= STATMOUNT_MNT_NS_ID;
s->sm.mnt_ns_id = ns->seq;
}
static int statmount_string(struct kstatmount *s, u64 flag) static int statmount_string(struct kstatmount *s, u64 flag)
{ {
int ret; int ret;
@ -4930,6 +5043,7 @@ static int copy_statmount_to_user(struct kstatmount *s)
static int do_statmount(struct kstatmount *s) static int do_statmount(struct kstatmount *s)
{ {
struct mount *m = real_mount(s->mnt); struct mount *m = real_mount(s->mnt);
struct mnt_namespace *ns = m->mnt_ns;
int err; int err;
/* /*
@ -4937,7 +5051,7 @@ static int do_statmount(struct kstatmount *s)
* mounts to show users. * mounts to show users.
*/ */
if (!is_path_reachable(m, m->mnt.mnt_root, &s->root) && if (!is_path_reachable(m, m->mnt.mnt_root, &s->root) &&
!ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN)) !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
return -EPERM; return -EPERM;
err = security_sb_statfs(s->mnt->mnt_root); err = security_sb_statfs(s->mnt->mnt_root);
@ -4962,6 +5076,9 @@ static int do_statmount(struct kstatmount *s)
if (!err && s->mask & STATMOUNT_MNT_POINT) if (!err && s->mask & STATMOUNT_MNT_POINT)
err = statmount_string(s, STATMOUNT_MNT_POINT); err = statmount_string(s, STATMOUNT_MNT_POINT);
if (!err && s->mask & STATMOUNT_MNT_NS_ID)
statmount_mnt_ns_id(s, ns);
if (err) if (err)
return err; return err;
@ -5003,7 +5120,7 @@ static int copy_mnt_id_req(const struct mnt_id_req __user *req,
int ret; int ret;
size_t usize; size_t usize;
BUILD_BUG_ON(sizeof(struct mnt_id_req) != MNT_ID_REQ_SIZE_VER0); BUILD_BUG_ON(sizeof(struct mnt_id_req) != MNT_ID_REQ_SIZE_VER1);
ret = get_user(usize, &req->size); ret = get_user(usize, &req->size);
if (ret) if (ret)
@ -5021,10 +5138,63 @@ static int copy_mnt_id_req(const struct mnt_id_req __user *req,
return 0; return 0;
} }
static struct mount *listmnt_next(struct mount *curr, bool reverse)
{
struct rb_node *node;
if (reverse)
node = rb_prev(&curr->mnt_node);
else
node = rb_next(&curr->mnt_node);
return node_to_mount(node);
}
static int grab_requested_root(struct mnt_namespace *ns, struct path *root)
{
struct mount *first;
rwsem_assert_held(&namespace_sem);
/* We're looking at our own ns, just use get_fs_root. */
if (ns == current->nsproxy->mnt_ns) {
get_fs_root(current->fs, root);
return 0;
}
/*
* We have to find the first mount in our ns and use that, however it
* may not exist, so handle that properly.
*/
if (RB_EMPTY_ROOT(&ns->mounts))
return -ENOENT;
first = listmnt_next(ns->root, false);
if (!first)
return -ENOENT;
root->mnt = mntget(&first->mnt);
root->dentry = dget(root->mnt->mnt_root);
return 0;
}
/*
* If the user requested a specific mount namespace id, look that up and return
* that, or if not simply grab a passive reference on our mount namespace and
* return that.
*/
static struct mnt_namespace *grab_requested_mnt_ns(u64 mnt_ns_id)
{
if (mnt_ns_id)
return lookup_mnt_ns(mnt_ns_id);
refcount_inc(&current->nsproxy->mnt_ns->passive);
return current->nsproxy->mnt_ns;
}
SYSCALL_DEFINE4(statmount, const struct mnt_id_req __user *, req, SYSCALL_DEFINE4(statmount, const struct mnt_id_req __user *, req,
struct statmount __user *, buf, size_t, bufsize, struct statmount __user *, buf, size_t, bufsize,
unsigned int, flags) unsigned int, flags)
{ {
struct mnt_namespace *ns __free(mnt_ns_release) = NULL;
struct vfsmount *mnt; struct vfsmount *mnt;
struct mnt_id_req kreq; struct mnt_id_req kreq;
struct kstatmount ks; struct kstatmount ks;
@ -5039,13 +5209,28 @@ SYSCALL_DEFINE4(statmount, const struct mnt_id_req __user *, req,
if (ret) if (ret)
return ret; return ret;
ns = grab_requested_mnt_ns(kreq.mnt_ns_id);
if (!ns)
return -ENOENT;
if (kreq.mnt_ns_id && (ns != current->nsproxy->mnt_ns) &&
!ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
return -ENOENT;
retry: retry:
ret = prepare_kstatmount(&ks, &kreq, buf, bufsize, seq_size); ret = prepare_kstatmount(&ks, &kreq, buf, bufsize, seq_size);
if (ret) if (ret)
return ret; return ret;
down_read(&namespace_sem); down_read(&namespace_sem);
mnt = lookup_mnt_in_ns(kreq.mnt_id, current->nsproxy->mnt_ns); /* Has the namespace already been emptied? */
if (kreq.mnt_ns_id && RB_EMPTY_ROOT(&ns->mounts)) {
up_read(&namespace_sem);
kvfree(ks.seq.buf);
return -ENOENT;
}
mnt = lookup_mnt_in_ns(kreq.mnt_id, ns);
if (!mnt) { if (!mnt) {
up_read(&namespace_sem); up_read(&namespace_sem);
kvfree(ks.seq.buf); kvfree(ks.seq.buf);
@ -5053,7 +5238,12 @@ SYSCALL_DEFINE4(statmount, const struct mnt_id_req __user *, req,
} }
ks.mnt = mnt; ks.mnt = mnt;
get_fs_root(current->fs, &ks.root); ret = grab_requested_root(ns, &ks.root);
if (ret) {
up_read(&namespace_sem);
kvfree(ks.seq.buf);
return ret;
}
ret = do_statmount(&ks); ret = do_statmount(&ks);
path_put(&ks.root); path_put(&ks.root);
up_read(&namespace_sem); up_read(&namespace_sem);
@ -5066,30 +5256,21 @@ SYSCALL_DEFINE4(statmount, const struct mnt_id_req __user *, req,
return ret; return ret;
} }
static struct mount *listmnt_next(struct mount *curr, bool reverse) static ssize_t do_listmount(struct mnt_namespace *ns, u64 mnt_parent_id,
{ u64 last_mnt_id, u64 *mnt_ids, size_t nr_mnt_ids,
struct rb_node *node; bool reverse)
if (reverse)
node = rb_prev(&curr->mnt_node);
else
node = rb_next(&curr->mnt_node);
return node_to_mount(node);
}
static ssize_t do_listmount(u64 mnt_parent_id, u64 last_mnt_id, u64 *mnt_ids,
size_t nr_mnt_ids, bool reverse)
{ {
struct path root __free(path_put) = {}; struct path root __free(path_put) = {};
struct mnt_namespace *ns = current->nsproxy->mnt_ns;
struct path orig; struct path orig;
struct mount *r, *first; struct mount *r, *first;
ssize_t ret; ssize_t ret;
rwsem_assert_held(&namespace_sem); rwsem_assert_held(&namespace_sem);
get_fs_root(current->fs, &root); ret = grab_requested_root(ns, &root);
if (ret)
return ret;
if (mnt_parent_id == LSMT_ROOT) { if (mnt_parent_id == LSMT_ROOT) {
orig = root; orig = root;
} else { } else {
@ -5104,7 +5285,7 @@ static ssize_t do_listmount(u64 mnt_parent_id, u64 last_mnt_id, u64 *mnt_ids,
* mounts to show users. * mounts to show users.
*/ */
if (!is_path_reachable(real_mount(orig.mnt), orig.dentry, &root) && if (!is_path_reachable(real_mount(orig.mnt), orig.dentry, &root) &&
!ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN)) !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
return -EPERM; return -EPERM;
ret = security_sb_statfs(orig.dentry); ret = security_sb_statfs(orig.dentry);
@ -5141,6 +5322,7 @@ SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req,
{ {
u64 *kmnt_ids __free(kvfree) = NULL; u64 *kmnt_ids __free(kvfree) = NULL;
const size_t maxcount = 1000000; const size_t maxcount = 1000000;
struct mnt_namespace *ns __free(mnt_ns_release) = NULL;
struct mnt_id_req kreq; struct mnt_id_req kreq;
ssize_t ret; ssize_t ret;
@ -5167,8 +5349,16 @@ SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req,
if (!kmnt_ids) if (!kmnt_ids)
return -ENOMEM; return -ENOMEM;
ns = grab_requested_mnt_ns(kreq.mnt_ns_id);
if (!ns)
return -ENOENT;
if (kreq.mnt_ns_id && (ns != current->nsproxy->mnt_ns) &&
!ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
return -ENOENT;
scoped_guard(rwsem_read, &namespace_sem) scoped_guard(rwsem_read, &namespace_sem)
ret = do_listmount(kreq.mnt_id, kreq.param, kmnt_ids, ret = do_listmount(ns, kreq.mnt_id, kreq.param, kmnt_ids,
nr_mnt_ids, (flags & LISTMOUNT_REVERSE)); nr_mnt_ids, (flags & LISTMOUNT_REVERSE));
if (copy_to_user(mnt_ids, kmnt_ids, ret * sizeof(*mnt_ids))) if (copy_to_user(mnt_ids, kmnt_ids, ret * sizeof(*mnt_ids)))
@ -5204,6 +5394,8 @@ static void __init init_mount_tree(void)
set_fs_pwd(current->fs, &root); set_fs_pwd(current->fs, &root);
set_fs_root(current->fs, &root); set_fs_root(current->fs, &root);
mnt_ns_tree_add(ns);
} }
void __init mnt_init(void) void __init mnt_init(void)

View File

@ -12,6 +12,7 @@
#include <linux/nsfs.h> #include <linux/nsfs.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include "mount.h"
#include "internal.h" #include "internal.h"
static struct vfsmount *nsfs_mnt; static struct vfsmount *nsfs_mnt;
@ -143,6 +144,19 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl,
argp = (uid_t __user *) arg; argp = (uid_t __user *) arg;
uid = from_kuid_munged(current_user_ns(), user_ns->owner); uid = from_kuid_munged(current_user_ns(), user_ns->owner);
return put_user(uid, argp); return put_user(uid, argp);
case NS_GET_MNTNS_ID: {
struct mnt_namespace *mnt_ns;
__u64 __user *idp;
__u64 id;
if (ns->ops->type != CLONE_NEWNS)
return -EINVAL;
mnt_ns = container_of(ns, struct mnt_namespace, ns);
idp = (__u64 __user *)arg;
id = mnt_ns->seq;
return put_user(id, idp);
}
default: default:
return -ENOTTY; return -ENOTTY;
} }

View File

@ -172,7 +172,8 @@ struct statmount {
__u64 propagate_from; /* Propagation from in current namespace */ __u64 propagate_from; /* Propagation from in current namespace */
__u32 mnt_root; /* [str] Root of mount relative to root of fs */ __u32 mnt_root; /* [str] Root of mount relative to root of fs */
__u32 mnt_point; /* [str] Mountpoint relative to current root */ __u32 mnt_point; /* [str] Mountpoint relative to current root */
__u64 __spare2[50]; __u64 mnt_ns_id; /* ID of the mount namespace */
__u64 __spare2[49];
char str[]; /* Variable size part containing strings */ char str[]; /* Variable size part containing strings */
}; };
@ -188,10 +189,12 @@ struct mnt_id_req {
__u32 spare; __u32 spare;
__u64 mnt_id; __u64 mnt_id;
__u64 param; __u64 param;
__u64 mnt_ns_id;
}; };
/* List of all mnt_id_req versions. */ /* List of all mnt_id_req versions. */
#define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */ #define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */
#define MNT_ID_REQ_SIZE_VER1 32 /* sizeof second published struct */
/* /*
* @mask bits for statmount(2) * @mask bits for statmount(2)
@ -202,6 +205,7 @@ struct mnt_id_req {
#define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */ #define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */
#define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */ #define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */
#define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */ #define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */
#define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */
/* /*
* Special @mnt_id values that can be passed to listmount * Special @mnt_id values that can be passed to listmount

View File

@ -15,5 +15,7 @@
#define NS_GET_NSTYPE _IO(NSIO, 0x3) #define NS_GET_NSTYPE _IO(NSIO, 0x3)
/* Get owner UID (in the caller's user namespace) for a user namespace */ /* Get owner UID (in the caller's user namespace) for a user namespace */
#define NS_GET_OWNER_UID _IO(NSIO, 0x4) #define NS_GET_OWNER_UID _IO(NSIO, 0x4)
/* Get the id for a mount namespace */
#define NS_GET_MNTNS_ID _IO(NSIO, 0x5)
#endif /* __LINUX_NSFS_H */ #endif /* __LINUX_NSFS_H */

View File

@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-or-later # SPDX-License-Identifier: GPL-2.0-or-later
CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES)
TEST_GEN_PROGS := statmount_test TEST_GEN_PROGS := statmount_test statmount_test_ns
include ../../lib.mk include ../../lib.mk

View File

@ -0,0 +1,46 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __STATMOUNT_H
#define __STATMOUNT_H
#include <stdint.h>
#include <linux/mount.h>
#include <asm/unistd.h>
static inline int statmount(uint64_t mnt_id, uint64_t mnt_ns_id, uint64_t mask,
struct statmount *buf, size_t bufsize,
unsigned int flags)
{
struct mnt_id_req req = {
.size = MNT_ID_REQ_SIZE_VER0,
.mnt_id = mnt_id,
.param = mask,
};
if (mnt_ns_id) {
req.size = MNT_ID_REQ_SIZE_VER1;
req.mnt_ns_id = mnt_ns_id;
}
return syscall(__NR_statmount, &req, buf, bufsize, flags);
}
static ssize_t listmount(uint64_t mnt_id, uint64_t mnt_ns_id,
uint64_t last_mnt_id, uint64_t list[], size_t num,
unsigned int flags)
{
struct mnt_id_req req = {
.size = MNT_ID_REQ_SIZE_VER0,
.mnt_id = mnt_id,
.param = last_mnt_id,
};
if (mnt_ns_id) {
req.size = MNT_ID_REQ_SIZE_VER1;
req.mnt_ns_id = mnt_ns_id;
}
return syscall(__NR_listmount, &req, list, num, flags);
}
#endif /* __STATMOUNT_H */

View File

@ -4,17 +4,15 @@
#include <assert.h> #include <assert.h>
#include <stddef.h> #include <stddef.h>
#include <stdint.h>
#include <sched.h> #include <sched.h>
#include <fcntl.h> #include <fcntl.h>
#include <sys/param.h> #include <sys/param.h>
#include <sys/mount.h> #include <sys/mount.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <sys/statfs.h> #include <sys/statfs.h>
#include <linux/mount.h>
#include <linux/stat.h> #include <linux/stat.h>
#include <asm/unistd.h>
#include "statmount.h"
#include "../../kselftest.h" #include "../../kselftest.h"
static const char *const known_fs[] = { static const char *const known_fs[] = {
@ -36,18 +34,6 @@ static const char *const known_fs[] = {
"ufs", "v7", "vboxsf", "vfat", "virtiofs", "vxfs", "xenfs", "xfs", "ufs", "v7", "vboxsf", "vfat", "virtiofs", "vxfs", "xenfs", "xfs",
"zonefs", NULL }; "zonefs", NULL };
static int statmount(uint64_t mnt_id, uint64_t mask, struct statmount *buf,
size_t bufsize, unsigned int flags)
{
struct mnt_id_req req = {
.size = MNT_ID_REQ_SIZE_VER0,
.mnt_id = mnt_id,
.param = mask,
};
return syscall(__NR_statmount, &req, buf, bufsize, flags);
}
static struct statmount *statmount_alloc(uint64_t mnt_id, uint64_t mask, unsigned int flags) static struct statmount *statmount_alloc(uint64_t mnt_id, uint64_t mask, unsigned int flags)
{ {
size_t bufsize = 1 << 15; size_t bufsize = 1 << 15;
@ -56,7 +42,7 @@ static struct statmount *statmount_alloc(uint64_t mnt_id, uint64_t mask, unsigne
int ret; int ret;
for (;;) { for (;;) {
ret = statmount(mnt_id, mask, tmp, bufsize, flags); ret = statmount(mnt_id, 0, mask, tmp, bufsize, flags);
if (ret != -1) if (ret != -1)
break; break;
if (tofree) if (tofree)
@ -122,7 +108,6 @@ static int orig_root;
static uint64_t root_id, parent_id; static uint64_t root_id, parent_id;
static uint32_t old_root_id, old_parent_id; static uint32_t old_root_id, old_parent_id;
static void cleanup_namespace(void) static void cleanup_namespace(void)
{ {
fchdir(orig_root); fchdir(orig_root);
@ -138,7 +123,7 @@ static void setup_namespace(void)
uid_t uid = getuid(); uid_t uid = getuid();
gid_t gid = getgid(); gid_t gid = getgid();
ret = unshare(CLONE_NEWNS|CLONE_NEWUSER); ret = unshare(CLONE_NEWNS|CLONE_NEWUSER|CLONE_NEWPID);
if (ret == -1) if (ret == -1)
ksft_exit_fail_msg("unsharing mountns and userns: %s\n", ksft_exit_fail_msg("unsharing mountns and userns: %s\n",
strerror(errno)); strerror(errno));
@ -208,25 +193,13 @@ static int setup_mount_tree(int log2_num)
return 0; return 0;
} }
static ssize_t listmount(uint64_t mnt_id, uint64_t last_mnt_id,
uint64_t list[], size_t num, unsigned int flags)
{
struct mnt_id_req req = {
.size = MNT_ID_REQ_SIZE_VER0,
.mnt_id = mnt_id,
.param = last_mnt_id,
};
return syscall(__NR_listmount, &req, list, num, flags);
}
static void test_listmount_empty_root(void) static void test_listmount_empty_root(void)
{ {
ssize_t res; ssize_t res;
const unsigned int size = 32; const unsigned int size = 32;
uint64_t list[size]; uint64_t list[size];
res = listmount(LSMT_ROOT, 0, list, size, 0); res = listmount(LSMT_ROOT, 0, 0, list, size, 0);
if (res == -1) { if (res == -1) {
ksft_test_result_fail("listmount: %s\n", strerror(errno)); ksft_test_result_fail("listmount: %s\n", strerror(errno));
return; return;
@ -251,7 +224,7 @@ static void test_statmount_zero_mask(void)
struct statmount sm; struct statmount sm;
int ret; int ret;
ret = statmount(root_id, 0, &sm, sizeof(sm), 0); ret = statmount(root_id, 0, 0, &sm, sizeof(sm), 0);
if (ret == -1) { if (ret == -1) {
ksft_test_result_fail("statmount zero mask: %s\n", ksft_test_result_fail("statmount zero mask: %s\n",
strerror(errno)); strerror(errno));
@ -277,7 +250,7 @@ static void test_statmount_mnt_basic(void)
int ret; int ret;
uint64_t mask = STATMOUNT_MNT_BASIC; uint64_t mask = STATMOUNT_MNT_BASIC;
ret = statmount(root_id, mask, &sm, sizeof(sm), 0); ret = statmount(root_id, 0, mask, &sm, sizeof(sm), 0);
if (ret == -1) { if (ret == -1) {
ksft_test_result_fail("statmount mnt basic: %s\n", ksft_test_result_fail("statmount mnt basic: %s\n",
strerror(errno)); strerror(errno));
@ -337,7 +310,7 @@ static void test_statmount_sb_basic(void)
struct statx sx; struct statx sx;
struct statfs sf; struct statfs sf;
ret = statmount(root_id, mask, &sm, sizeof(sm), 0); ret = statmount(root_id, 0, mask, &sm, sizeof(sm), 0);
if (ret == -1) { if (ret == -1) {
ksft_test_result_fail("statmount sb basic: %s\n", ksft_test_result_fail("statmount sb basic: %s\n",
strerror(errno)); strerror(errno));
@ -498,14 +471,14 @@ static void test_statmount_string(uint64_t mask, size_t off, const char *name)
exactsize = sm->size; exactsize = sm->size;
shortsize = sizeof(*sm) + i; shortsize = sizeof(*sm) + i;
ret = statmount(root_id, mask, sm, exactsize, 0); ret = statmount(root_id, 0, mask, sm, exactsize, 0);
if (ret == -1) { if (ret == -1) {
ksft_test_result_fail("statmount exact size: %s\n", ksft_test_result_fail("statmount exact size: %s\n",
strerror(errno)); strerror(errno));
goto out; goto out;
} }
errno = 0; errno = 0;
ret = statmount(root_id, mask, sm, shortsize, 0); ret = statmount(root_id, 0, mask, sm, shortsize, 0);
if (ret != -1 || errno != EOVERFLOW) { if (ret != -1 || errno != EOVERFLOW) {
ksft_test_result_fail("should have failed with EOVERFLOW: %s\n", ksft_test_result_fail("should have failed with EOVERFLOW: %s\n",
strerror(errno)); strerror(errno));
@ -533,7 +506,7 @@ static void test_listmount_tree(void)
if (res == -1) if (res == -1)
return; return;
num = res = listmount(LSMT_ROOT, 0, list, size, 0); num = res = listmount(LSMT_ROOT, 0, 0, list, size, 0);
if (res == -1) { if (res == -1) {
ksft_test_result_fail("listmount: %s\n", strerror(errno)); ksft_test_result_fail("listmount: %s\n", strerror(errno));
return; return;
@ -545,7 +518,7 @@ static void test_listmount_tree(void)
} }
for (i = 0; i < size - step;) { for (i = 0; i < size - step;) {
res = listmount(LSMT_ROOT, i ? list2[i - 1] : 0, list2 + i, step, 0); res = listmount(LSMT_ROOT, 0, i ? list2[i - 1] : 0, list2 + i, step, 0);
if (res == -1) if (res == -1)
ksft_test_result_fail("short listmount: %s\n", ksft_test_result_fail("short listmount: %s\n",
strerror(errno)); strerror(errno));
@ -577,11 +550,11 @@ int main(void)
int ret; int ret;
uint64_t all_mask = STATMOUNT_SB_BASIC | STATMOUNT_MNT_BASIC | uint64_t all_mask = STATMOUNT_SB_BASIC | STATMOUNT_MNT_BASIC |
STATMOUNT_PROPAGATE_FROM | STATMOUNT_MNT_ROOT | STATMOUNT_PROPAGATE_FROM | STATMOUNT_MNT_ROOT |
STATMOUNT_MNT_POINT | STATMOUNT_FS_TYPE; STATMOUNT_MNT_POINT | STATMOUNT_FS_TYPE | STATMOUNT_MNT_NS_ID;
ksft_print_header(); ksft_print_header();
ret = statmount(0, 0, NULL, 0, 0); ret = statmount(0, 0, 0, NULL, 0, 0);
assert(ret == -1); assert(ret == -1);
if (errno == ENOSYS) if (errno == ENOSYS)
ksft_exit_skip("statmount() syscall not supported\n"); ksft_exit_skip("statmount() syscall not supported\n");

View File

@ -0,0 +1,364 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#define _GNU_SOURCE
#include <assert.h>
#include <fcntl.h>
#include <limits.h>
#include <sched.h>
#include <stdlib.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <linux/nsfs.h>
#include <linux/stat.h>
#include "statmount.h"
#include "../../kselftest.h"
#define NSID_PASS 0
#define NSID_FAIL 1
#define NSID_SKIP 2
#define NSID_ERROR 3
static void handle_result(int ret, const char *testname)
{
if (ret == NSID_PASS)
ksft_test_result_pass("%s\n", testname);
else if (ret == NSID_FAIL)
ksft_test_result_fail("%s\n", testname);
else if (ret == NSID_ERROR)
ksft_exit_fail_msg("%s\n", testname);
else
ksft_test_result_skip("%s\n", testname);
}
static inline int wait_for_pid(pid_t pid)
{
int status, ret;
again:
ret = waitpid(pid, &status, 0);
if (ret == -1) {
if (errno == EINTR)
goto again;
ksft_print_msg("waitpid returned -1, errno=%d\n", errno);
return -1;
}
if (!WIFEXITED(status)) {
ksft_print_msg(
"waitpid !WIFEXITED, WIFSIGNALED=%d, WTERMSIG=%d\n",
WIFSIGNALED(status), WTERMSIG(status));
return -1;
}
ret = WEXITSTATUS(status);
return ret;
}
static int get_mnt_ns_id(const char *mnt_ns, uint64_t *mnt_ns_id)
{
int fd = open(mnt_ns, O_RDONLY);
if (fd < 0) {
ksft_print_msg("failed to open for ns %s: %s\n",
mnt_ns, strerror(errno));
sleep(60);
return NSID_ERROR;
}
if (ioctl(fd, NS_GET_MNTNS_ID, mnt_ns_id) < 0) {
ksft_print_msg("failed to get the nsid for ns %s: %s\n",
mnt_ns, strerror(errno));
return NSID_ERROR;
}
close(fd);
return NSID_PASS;
}
static int get_mnt_id(const char *path, uint64_t *mnt_id)
{
struct statx sx;
int ret;
ret = statx(AT_FDCWD, path, 0, STATX_MNT_ID_UNIQUE, &sx);
if (ret == -1) {
ksft_print_msg("retrieving unique mount ID for %s: %s\n", path,
strerror(errno));
return NSID_ERROR;
}
if (!(sx.stx_mask & STATX_MNT_ID_UNIQUE)) {
ksft_print_msg("no unique mount ID available for %s\n", path);
return NSID_ERROR;
}
*mnt_id = sx.stx_mnt_id;
return NSID_PASS;
}
static int write_file(const char *path, const char *val)
{
int fd = open(path, O_WRONLY);
size_t len = strlen(val);
int ret;
if (fd == -1) {
ksft_print_msg("opening %s for write: %s\n", path, strerror(errno));
return NSID_ERROR;
}
ret = write(fd, val, len);
if (ret == -1) {
ksft_print_msg("writing to %s: %s\n", path, strerror(errno));
return NSID_ERROR;
}
if (ret != len) {
ksft_print_msg("short write to %s\n", path);
return NSID_ERROR;
}
ret = close(fd);
if (ret == -1) {
ksft_print_msg("closing %s\n", path);
return NSID_ERROR;
}
return NSID_PASS;
}
static int setup_namespace(void)
{
int ret;
char buf[32];
uid_t uid = getuid();
gid_t gid = getgid();
ret = unshare(CLONE_NEWNS|CLONE_NEWUSER|CLONE_NEWPID);
if (ret == -1)
ksft_exit_fail_msg("unsharing mountns and userns: %s\n",
strerror(errno));
sprintf(buf, "0 %d 1", uid);
ret = write_file("/proc/self/uid_map", buf);
if (ret != NSID_PASS)
return ret;
ret = write_file("/proc/self/setgroups", "deny");
if (ret != NSID_PASS)
return ret;
sprintf(buf, "0 %d 1", gid);
ret = write_file("/proc/self/gid_map", buf);
if (ret != NSID_PASS)
return ret;
ret = mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL);
if (ret == -1) {
ksft_print_msg("making mount tree private: %s\n",
strerror(errno));
return NSID_ERROR;
}
return NSID_PASS;
}
static int _test_statmount_mnt_ns_id(void)
{
struct statmount sm;
uint64_t mnt_ns_id;
uint64_t root_id;
int ret;
ret = get_mnt_ns_id("/proc/self/ns/mnt", &mnt_ns_id);
if (ret != NSID_PASS)
return ret;
ret = get_mnt_id("/", &root_id);
if (ret != NSID_PASS)
return ret;
ret = statmount(root_id, 0, STATMOUNT_MNT_NS_ID, &sm, sizeof(sm), 0);
if (ret == -1) {
ksft_print_msg("statmount mnt ns id: %s\n", strerror(errno));
return NSID_ERROR;
}
if (sm.size != sizeof(sm)) {
ksft_print_msg("unexpected size: %u != %u\n", sm.size,
(uint32_t)sizeof(sm));
return NSID_FAIL;
}
if (sm.mask != STATMOUNT_MNT_NS_ID) {
ksft_print_msg("statmount mnt ns id unavailable\n");
return NSID_SKIP;
}
if (sm.mnt_ns_id != mnt_ns_id) {
ksft_print_msg("unexpected mnt ns ID: 0x%llx != 0x%llx\n",
(unsigned long long)sm.mnt_ns_id,
(unsigned long long)mnt_ns_id);
return NSID_FAIL;
}
return NSID_PASS;
}
static void test_statmount_mnt_ns_id(void)
{
pid_t pid;
int ret;
pid = fork();
if (pid < 0)
ksft_exit_fail_msg("failed to fork: %s\n", strerror(errno));
/* We're the original pid, wait for the result. */
if (pid != 0) {
ret = wait_for_pid(pid);
handle_result(ret, "test statmount ns id");
return;
}
ret = setup_namespace();
if (ret != NSID_PASS)
exit(ret);
ret = _test_statmount_mnt_ns_id();
exit(ret);
}
static int validate_external_listmount(pid_t pid, uint64_t child_nr_mounts)
{
uint64_t list[256];
uint64_t mnt_ns_id;
uint64_t nr_mounts;
char buf[256];
int ret;
/* Get the mount ns id for our child. */
snprintf(buf, sizeof(buf), "/proc/%lu/ns/mnt", (unsigned long)pid);
ret = get_mnt_ns_id(buf, &mnt_ns_id);
nr_mounts = listmount(LSMT_ROOT, mnt_ns_id, 0, list, 256, 0);
if (nr_mounts == (uint64_t)-1) {
ksft_print_msg("listmount: %s\n", strerror(errno));
return NSID_ERROR;
}
if (nr_mounts != child_nr_mounts) {
ksft_print_msg("listmount results is %zi != %zi\n", nr_mounts,
child_nr_mounts);
return NSID_FAIL;
}
/* Validate that all of our entries match our mnt_ns_id. */
for (int i = 0; i < nr_mounts; i++) {
struct statmount sm;
ret = statmount(list[i], mnt_ns_id, STATMOUNT_MNT_NS_ID, &sm,
sizeof(sm), 0);
if (ret < 0) {
ksft_print_msg("statmount mnt ns id: %s\n", strerror(errno));
return NSID_ERROR;
}
if (sm.mask != STATMOUNT_MNT_NS_ID) {
ksft_print_msg("statmount mnt ns id unavailable\n");
return NSID_SKIP;
}
if (sm.mnt_ns_id != mnt_ns_id) {
ksft_print_msg("listmount gave us the wrong ns id: 0x%llx != 0x%llx\n",
(unsigned long long)sm.mnt_ns_id,
(unsigned long long)mnt_ns_id);
return NSID_FAIL;
}
}
return NSID_PASS;
}
static void test_listmount_ns(void)
{
uint64_t nr_mounts;
char pval;
int child_ready_pipe[2];
int parent_ready_pipe[2];
pid_t pid;
int ret, child_ret;
if (pipe(child_ready_pipe) < 0)
ksft_exit_fail_msg("failed to create the child pipe: %s\n",
strerror(errno));
if (pipe(parent_ready_pipe) < 0)
ksft_exit_fail_msg("failed to create the parent pipe: %s\n",
strerror(errno));
pid = fork();
if (pid < 0)
ksft_exit_fail_msg("failed to fork: %s\n", strerror(errno));
if (pid == 0) {
char cval;
uint64_t list[256];
close(child_ready_pipe[0]);
close(parent_ready_pipe[1]);
ret = setup_namespace();
if (ret != NSID_PASS)
exit(ret);
nr_mounts = listmount(LSMT_ROOT, 0, 0, list, 256, 0);
if (nr_mounts == (uint64_t)-1) {
ksft_print_msg("listmount: %s\n", strerror(errno));
exit(NSID_FAIL);
}
/*
* Tell our parent how many mounts we have, and then wait for it
* to tell us we're done.
*/
write(child_ready_pipe[1], &nr_mounts, sizeof(nr_mounts));
read(parent_ready_pipe[0], &cval, sizeof(cval));
exit(NSID_PASS);
}
close(child_ready_pipe[1]);
close(parent_ready_pipe[0]);
/* Wait until the child has created everything. */
if (read(child_ready_pipe[0], &nr_mounts, sizeof(nr_mounts)) !=
sizeof(nr_mounts))
ret = NSID_ERROR;
ret = validate_external_listmount(pid, nr_mounts);
if (write(parent_ready_pipe[1], &pval, sizeof(pval)) != sizeof(pval))
ret = NSID_ERROR;
child_ret = wait_for_pid(pid);
if (child_ret != NSID_PASS)
ret = child_ret;
handle_result(ret, "test listmount ns id");
}
int main(void)
{
int ret;
ksft_print_header();
ret = statmount(0, 0, 0, NULL, 0, 0);
assert(ret == -1);
if (errno == ENOSYS)
ksft_exit_skip("statmount() syscall not supported\n");
ksft_set_plan(2);
test_statmount_mnt_ns_id();
test_listmount_ns();
if (ksft_get_fail_cnt() + ksft_get_error_cnt() > 0)
ksft_exit_fail();
else
ksft_exit_pass();
}