mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-09 06:33:34 +00:00
fs.move_mount.move_mount_set_group.v5.15
-----BEGIN PGP SIGNATURE----- iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCYS3iBAAKCRCRxhvAZXjc olWeAP9CK0NMvXM4eZDQH8LZ7Bg3COvYoGhwuWFoLtHnvYHZ/AEA0jvoe8jH1ekK wYVkuquIE4Dw735mpjIOThByUUP3CQE= =+ham -----END PGP SIGNATURE----- Merge tag 'fs.move_mount.move_mount_set_group.v5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux Pull move_mount updates from Christian Brauner: "This contains an extension to the move_mount() syscall making it possible to add a single private mount into an existing propagation tree. The use-case comes from the criu folks which have been struggling with restoring complex mount trees for a long time. Variations of this work have been discussed at Plumbers before, e.g. https://www.linuxplumbersconf.org/event/7/contributions/640/ The extension to move_mount() enables criu to restore any set of mount namespaces, mount trees and sharing group trees without introducing yet more complexity into mount propagation itself. The changes required to criu to make use of this and restore complex propagation trees are available at https://github.com/Snorch/criu/commits/mount-v2-poc A cleaned-up version of this will go up for merging into the main criu repo after this lands" * tag 'fs.move_mount.move_mount_set_group.v5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux: tests: add move_mount(MOVE_MOUNT_SET_GROUP) selftest move_mount: allow to add a mount into an existing group
This commit is contained in:
commit
1dd5915a5c
@ -2694,6 +2694,78 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int do_set_group(struct path *from_path, struct path *to_path)
|
||||
{
|
||||
struct mount *from, *to;
|
||||
int err;
|
||||
|
||||
from = real_mount(from_path->mnt);
|
||||
to = real_mount(to_path->mnt);
|
||||
|
||||
namespace_lock();
|
||||
|
||||
err = -EINVAL;
|
||||
/* To and From must be mounted */
|
||||
if (!is_mounted(&from->mnt))
|
||||
goto out;
|
||||
if (!is_mounted(&to->mnt))
|
||||
goto out;
|
||||
|
||||
err = -EPERM;
|
||||
/* We should be allowed to modify mount namespaces of both mounts */
|
||||
if (!ns_capable(from->mnt_ns->user_ns, CAP_SYS_ADMIN))
|
||||
goto out;
|
||||
if (!ns_capable(to->mnt_ns->user_ns, CAP_SYS_ADMIN))
|
||||
goto out;
|
||||
|
||||
err = -EINVAL;
|
||||
/* To and From paths should be mount roots */
|
||||
if (from_path->dentry != from_path->mnt->mnt_root)
|
||||
goto out;
|
||||
if (to_path->dentry != to_path->mnt->mnt_root)
|
||||
goto out;
|
||||
|
||||
/* Setting sharing groups is only allowed across same superblock */
|
||||
if (from->mnt.mnt_sb != to->mnt.mnt_sb)
|
||||
goto out;
|
||||
|
||||
/* From mount root should be wider than To mount root */
|
||||
if (!is_subdir(to->mnt.mnt_root, from->mnt.mnt_root))
|
||||
goto out;
|
||||
|
||||
/* From mount should not have locked children in place of To's root */
|
||||
if (has_locked_children(from, to->mnt.mnt_root))
|
||||
goto out;
|
||||
|
||||
/* Setting sharing groups is only allowed on private mounts */
|
||||
if (IS_MNT_SHARED(to) || IS_MNT_SLAVE(to))
|
||||
goto out;
|
||||
|
||||
/* From should not be private */
|
||||
if (!IS_MNT_SHARED(from) && !IS_MNT_SLAVE(from))
|
||||
goto out;
|
||||
|
||||
if (IS_MNT_SLAVE(from)) {
|
||||
struct mount *m = from->mnt_master;
|
||||
|
||||
list_add(&to->mnt_slave, &m->mnt_slave_list);
|
||||
to->mnt_master = m;
|
||||
}
|
||||
|
||||
if (IS_MNT_SHARED(from)) {
|
||||
to->mnt_group_id = from->mnt_group_id;
|
||||
list_add(&to->mnt_share, &from->mnt_share);
|
||||
lock_mount_hash();
|
||||
set_mnt_shared(to);
|
||||
unlock_mount_hash();
|
||||
}
|
||||
|
||||
err = 0;
|
||||
out:
|
||||
namespace_unlock();
|
||||
return err;
|
||||
}
|
||||
|
||||
static int do_move_mount(struct path *old_path, struct path *new_path)
|
||||
{
|
||||
struct mnt_namespace *ns;
|
||||
@ -3678,7 +3750,10 @@ SYSCALL_DEFINE5(move_mount,
|
||||
if (ret < 0)
|
||||
goto out_to;
|
||||
|
||||
ret = do_move_mount(&from_path, &to_path);
|
||||
if (flags & MOVE_MOUNT_SET_GROUP)
|
||||
ret = do_set_group(&from_path, &to_path);
|
||||
else
|
||||
ret = do_move_mount(&from_path, &to_path);
|
||||
|
||||
out_to:
|
||||
path_put(&to_path);
|
||||
|
@ -73,7 +73,8 @@
|
||||
#define MOVE_MOUNT_T_SYMLINKS 0x00000010 /* Follow symlinks on to path */
|
||||
#define MOVE_MOUNT_T_AUTOMOUNTS 0x00000020 /* Follow automounts on to path */
|
||||
#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */
|
||||
#define MOVE_MOUNT__MASK 0x00000077
|
||||
#define MOVE_MOUNT_SET_GROUP 0x00000100 /* Set sharing group instead */
|
||||
#define MOVE_MOUNT__MASK 0x00000177
|
||||
|
||||
/*
|
||||
* fsopen() flags.
|
||||
|
@ -35,6 +35,7 @@ TARGETS += memory-hotplug
|
||||
TARGETS += mincore
|
||||
TARGETS += mount
|
||||
TARGETS += mount_setattr
|
||||
TARGETS += move_mount_set_group
|
||||
TARGETS += mqueue
|
||||
TARGETS += nci
|
||||
TARGETS += net
|
||||
|
1
tools/testing/selftests/move_mount_set_group/.gitignore
vendored
Normal file
1
tools/testing/selftests/move_mount_set_group/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
move_mount_set_group_test
|
7
tools/testing/selftests/move_mount_set_group/Makefile
Normal file
7
tools/testing/selftests/move_mount_set_group/Makefile
Normal file
@ -0,0 +1,7 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
# Makefile for mount selftests.
|
||||
CFLAGS = -g -I../../../../usr/include/ -Wall -O2
|
||||
|
||||
TEST_GEN_FILES += move_mount_set_group_test
|
||||
|
||||
include ../lib.mk
|
1
tools/testing/selftests/move_mount_set_group/config
Normal file
1
tools/testing/selftests/move_mount_set_group/config
Normal file
@ -0,0 +1 @@
|
||||
CONFIG_USER_NS=y
|
@ -0,0 +1,375 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#define _GNU_SOURCE
|
||||
#include <sched.h>
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/mount.h>
|
||||
#include <sys/wait.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdarg.h>
|
||||
#include <sys/syscall.h>
|
||||
|
||||
#include "../kselftest_harness.h"
|
||||
|
||||
#ifndef CLONE_NEWNS
|
||||
#define CLONE_NEWNS 0x00020000
|
||||
#endif
|
||||
|
||||
#ifndef CLONE_NEWUSER
|
||||
#define CLONE_NEWUSER 0x10000000
|
||||
#endif
|
||||
|
||||
#ifndef MS_SHARED
|
||||
#define MS_SHARED (1 << 20)
|
||||
#endif
|
||||
|
||||
#ifndef MS_PRIVATE
|
||||
#define MS_PRIVATE (1<<18)
|
||||
#endif
|
||||
|
||||
#ifndef MOVE_MOUNT_SET_GROUP
|
||||
#define MOVE_MOUNT_SET_GROUP 0x00000100
|
||||
#endif
|
||||
|
||||
#ifndef MOVE_MOUNT_F_EMPTY_PATH
|
||||
#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004
|
||||
#endif
|
||||
|
||||
#ifndef MOVE_MOUNT_T_EMPTY_PATH
|
||||
#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040
|
||||
#endif
|
||||
|
||||
/*
 * write(2) wrapper that transparently restarts the call when it is
 * interrupted by a signal (EINTR).  Any other outcome, including a short
 * write, is handed back to the caller unchanged.
 */
static ssize_t write_nointr(int fd, const void *buf, size_t count)
{
	for (;;) {
		ssize_t written = write(fd, buf, count);

		if (written >= 0 || errno != EINTR)
			return written;
	}
}
|
||||
|
||||
static int write_file(const char *path, const void *buf, size_t count)
|
||||
{
|
||||
int fd;
|
||||
ssize_t ret;
|
||||
|
||||
fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW);
|
||||
if (fd < 0)
|
||||
return -1;
|
||||
|
||||
ret = write_nointr(fd, buf, count);
|
||||
close(fd);
|
||||
if (ret < 0 || (size_t)ret != count)
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int create_and_enter_userns(void)
|
||||
{
|
||||
uid_t uid;
|
||||
gid_t gid;
|
||||
char map[100];
|
||||
|
||||
uid = getuid();
|
||||
gid = getgid();
|
||||
|
||||
if (unshare(CLONE_NEWUSER))
|
||||
return -1;
|
||||
|
||||
if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) &&
|
||||
errno != ENOENT)
|
||||
return -1;
|
||||
|
||||
snprintf(map, sizeof(map), "0 %d 1", uid);
|
||||
if (write_file("/proc/self/uid_map", map, strlen(map)))
|
||||
return -1;
|
||||
|
||||
|
||||
snprintf(map, sizeof(map), "0 %d 1", gid);
|
||||
if (write_file("/proc/self/gid_map", map, strlen(map)))
|
||||
return -1;
|
||||
|
||||
if (setgid(0))
|
||||
return -1;
|
||||
|
||||
if (setuid(0))
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int prepare_unpriv_mountns(void)
|
||||
{
|
||||
if (create_and_enter_userns())
|
||||
return -1;
|
||||
|
||||
if (unshare(CLONE_NEWNS))
|
||||
return -1;
|
||||
|
||||
if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0))
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Step over @nfields fields of @src, where a field ends at a single space
 * or tab, and return a pointer to what follows.
 *
 * Exactly one separator character is consumed per field, so consecutive
 * separators count as empty fields.  If the string ends first, a pointer to
 * its terminating NUL is returned; the result is never NULL.
 */
static char *get_field(char *src, int nfields)
{
	char *cur = src;
	int remaining = nfields;

	while (remaining-- > 0) {
		/* Jump to the separator ending the current field. */
		cur += strcspn(cur, " \t");
		if (*cur == '\0')
			return cur;

		cur++;
	}

	return cur;
}
|
||||
|
||||
/*
 * Terminate @word in place at its first space or tab, leaving only the
 * leading word.  A string without separators is left unchanged.
 */
static void null_endofword(char *word)
{
	word[strcspn(word, " \t")] = '\0';
}
|
||||
|
||||
/*
 * Report whether a mount whose mount point is exactly @path appears in
 * /proc/self/mountinfo with a "shared:" tag.
 *
 * For each line, the fifth whitespace-separated field is compared against
 * @path and the field two positions further on is searched for "shared:".
 *
 * Returns true on the first matching line, false if no line matches or the
 * file cannot be opened.  The line buffer and the stream are released on
 * every path.
 */
static bool is_shared_mount(const char *path)
{
	bool shared = false;
	size_t len = 0;
	char *line = NULL;
	FILE *f;

	f = fopen("/proc/self/mountinfo", "re");
	if (!f)
		return false;

	while (getline(&line, &len, f) != -1) {
		char *opts, *target;

		/* get_field() never returns NULL, so no checks are needed. */
		target = get_field(line, 4);
		opts = get_field(target, 2);

		/* opts must be located before target is cut at its first blank. */
		null_endofword(target);

		if (strcmp(target, path) != 0)
			continue;

		null_endofword(opts);
		if (strstr(opts, "shared:")) {
			/* Leave the loop instead of returning so cleanup runs. */
			shared = true;
			break;
		}
	}

	free(line);
	fclose(f);

	return shared;
}
|
||||
|
||||
/* Attempt to de-conflict with the selftests tree. */
|
||||
#ifndef SKIP
|
||||
#define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
#define SET_GROUP_FROM "/tmp/move_mount_set_group_supported_from"
|
||||
#define SET_GROUP_TO "/tmp/move_mount_set_group_supported_to"
|
||||
|
||||
static int move_mount_set_group_supported(void)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
|
||||
"size=100000,mode=700"))
|
||||
return -1;
|
||||
|
||||
if (mount(NULL, "/tmp", NULL, MS_PRIVATE, 0))
|
||||
return -1;
|
||||
|
||||
if (mkdir(SET_GROUP_FROM, 0777))
|
||||
return -1;
|
||||
|
||||
if (mkdir(SET_GROUP_TO, 0777))
|
||||
return -1;
|
||||
|
||||
if (mount("testing", SET_GROUP_FROM, "tmpfs", MS_NOATIME | MS_NODEV,
|
||||
"size=100000,mode=700"))
|
||||
return -1;
|
||||
|
||||
if (mount(SET_GROUP_FROM, SET_GROUP_TO, NULL, MS_BIND, NULL))
|
||||
return -1;
|
||||
|
||||
if (mount(NULL, SET_GROUP_FROM, NULL, MS_SHARED, 0))
|
||||
return -1;
|
||||
|
||||
ret = syscall(SYS_move_mount, AT_FDCWD, SET_GROUP_FROM,
|
||||
AT_FDCWD, SET_GROUP_TO, MOVE_MOUNT_SET_GROUP);
|
||||
umount2("/tmp", MNT_DETACH);
|
||||
|
||||
return ret < 0 ? false : true;
|
||||
}
|
||||
|
||||
FIXTURE(move_mount_set_group) {
|
||||
};
|
||||
|
||||
#define SET_GROUP_A "/tmp/A"
|
||||
|
||||
FIXTURE_SETUP(move_mount_set_group)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ASSERT_EQ(prepare_unpriv_mountns(), 0);
|
||||
|
||||
ret = move_mount_set_group_supported();
|
||||
ASSERT_GE(ret, 0);
|
||||
if (!ret)
|
||||
SKIP(return, "move_mount(MOVE_MOUNT_SET_GROUP) is not supported");
|
||||
|
||||
umount2("/tmp", MNT_DETACH);
|
||||
|
||||
ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
|
||||
"size=100000,mode=700"), 0);
|
||||
|
||||
ASSERT_EQ(mkdir(SET_GROUP_A, 0777), 0);
|
||||
|
||||
ASSERT_EQ(mount("testing", SET_GROUP_A, "tmpfs", MS_NOATIME | MS_NODEV,
|
||||
"size=100000,mode=700"), 0);
|
||||
}
|
||||
|
||||
FIXTURE_TEARDOWN(move_mount_set_group)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = move_mount_set_group_supported();
|
||||
ASSERT_GE(ret, 0);
|
||||
if (!ret)
|
||||
SKIP(return, "move_mount(MOVE_MOUNT_SET_GROUP) is not supported");
|
||||
|
||||
umount2("/tmp", MNT_DETACH);
|
||||
}
|
||||
|
||||
#define __STACK_SIZE (8 * 1024 * 1024)
|
||||
static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
|
||||
{
|
||||
void *stack;
|
||||
|
||||
stack = malloc(__STACK_SIZE);
|
||||
if (!stack)
|
||||
return -ENOMEM;
|
||||
|
||||
#ifdef __ia64__
|
||||
return __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL);
|
||||
#else
|
||||
return clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
 * Wait for child @pid to terminate, retrying when waitpid() is interrupted
 * by a signal.
 *
 * Returns the child's exit status if it exited normally, and -1 if
 * waitpid() failed or the child did not terminate through exit.
 */
static int wait_for_pid(pid_t pid)
{
	int wstatus;
	int reaped;

	do {
		reaped = waitpid(pid, &wstatus, 0);
	} while (reaped == -1 && errno == EINTR);

	if (reaped == -1 || !WIFEXITED(wstatus))
		return -1;

	return WEXITSTATUS(wstatus);
}
|
||||
|
||||
/*
 * Arguments and results exchanged with get_nestedns_mount_cb().
 * The caller sets @shared; the callback fills in the descriptors.
 */
struct child_args {
	int unsfd;	/* out: fd for /proc/self/ns/user of the child */
	int mntnsfd;	/* out: fd for /proc/self/ns/mnt of the child */
	bool shared;	/* in: mark SET_GROUP_A shared inside the child */
	int mntfd;	/* out: fd for SET_GROUP_A opened inside the child */
};
|
||||
|
||||
static int get_nestedns_mount_cb(void *data)
|
||||
{
|
||||
struct child_args *ca = (struct child_args *)data;
|
||||
int ret;
|
||||
|
||||
ret = prepare_unpriv_mountns();
|
||||
if (ret)
|
||||
return 1;
|
||||
|
||||
if (ca->shared) {
|
||||
ret = mount(NULL, SET_GROUP_A, NULL, MS_SHARED, 0);
|
||||
if (ret)
|
||||
return 1;
|
||||
}
|
||||
|
||||
ret = open("/proc/self/ns/user", O_RDONLY);
|
||||
if (ret < 0)
|
||||
return 1;
|
||||
ca->unsfd = ret;
|
||||
|
||||
ret = open("/proc/self/ns/mnt", O_RDONLY);
|
||||
if (ret < 0)
|
||||
return 1;
|
||||
ca->mntnsfd = ret;
|
||||
|
||||
ret = open(SET_GROUP_A, O_RDONLY);
|
||||
if (ret < 0)
|
||||
return 1;
|
||||
ca->mntfd = ret;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
TEST_F(move_mount_set_group, complex_sharing_copying)
|
||||
{
|
||||
struct child_args ca_from = {
|
||||
.shared = true,
|
||||
};
|
||||
struct child_args ca_to = {
|
||||
.shared = false,
|
||||
};
|
||||
pid_t pid;
|
||||
int ret;
|
||||
|
||||
ret = move_mount_set_group_supported();
|
||||
ASSERT_GE(ret, 0);
|
||||
if (!ret)
|
||||
SKIP(return, "move_mount(MOVE_MOUNT_SET_GROUP) is not supported");
|
||||
|
||||
pid = do_clone(get_nestedns_mount_cb, (void *)&ca_from, CLONE_VFORK |
|
||||
CLONE_VM | CLONE_FILES); ASSERT_GT(pid, 0);
|
||||
ASSERT_EQ(wait_for_pid(pid), 0);
|
||||
|
||||
pid = do_clone(get_nestedns_mount_cb, (void *)&ca_to, CLONE_VFORK |
|
||||
CLONE_VM | CLONE_FILES); ASSERT_GT(pid, 0);
|
||||
ASSERT_EQ(wait_for_pid(pid), 0);
|
||||
|
||||
ASSERT_EQ(syscall(SYS_move_mount, ca_from.mntfd, "",
|
||||
ca_to.mntfd, "", MOVE_MOUNT_SET_GROUP
|
||||
| MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH),
|
||||
0);
|
||||
|
||||
ASSERT_EQ(setns(ca_to.mntnsfd, CLONE_NEWNS), 0);
|
||||
ASSERT_EQ(is_shared_mount(SET_GROUP_A), 1);
|
||||
}
|
||||
|
||||
TEST_HARNESS_MAIN
|
Loading…
x
Reference in New Issue
Block a user