mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-15 02:05:33 +00:00
Merge branch 'vfs-6.14.pidfs' into vfs.all
This commit is contained in:
commit
e206d842e9
115
fs/fhandle.c
115
fs/fhandle.c
@ -187,17 +187,6 @@ static int get_path_from_fd(int fd, struct path *root)
|
||||
return 0;
|
||||
}
|
||||
|
||||
enum handle_to_path_flags {
|
||||
HANDLE_CHECK_PERMS = (1 << 0),
|
||||
HANDLE_CHECK_SUBTREE = (1 << 1),
|
||||
};
|
||||
|
||||
struct handle_to_path_ctx {
|
||||
struct path root;
|
||||
enum handle_to_path_flags flags;
|
||||
unsigned int fh_flags;
|
||||
};
|
||||
|
||||
static int vfs_dentry_acceptable(void *context, struct dentry *dentry)
|
||||
{
|
||||
struct handle_to_path_ctx *ctx = context;
|
||||
@ -261,50 +250,55 @@ static int do_handle_to_path(struct file_handle *handle, struct path *path,
|
||||
{
|
||||
int handle_dwords;
|
||||
struct vfsmount *mnt = ctx->root.mnt;
|
||||
struct dentry *dentry;
|
||||
|
||||
/* change the handle size to multiple of sizeof(u32) */
|
||||
handle_dwords = handle->handle_bytes >> 2;
|
||||
path->dentry = exportfs_decode_fh_raw(mnt,
|
||||
(struct fid *)handle->f_handle,
|
||||
handle_dwords, handle->handle_type,
|
||||
ctx->fh_flags,
|
||||
vfs_dentry_acceptable, ctx);
|
||||
if (IS_ERR_OR_NULL(path->dentry)) {
|
||||
if (path->dentry == ERR_PTR(-ENOMEM))
|
||||
dentry = exportfs_decode_fh_raw(mnt, (struct fid *)handle->f_handle,
|
||||
handle_dwords, handle->handle_type,
|
||||
ctx->fh_flags, vfs_dentry_acceptable,
|
||||
ctx);
|
||||
if (IS_ERR_OR_NULL(dentry)) {
|
||||
if (dentry == ERR_PTR(-ENOMEM))
|
||||
return -ENOMEM;
|
||||
return -ESTALE;
|
||||
}
|
||||
path->dentry = dentry;
|
||||
path->mnt = mntget(mnt);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allow relaxed permissions of file handles if the caller has the
|
||||
* ability to mount the filesystem or create a bind-mount of the
|
||||
* provided @mountdirfd.
|
||||
*
|
||||
* In both cases the caller may be able to get an unobstructed way to
|
||||
* the encoded file handle. If the caller is only able to create a
|
||||
* bind-mount we need to verify that there are no locked mounts on top
|
||||
* of it that could prevent us from getting to the encoded file.
|
||||
*
|
||||
* In principle, locked mounts can prevent the caller from mounting the
|
||||
* filesystem but that only applies to procfs and sysfs neither of which
|
||||
* support decoding file handles.
|
||||
*/
|
||||
static inline bool may_decode_fh(struct handle_to_path_ctx *ctx,
|
||||
unsigned int o_flags)
|
||||
static inline int may_decode_fh(struct handle_to_path_ctx *ctx,
|
||||
unsigned int o_flags)
|
||||
{
|
||||
struct path *root = &ctx->root;
|
||||
|
||||
if (capable(CAP_DAC_READ_SEARCH))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Restrict to O_DIRECTORY to provide a deterministic API that avoids a
|
||||
* confusing api in the face of disconnected non-dir dentries.
|
||||
* Allow relaxed permissions of file handles if the caller has
|
||||
* the ability to mount the filesystem or create a bind-mount of
|
||||
* the provided @mountdirfd.
|
||||
*
|
||||
* In both cases the caller may be able to get an unobstructed
|
||||
* way to the encoded file handle. If the caller is only able to
|
||||
* create a bind-mount we need to verify that there are no
|
||||
* locked mounts on top of it that could prevent us from getting
|
||||
* to the encoded file.
|
||||
*
|
||||
* In principle, locked mounts can prevent the caller from
|
||||
* mounting the filesystem but that only applies to procfs and
|
||||
* sysfs neither of which support decoding file handles.
|
||||
*
|
||||
* Restrict to O_DIRECTORY to provide a deterministic API that
|
||||
* avoids a confusing api in the face of disconnected non-dir
|
||||
* dentries.
|
||||
*
|
||||
* There's only one dentry for each directory inode (VFS rule)...
|
||||
*/
|
||||
if (!(o_flags & O_DIRECTORY))
|
||||
return false;
|
||||
return -EPERM;
|
||||
|
||||
if (ns_capable(root->mnt->mnt_sb->s_user_ns, CAP_SYS_ADMIN))
|
||||
ctx->flags = HANDLE_CHECK_PERMS;
|
||||
@ -314,14 +308,14 @@ static inline bool may_decode_fh(struct handle_to_path_ctx *ctx,
|
||||
!has_locked_children(real_mount(root->mnt), root->dentry))
|
||||
ctx->flags = HANDLE_CHECK_PERMS | HANDLE_CHECK_SUBTREE;
|
||||
else
|
||||
return false;
|
||||
return -EPERM;
|
||||
|
||||
/* Are we able to override DAC permissions? */
|
||||
if (!ns_capable(current_user_ns(), CAP_DAC_READ_SEARCH))
|
||||
return false;
|
||||
return -EPERM;
|
||||
|
||||
ctx->fh_flags = EXPORT_FH_DIR_ONLY;
|
||||
return true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
|
||||
@ -331,15 +325,19 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
|
||||
struct file_handle f_handle;
|
||||
struct file_handle *handle = NULL;
|
||||
struct handle_to_path_ctx ctx = {};
|
||||
const struct export_operations *eops;
|
||||
|
||||
retval = get_path_from_fd(mountdirfd, &ctx.root);
|
||||
if (retval)
|
||||
goto out_err;
|
||||
|
||||
if (!capable(CAP_DAC_READ_SEARCH) && !may_decode_fh(&ctx, o_flags)) {
|
||||
retval = -EPERM;
|
||||
eops = ctx.root.mnt->mnt_sb->s_export_op;
|
||||
if (eops && eops->permission)
|
||||
retval = eops->permission(&ctx, o_flags);
|
||||
else
|
||||
retval = may_decode_fh(&ctx, o_flags);
|
||||
if (retval)
|
||||
goto out_path;
|
||||
}
|
||||
|
||||
if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) {
|
||||
retval = -EFAULT;
|
||||
@ -398,29 +396,28 @@ static long do_handle_open(int mountdirfd, struct file_handle __user *ufh,
|
||||
int open_flag)
|
||||
{
|
||||
long retval = 0;
|
||||
struct path path;
|
||||
struct path path __free(path_put) = {};
|
||||
struct file *file;
|
||||
int fd;
|
||||
const struct export_operations *eops;
|
||||
|
||||
retval = handle_to_path(mountdirfd, ufh, &path, open_flag);
|
||||
if (retval)
|
||||
return retval;
|
||||
|
||||
fd = get_unused_fd_flags(open_flag);
|
||||
if (fd < 0) {
|
||||
path_put(&path);
|
||||
CLASS(get_unused_fd, fd)(O_CLOEXEC);
|
||||
if (fd < 0)
|
||||
return fd;
|
||||
}
|
||||
file = file_open_root(&path, "", open_flag, 0);
|
||||
if (IS_ERR(file)) {
|
||||
put_unused_fd(fd);
|
||||
retval = PTR_ERR(file);
|
||||
} else {
|
||||
retval = fd;
|
||||
fd_install(fd, file);
|
||||
}
|
||||
path_put(&path);
|
||||
return retval;
|
||||
|
||||
eops = path.mnt->mnt_sb->s_export_op;
|
||||
if (eops->open)
|
||||
file = eops->open(&path, open_flag);
|
||||
else
|
||||
file = file_open_root(&path, "", open_flag, 0);
|
||||
if (IS_ERR(file))
|
||||
return PTR_ERR(file);
|
||||
|
||||
fd_install(fd, file);
|
||||
return take_fd(fd);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -673,6 +673,7 @@ static int pseudo_fs_fill_super(struct super_block *s, struct fs_context *fc)
|
||||
s->s_blocksize_bits = PAGE_SHIFT;
|
||||
s->s_magic = ctx->magic;
|
||||
s->s_op = ctx->ops ?: &simple_super_operations;
|
||||
s->s_export_op = ctx->eops;
|
||||
s->s_xattr = ctx->xattr;
|
||||
s->s_time_gran = 1;
|
||||
root = new_inode(s);
|
||||
|
@ -32,6 +32,7 @@
|
||||
#include <linux/fs_context.h>
|
||||
#include <linux/shmem_fs.h>
|
||||
#include <linux/mnt_idmapping.h>
|
||||
#include <linux/pidfs.h>
|
||||
#include <linux/nospec.h>
|
||||
|
||||
#include "pnode.h"
|
||||
@ -2736,8 +2737,13 @@ static struct mount *__do_loopback(struct path *old_path, int recurse)
|
||||
if (IS_MNT_UNBINDABLE(old))
|
||||
return mnt;
|
||||
|
||||
if (!check_mnt(old) && old_path->dentry->d_op != &ns_dentry_operations)
|
||||
return mnt;
|
||||
if (!check_mnt(old)) {
|
||||
const struct dentry_operations *d_op = old_path->dentry->d_op;
|
||||
|
||||
if (d_op != &ns_dentry_operations &&
|
||||
d_op != &pidfs_dentry_operations)
|
||||
return mnt;
|
||||
}
|
||||
|
||||
if (!recurse && has_locked_children(old, old_path->dentry))
|
||||
return mnt;
|
||||
|
298
fs/pidfs.c
298
fs/pidfs.c
@ -1,5 +1,6 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <linux/anon_inodes.h>
|
||||
#include <linux/exportfs.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/cgroup.h>
|
||||
@ -23,6 +24,97 @@
|
||||
#include "internal.h"
|
||||
#include "mount.h"
|
||||
|
||||
static struct rb_root pidfs_ino_tree = RB_ROOT;
|
||||
|
||||
#if BITS_PER_LONG == 32
|
||||
static inline unsigned long pidfs_ino(u64 ino)
|
||||
{
|
||||
return lower_32_bits(ino);
|
||||
}
|
||||
|
||||
/* On 32 bit the generation number are the upper 32 bits. */
|
||||
static inline u32 pidfs_gen(u64 ino)
|
||||
{
|
||||
return upper_32_bits(ino);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* On 64 bit simply return ino. */
|
||||
static inline unsigned long pidfs_ino(u64 ino)
|
||||
{
|
||||
return ino;
|
||||
}
|
||||
|
||||
/* On 64 bit the generation number is 0. */
|
||||
static inline u32 pidfs_gen(u64 ino)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int pidfs_ino_cmp(struct rb_node *a, const struct rb_node *b)
|
||||
{
|
||||
struct pid *pid_a = rb_entry(a, struct pid, pidfs_node);
|
||||
struct pid *pid_b = rb_entry(b, struct pid, pidfs_node);
|
||||
u64 pid_ino_a = pid_a->ino;
|
||||
u64 pid_ino_b = pid_b->ino;
|
||||
|
||||
if (pid_ino_a < pid_ino_b)
|
||||
return -1;
|
||||
if (pid_ino_a > pid_ino_b)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void pidfs_add_pid(struct pid *pid)
|
||||
{
|
||||
static u64 pidfs_ino_nr = 2;
|
||||
|
||||
/*
|
||||
* On 64 bit nothing special happens. The 64bit number assigned
|
||||
* to struct pid is the inode number.
|
||||
*
|
||||
* On 32 bit the 64 bit number assigned to struct pid is split
|
||||
* into two 32 bit numbers. The lower 32 bits are used as the
|
||||
* inode number and the upper 32 bits are used as the inode
|
||||
* generation number.
|
||||
*
|
||||
* On 32 bit pidfs_ino() will return the lower 32 bit. When
|
||||
* pidfs_ino() returns zero a wrap around happened. When a
|
||||
* wraparound happens the 64 bit number will be incremented by 2
|
||||
* so inode numbering starts at 2 again.
|
||||
*
|
||||
* On 64 bit comparing two pidfds is as simple as comparing
|
||||
* inode numbers.
|
||||
*
|
||||
* When a wraparound happens on 32 bit multiple pidfds with the
|
||||
* same inode number are likely to exist (This isn't a problem
|
||||
* since before pidfs pidfds used the anonymous inode meaning
|
||||
* all pidfds had the same inode number.). Userspace can
|
||||
* reconstruct the 64 bit identifier by retrieving both the
|
||||
* inode number and the inode generation number to compare or
|
||||
* use file handles.
|
||||
*/
|
||||
if (pidfs_ino(pidfs_ino_nr) == 0)
|
||||
pidfs_ino_nr += 2;
|
||||
|
||||
pid->ino = pidfs_ino_nr;
|
||||
pid->stashed = NULL;
|
||||
pidfs_ino_nr++;
|
||||
|
||||
write_seqcount_begin(&pidmap_lock_seq);
|
||||
rb_find_add_rcu(&pid->pidfs_node, &pidfs_ino_tree, pidfs_ino_cmp);
|
||||
write_seqcount_end(&pidmap_lock_seq);
|
||||
}
|
||||
|
||||
void pidfs_remove_pid(struct pid *pid)
|
||||
{
|
||||
write_seqcount_begin(&pidmap_lock_seq);
|
||||
rb_erase(&pid->pidfs_node, &pidfs_ino_tree);
|
||||
write_seqcount_end(&pidmap_lock_seq);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PROC_FS
|
||||
/**
|
||||
* pidfd_show_fdinfo - print information about a pidfd
|
||||
@ -190,6 +282,27 @@ static long pidfd_info(struct task_struct *task, unsigned int cmd, unsigned long
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool pidfs_ioctl_valid(unsigned int cmd)
|
||||
{
|
||||
switch (cmd) {
|
||||
case FS_IOC_GETVERSION:
|
||||
case PIDFD_GET_CGROUP_NAMESPACE:
|
||||
case PIDFD_GET_INFO:
|
||||
case PIDFD_GET_IPC_NAMESPACE:
|
||||
case PIDFD_GET_MNT_NAMESPACE:
|
||||
case PIDFD_GET_NET_NAMESPACE:
|
||||
case PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE:
|
||||
case PIDFD_GET_TIME_NAMESPACE:
|
||||
case PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE:
|
||||
case PIDFD_GET_UTS_NAMESPACE:
|
||||
case PIDFD_GET_USER_NAMESPACE:
|
||||
case PIDFD_GET_PID_NAMESPACE:
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
||||
{
|
||||
struct task_struct *task __free(put_task) = NULL;
|
||||
@ -198,6 +311,17 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
||||
struct ns_common *ns_common = NULL;
|
||||
struct pid_namespace *pid_ns;
|
||||
|
||||
if (!pidfs_ioctl_valid(cmd))
|
||||
return -ENOIOCTLCMD;
|
||||
|
||||
if (cmd == FS_IOC_GETVERSION) {
|
||||
if (!arg)
|
||||
return -EINVAL;
|
||||
|
||||
__u32 __user *argp = (__u32 __user *)arg;
|
||||
return put_user(file_inode(file)->i_generation, argp);
|
||||
}
|
||||
|
||||
task = get_pid_task(pid, PIDTYPE_PID);
|
||||
if (!task)
|
||||
return -ESRCH;
|
||||
@ -318,40 +442,6 @@ struct pid *pidfd_pid(const struct file *file)
|
||||
|
||||
static struct vfsmount *pidfs_mnt __ro_after_init;
|
||||
|
||||
#if BITS_PER_LONG == 32
|
||||
/*
|
||||
* Provide a fallback mechanism for 32-bit systems so processes remain
|
||||
* reliably comparable by inode number even on those systems.
|
||||
*/
|
||||
static DEFINE_IDA(pidfd_inum_ida);
|
||||
|
||||
static int pidfs_inum(struct pid *pid, unsigned long *ino)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = ida_alloc_range(&pidfd_inum_ida, RESERVED_PIDS + 1,
|
||||
UINT_MAX, GFP_ATOMIC);
|
||||
if (ret < 0)
|
||||
return -ENOSPC;
|
||||
|
||||
*ino = ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void pidfs_free_inum(unsigned long ino)
|
||||
{
|
||||
if (ino > 0)
|
||||
ida_free(&pidfd_inum_ida, ino);
|
||||
}
|
||||
#else
|
||||
static inline int pidfs_inum(struct pid *pid, unsigned long *ino)
|
||||
{
|
||||
*ino = pid->ino;
|
||||
return 0;
|
||||
}
|
||||
#define pidfs_free_inum(ino) ((void)(ino))
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The vfs falls back to simple_setattr() if i_op->setattr() isn't
|
||||
* implemented. Let's reject it completely until we have a clean
|
||||
@ -403,7 +493,6 @@ static void pidfs_evict_inode(struct inode *inode)
|
||||
|
||||
clear_inode(inode);
|
||||
put_pid(pid);
|
||||
pidfs_free_inum(inode->i_ino);
|
||||
}
|
||||
|
||||
static const struct super_operations pidfs_sops = {
|
||||
@ -421,25 +510,149 @@ static char *pidfs_dname(struct dentry *dentry, char *buffer, int buflen)
|
||||
return dynamic_dname(buffer, buflen, "anon_inode:[pidfd]");
|
||||
}
|
||||
|
||||
static const struct dentry_operations pidfs_dentry_operations = {
|
||||
const struct dentry_operations pidfs_dentry_operations = {
|
||||
.d_delete = always_delete_dentry,
|
||||
.d_dname = pidfs_dname,
|
||||
.d_prune = stashed_dentry_prune,
|
||||
};
|
||||
|
||||
/*
 * pidfs_encode_fh - encode a pidfs inode as a file handle
 * @inode:   inode to encode; its ->i_private is the struct pid
 * @fh:      buffer that receives the handle
 * @max_len: in: number of u32 slots available in @fh;
 *           out: always set to 2, the number of slots the handle needs
 * @parent:  unused
 *
 * The handle is the 64 bit pid->ino stored across two u32 slots.
 *
 * Returns FILEID_KERNFS on success, or FILEID_INVALID if fewer than two
 * slots were provided.
 */
static int pidfs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
			   struct inode *parent)
{
	const struct pid *pid = inode->i_private;
	const int avail = *max_len;

	/* Report the required size whether or not the buffer is big enough. */
	*max_len = 2;
	if (avail < 2)
		return FILEID_INVALID;

	*(u64 *)fh = pid->ino;
	return FILEID_KERNFS;
}
|
||||
|
||||
static int pidfs_ino_find(const void *key, const struct rb_node *node)
|
||||
{
|
||||
const u64 pid_ino = *(u64 *)key;
|
||||
const struct pid *pid = rb_entry(node, struct pid, pidfs_node);
|
||||
|
||||
if (pid_ino < pid->ino)
|
||||
return -1;
|
||||
if (pid_ino > pid->ino)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Find a struct pid based on the inode number. */
|
||||
static struct pid *pidfs_ino_get_pid(u64 ino)
|
||||
{
|
||||
struct pid *pid;
|
||||
struct rb_node *node;
|
||||
unsigned int seq;
|
||||
|
||||
guard(rcu)();
|
||||
do {
|
||||
seq = read_seqcount_begin(&pidmap_lock_seq);
|
||||
node = rb_find_rcu(&ino, &pidfs_ino_tree, pidfs_ino_find);
|
||||
if (node)
|
||||
break;
|
||||
} while (read_seqcount_retry(&pidmap_lock_seq, seq));
|
||||
|
||||
if (!node)
|
||||
return NULL;
|
||||
|
||||
pid = rb_entry(node, struct pid, pidfs_node);
|
||||
|
||||
/* Within our pid namespace hierarchy? */
|
||||
if (pid_vnr(pid) == 0)
|
||||
return NULL;
|
||||
|
||||
return get_pid(pid);
|
||||
}
|
||||
|
||||
static struct dentry *pidfs_fh_to_dentry(struct super_block *sb,
|
||||
struct fid *fid, int fh_len,
|
||||
int fh_type)
|
||||
{
|
||||
int ret;
|
||||
u64 pid_ino;
|
||||
struct path path;
|
||||
struct pid *pid;
|
||||
|
||||
if (fh_len < 2)
|
||||
return NULL;
|
||||
|
||||
switch (fh_type) {
|
||||
case FILEID_KERNFS:
|
||||
pid_ino = *(u64 *)fid;
|
||||
break;
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
pid = pidfs_ino_get_pid(pid_ino);
|
||||
if (!pid)
|
||||
return NULL;
|
||||
|
||||
ret = path_from_stashed(&pid->stashed, pidfs_mnt, pid, &path);
|
||||
if (ret < 0)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
mntput(path.mnt);
|
||||
return path.dentry;
|
||||
}
|
||||
|
||||
/*
|
||||
* Make sure that we reject any nonsensical flags that users pass via
|
||||
* open_by_handle_at(). Note that PIDFD_THREAD is defined as O_EXCL, and
|
||||
* PIDFD_NONBLOCK as O_NONBLOCK.
|
||||
*/
|
||||
#define VALID_FILE_HANDLE_OPEN_FLAGS \
|
||||
(O_RDONLY | O_WRONLY | O_RDWR | O_NONBLOCK | O_CLOEXEC | O_EXCL)
|
||||
|
||||
static int pidfs_export_permission(struct handle_to_path_ctx *ctx,
|
||||
unsigned int oflags)
|
||||
{
|
||||
if (oflags & ~(VALID_FILE_HANDLE_OPEN_FLAGS | O_LARGEFILE))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* pidfd_ino_get_pid() will verify that the struct pid is part
|
||||
* of the caller's pid namespace hierarchy. No further
|
||||
* permission checks are needed.
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct file *pidfs_export_open(struct path *path, unsigned int oflags)
|
||||
{
|
||||
/*
|
||||
* Clear O_LARGEFILE as open_by_handle_at() forces it and raise
|
||||
* O_RDWR as pidfds always are.
|
||||
*/
|
||||
oflags &= ~O_LARGEFILE;
|
||||
return dentry_open(path, oflags | O_RDWR, current_cred());
|
||||
}
|
||||
|
||||
static const struct export_operations pidfs_export_operations = {
|
||||
.encode_fh = pidfs_encode_fh,
|
||||
.fh_to_dentry = pidfs_fh_to_dentry,
|
||||
.open = pidfs_export_open,
|
||||
.permission = pidfs_export_permission,
|
||||
};
|
||||
|
||||
static int pidfs_init_inode(struct inode *inode, void *data)
|
||||
{
|
||||
const struct pid *pid = data;
|
||||
|
||||
inode->i_private = data;
|
||||
inode->i_flags |= S_PRIVATE;
|
||||
inode->i_mode |= S_IRWXU;
|
||||
inode->i_op = &pidfs_inode_operations;
|
||||
inode->i_fop = &pidfs_file_operations;
|
||||
/*
|
||||
* Inode numbering for pidfs start at RESERVED_PIDS + 1. This
|
||||
* avoids collisions with the root inode which is 1 for pseudo
|
||||
* filesystems.
|
||||
*/
|
||||
return pidfs_inum(data, &inode->i_ino);
|
||||
inode->i_ino = pidfs_ino(pid->ino);
|
||||
inode->i_generation = pidfs_gen(pid->ino);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void pidfs_put_data(void *data)
|
||||
@ -462,6 +675,7 @@ static int pidfs_init_fs_context(struct fs_context *fc)
|
||||
return -ENOMEM;
|
||||
|
||||
ctx->ops = &pidfs_sops;
|
||||
ctx->eops = &pidfs_export_operations;
|
||||
ctx->dops = &pidfs_dentry_operations;
|
||||
fc->s_fs_info = (void *)&pidfs_stashed_ops;
|
||||
return 0;
|
||||
|
@ -3,6 +3,7 @@
|
||||
#define LINUX_EXPORTFS_H 1
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/path.h>
|
||||
|
||||
struct dentry;
|
||||
struct iattr;
|
||||
@ -156,6 +157,17 @@ struct fid {
|
||||
};
|
||||
};
|
||||
|
||||
enum handle_to_path_flags {
|
||||
HANDLE_CHECK_PERMS = (1 << 0),
|
||||
HANDLE_CHECK_SUBTREE = (1 << 1),
|
||||
};
|
||||
|
||||
struct handle_to_path_ctx {
|
||||
struct path root;
|
||||
enum handle_to_path_flags flags;
|
||||
unsigned int fh_flags;
|
||||
};
|
||||
|
||||
#define EXPORT_FH_CONNECTABLE 0x1 /* Encode file handle with parent */
|
||||
#define EXPORT_FH_FID 0x2 /* File handle may be non-decodeable */
|
||||
#define EXPORT_FH_DIR_ONLY 0x4 /* Only decode file handle for a directory */
|
||||
@ -225,6 +237,12 @@ struct fid {
|
||||
* is also a directory. In the event that it cannot be found, or storage
|
||||
* space cannot be allocated, a %ERR_PTR should be returned.
|
||||
*
|
||||
* permission:
|
||||
* Allow filesystems to specify a custom permission function.
|
||||
*
|
||||
* open:
|
||||
* Allow filesystems to specify a custom open function.
|
||||
*
|
||||
* commit_metadata:
|
||||
* @commit_metadata should commit metadata changes to stable storage.
|
||||
*
|
||||
@ -251,6 +269,8 @@ struct export_operations {
|
||||
bool write, u32 *device_generation);
|
||||
int (*commit_blocks)(struct inode *inode, struct iomap *iomaps,
|
||||
int nr_iomaps, struct iattr *iattr);
|
||||
int (*permission)(struct handle_to_path_ctx *ctx, unsigned int oflags);
|
||||
struct file * (*open)(struct path *path, unsigned int oflags);
|
||||
#define EXPORT_OP_NOWCC (0x1) /* don't collect v3 wcc data */
|
||||
#define EXPORT_OP_NOSUBTREECHK (0x2) /* no subtree checking */
|
||||
#define EXPORT_OP_CLOSE_BEFORE_UNLINK (0x4) /* close files before unlink */
|
||||
|
@ -59,6 +59,7 @@ struct pid
|
||||
spinlock_t lock;
|
||||
struct dentry *stashed;
|
||||
u64 ino;
|
||||
struct rb_node pidfs_node;
|
||||
/* lists of tasks that use this pid */
|
||||
struct hlist_head tasks[PIDTYPE_MAX];
|
||||
struct hlist_head inodes;
|
||||
@ -68,6 +69,7 @@ struct pid
|
||||
struct upid numbers[];
|
||||
};
|
||||
|
||||
extern seqcount_spinlock_t pidmap_lock_seq;
|
||||
extern struct pid init_struct_pid;
|
||||
|
||||
struct file;
|
||||
|
@ -4,5 +4,8 @@
|
||||
|
||||
struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags);
|
||||
void __init pidfs_init(void);
|
||||
void pidfs_add_pid(struct pid *pid);
|
||||
void pidfs_remove_pid(struct pid *pid);
|
||||
extern const struct dentry_operations pidfs_dentry_operations;
|
||||
|
||||
#endif /* _LINUX_PID_FS_H */
|
||||
|
@ -5,6 +5,7 @@
|
||||
|
||||
struct pseudo_fs_context {
|
||||
const struct super_operations *ops;
|
||||
const struct export_operations *eops;
|
||||
const struct xattr_handler * const *xattr;
|
||||
const struct dentry_operations *dops;
|
||||
unsigned long magic;
|
||||
|
14
kernel/pid.c
14
kernel/pid.c
@ -43,6 +43,7 @@
|
||||
#include <linux/sched/task.h>
|
||||
#include <linux/idr.h>
|
||||
#include <linux/pidfs.h>
|
||||
#include <linux/seqlock.h>
|
||||
#include <net/sock.h>
|
||||
#include <uapi/linux/pidfd.h>
|
||||
|
||||
@ -64,11 +65,6 @@ int pid_max = PID_MAX_DEFAULT;
|
||||
|
||||
int pid_max_min = RESERVED_PIDS + 1;
|
||||
int pid_max_max = PID_MAX_LIMIT;
|
||||
/*
|
||||
* Pseudo filesystems start inode numbering after one. We use Reserved
|
||||
* PIDs as a natural offset.
|
||||
*/
|
||||
static u64 pidfs_ino = RESERVED_PIDS;
|
||||
|
||||
/*
|
||||
* PID-map pages start out as NULL, they get allocated upon
|
||||
@ -108,6 +104,7 @@ EXPORT_SYMBOL_GPL(init_pid_ns);
|
||||
*/
|
||||
|
||||
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
|
||||
seqcount_spinlock_t pidmap_lock_seq = SEQCNT_SPINLOCK_ZERO(pidmap_lock_seq, &pidmap_lock);
|
||||
|
||||
void put_pid(struct pid *pid)
|
||||
{
|
||||
@ -158,6 +155,7 @@ void free_pid(struct pid *pid)
|
||||
|
||||
idr_remove(&ns->idr, upid->nr);
|
||||
}
|
||||
pidfs_remove_pid(pid);
|
||||
spin_unlock_irqrestore(&pidmap_lock, flags);
|
||||
|
||||
call_rcu(&pid->rcu, delayed_put_pid);
|
||||
@ -273,22 +271,24 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
|
||||
INIT_HLIST_HEAD(&pid->inodes);
|
||||
|
||||
upid = pid->numbers + ns->level;
|
||||
idr_preload(GFP_KERNEL);
|
||||
spin_lock_irq(&pidmap_lock);
|
||||
if (!(ns->pid_allocated & PIDNS_ADDING))
|
||||
goto out_unlock;
|
||||
pid->stashed = NULL;
|
||||
pid->ino = ++pidfs_ino;
|
||||
pidfs_add_pid(pid);
|
||||
for ( ; upid >= pid->numbers; --upid) {
|
||||
/* Make the PID visible to find_pid_ns. */
|
||||
idr_replace(&upid->ns->idr, pid, upid->nr);
|
||||
upid->ns->pid_allocated++;
|
||||
}
|
||||
spin_unlock_irq(&pidmap_lock);
|
||||
idr_preload_end();
|
||||
|
||||
return pid;
|
||||
|
||||
out_unlock:
|
||||
spin_unlock_irq(&pidmap_lock);
|
||||
idr_preload_end();
|
||||
put_pid_ns(ns);
|
||||
|
||||
out_free:
|
||||
|
2
tools/testing/selftests/pidfd/.gitignore
vendored
2
tools/testing/selftests/pidfd/.gitignore
vendored
@ -6,3 +6,5 @@ pidfd_wait
|
||||
pidfd_fdinfo_test
|
||||
pidfd_getfd_test
|
||||
pidfd_setns_test
|
||||
pidfd_file_handle_test
|
||||
pidfd_bind_mount
|
||||
|
@ -2,7 +2,8 @@
|
||||
CFLAGS += -g $(KHDR_INCLUDES) -pthread -Wall
|
||||
|
||||
TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test \
|
||||
pidfd_poll_test pidfd_wait pidfd_getfd_test pidfd_setns_test
|
||||
pidfd_poll_test pidfd_wait pidfd_getfd_test pidfd_setns_test \
|
||||
pidfd_file_handle_test pidfd_bind_mount
|
||||
|
||||
include ../lib.mk
|
||||
|
||||
|
@ -17,6 +17,7 @@
|
||||
#include <sys/wait.h>
|
||||
|
||||
#include "../kselftest.h"
|
||||
#include "../clone3/clone3_selftests.h"
|
||||
|
||||
#ifndef P_PIDFD
|
||||
#define P_PIDFD 3
|
||||
@ -68,6 +69,11 @@
|
||||
#define PIDFD_SKIP 3
|
||||
#define PIDFD_XFAIL 4
|
||||
|
||||
static inline int sys_waitid(int which, pid_t pid, siginfo_t *info, int options)
|
||||
{
|
||||
return syscall(__NR_waitid, which, pid, info, options, NULL);
|
||||
}
|
||||
|
||||
static inline int wait_for_pid(pid_t pid)
|
||||
{
|
||||
int status, ret;
|
||||
@ -114,4 +120,37 @@ static inline int sys_memfd_create(const char *name, unsigned int flags)
|
||||
return syscall(__NR_memfd_create, name, flags);
|
||||
}
|
||||
|
||||
static inline pid_t create_child(int *pidfd, unsigned flags)
|
||||
{
|
||||
struct __clone_args args = {
|
||||
.flags = CLONE_PIDFD | flags,
|
||||
.exit_signal = SIGCHLD,
|
||||
.pidfd = ptr_to_u64(pidfd),
|
||||
};
|
||||
|
||||
return sys_clone3(&args, sizeof(struct __clone_args));
|
||||
}
|
||||
|
||||
/*
 * read(2) wrapper that restarts the call for as long as it fails with
 * EINTR.
 *
 * Returns the result of the first read() that either succeeded or failed
 * with an errno other than EINTR.
 */
static inline ssize_t read_nointr(int fd, void *buf, size_t count)
{
	for (;;) {
		ssize_t nread = read(fd, buf, count);

		if (nread >= 0 || errno != EINTR)
			return nread;
	}
}
|
||||
|
||||
/*
 * write(2) wrapper that restarts the call for as long as it fails with
 * EINTR.
 *
 * Returns the result of the first write() that either succeeded or
 * failed with an errno other than EINTR.
 */
static inline ssize_t write_nointr(int fd, const void *buf, size_t count)
{
	for (;;) {
		ssize_t nwritten = write(fd, buf, count);

		if (nwritten >= 0 || errno != EINTR)
			return nwritten;
	}
}
|
||||
|
||||
#endif /* __PIDFD_H */
|
||||
|
188
tools/testing/selftests/pidfd/pidfd_bind_mount.c
Normal file
188
tools/testing/selftests/pidfd/pidfd_bind_mount.c
Normal file
@ -0,0 +1,188 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
// Copyright (c) 2024 Christian Brauner <brauner@kernel.org>
|
||||
|
||||
#define _GNU_SOURCE
|
||||
#include <fcntl.h>
|
||||
#include <limits.h>
|
||||
#include <sched.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <linux/fs.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/mount.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "pidfd.h"
|
||||
#include "../kselftest_harness.h"
|
||||
|
||||
#ifndef __NR_open_tree
|
||||
#if defined __alpha__
|
||||
#define __NR_open_tree 538
|
||||
#elif defined _MIPS_SIM
|
||||
#if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
|
||||
#define __NR_open_tree 4428
|
||||
#endif
|
||||
#if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
|
||||
#define __NR_open_tree 6428
|
||||
#endif
|
||||
#if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
|
||||
#define __NR_open_tree 5428
|
||||
#endif
|
||||
#elif defined __ia64__
|
||||
#define __NR_open_tree (428 + 1024)
|
||||
#else
|
||||
#define __NR_open_tree 428
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/*
 * Fallback syscall number for move_mount(2) when the installed headers do
 * not provide one. move_mount is the syscall right after open_tree on
 * every architecture listed here, i.e. the open_tree number plus one.
 */
#ifndef __NR_move_mount
	#if defined __alpha__
		#define __NR_move_mount 539
	#elif defined _MIPS_SIM
		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
			#define __NR_move_mount 4429
		#endif
		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
			#define __NR_move_mount 6429
		#endif
		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
			#define __NR_move_mount 5429
		#endif
	#elif defined __ia64__
		/* Was (428 + 1024), which is the open_tree number. */
		#define __NR_move_mount (429 + 1024)
	#else
		#define __NR_move_mount 429
	#endif
#endif
|
||||
|
||||
/*
 * Fallback definitions of the move_mount(2) flags for headers that lack
 * them. Each flag is guarded by its own name: guarding the second one by
 * MOVE_MOUNT_F_EMPTY_PATH would leave MOVE_MOUNT_T_EMPTY_PATH undefined,
 * because the first fallback has just defined MOVE_MOUNT_F_EMPTY_PATH.
 */
#ifndef MOVE_MOUNT_F_EMPTY_PATH
#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */
#endif

#ifndef MOVE_MOUNT_T_EMPTY_PATH
#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */
#endif
|
||||
|
||||
static inline int sys_move_mount(int from_dfd, const char *from_pathname,
|
||||
int to_dfd, const char *to_pathname,
|
||||
unsigned int flags)
|
||||
{
|
||||
return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd,
|
||||
to_pathname, flags);
|
||||
}
|
||||
|
||||
#ifndef OPEN_TREE_CLONE
|
||||
#define OPEN_TREE_CLONE 1
|
||||
#endif
|
||||
|
||||
#ifndef OPEN_TREE_CLOEXEC
|
||||
#define OPEN_TREE_CLOEXEC O_CLOEXEC
|
||||
#endif
|
||||
|
||||
#ifndef AT_RECURSIVE
|
||||
#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */
|
||||
#endif
|
||||
|
||||
static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
|
||||
{
|
||||
return syscall(__NR_open_tree, dfd, filename, flags);
|
||||
}
|
||||
|
||||
FIXTURE(pidfd_bind_mount) {
|
||||
char template[PATH_MAX];
|
||||
int fd_tmp;
|
||||
int pidfd;
|
||||
struct stat st1;
|
||||
struct stat st2;
|
||||
__u32 gen1;
|
||||
__u32 gen2;
|
||||
bool must_unmount;
|
||||
};
|
||||
|
||||
FIXTURE_SETUP(pidfd_bind_mount)
|
||||
{
|
||||
self->fd_tmp = -EBADF;
|
||||
self->must_unmount = false;
|
||||
ASSERT_EQ(unshare(CLONE_NEWNS), 0);
|
||||
ASSERT_LE(snprintf(self->template, PATH_MAX, "%s", P_tmpdir "/pidfd_bind_mount_XXXXXX"), PATH_MAX);
|
||||
self->fd_tmp = mkstemp(self->template);
|
||||
ASSERT_GE(self->fd_tmp, 0);
|
||||
self->pidfd = sys_pidfd_open(getpid(), 0);
|
||||
ASSERT_GE(self->pidfd, 0);
|
||||
ASSERT_GE(fstat(self->pidfd, &self->st1), 0);
|
||||
ASSERT_EQ(ioctl(self->pidfd, FS_IOC_GETVERSION, &self->gen1), 0);
|
||||
}
|
||||
|
||||
FIXTURE_TEARDOWN(pidfd_bind_mount)
|
||||
{
|
||||
ASSERT_EQ(close(self->fd_tmp), 0);
|
||||
if (self->must_unmount)
|
||||
ASSERT_EQ(umount2(self->template, 0), 0);
|
||||
ASSERT_EQ(unlink(self->template), 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Test that a detached mount can be created for a pidfd and then
|
||||
* attached to the filesystem hierarchy.
|
||||
*/
|
||||
TEST_F(pidfd_bind_mount, bind_mount)
|
||||
{
|
||||
int fd_tree;
|
||||
|
||||
fd_tree = sys_open_tree(self->pidfd, "", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC | AT_EMPTY_PATH);
|
||||
ASSERT_GE(fd_tree, 0);
|
||||
|
||||
ASSERT_EQ(move_mount(fd_tree, "", self->fd_tmp, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
|
||||
self->must_unmount = true;
|
||||
|
||||
ASSERT_EQ(close(fd_tree), 0);
|
||||
}
|
||||
|
||||
/* Test that a pidfd can be reopened through procfs. */
|
||||
TEST_F(pidfd_bind_mount, reopen)
|
||||
{
|
||||
int pidfd;
|
||||
char proc_path[PATH_MAX];
|
||||
|
||||
sprintf(proc_path, "/proc/self/fd/%d", self->pidfd);
|
||||
pidfd = open(proc_path, O_RDONLY | O_NOCTTY | O_CLOEXEC);
|
||||
ASSERT_GE(pidfd, 0);
|
||||
|
||||
ASSERT_GE(fstat(self->pidfd, &self->st2), 0);
|
||||
ASSERT_EQ(ioctl(self->pidfd, FS_IOC_GETVERSION, &self->gen2), 0);
|
||||
|
||||
ASSERT_TRUE(self->st1.st_dev == self->st2.st_dev && self->st1.st_ino == self->st2.st_ino);
|
||||
ASSERT_TRUE(self->gen1 == self->gen2);
|
||||
|
||||
ASSERT_EQ(close(pidfd), 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Test that a detached mount can be created for a pidfd and then
|
||||
* attached to the filesystem hierarchy and reopened.
|
||||
*/
|
||||
TEST_F(pidfd_bind_mount, bind_mount_reopen)
|
||||
{
|
||||
int fd_tree, fd_pidfd_mnt;
|
||||
|
||||
fd_tree = sys_open_tree(self->pidfd, "", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC | AT_EMPTY_PATH);
|
||||
ASSERT_GE(fd_tree, 0);
|
||||
|
||||
ASSERT_EQ(move_mount(fd_tree, "", self->fd_tmp, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
|
||||
self->must_unmount = true;
|
||||
|
||||
fd_pidfd_mnt = openat(-EBADF, self->template, O_RDONLY | O_NOCTTY | O_CLOEXEC);
|
||||
ASSERT_GE(fd_pidfd_mnt, 0);
|
||||
|
||||
ASSERT_GE(fstat(fd_tree, &self->st2), 0);
|
||||
ASSERT_EQ(ioctl(fd_pidfd_mnt, FS_IOC_GETVERSION, &self->gen2), 0);
|
||||
|
||||
ASSERT_TRUE(self->st1.st_dev == self->st2.st_dev && self->st1.st_ino == self->st2.st_ino);
|
||||
ASSERT_TRUE(self->gen1 == self->gen2);
|
||||
|
||||
ASSERT_EQ(close(fd_tree), 0);
|
||||
ASSERT_EQ(close(fd_pidfd_mnt), 0);
|
||||
}
|
||||
|
||||
TEST_HARNESS_MAIN
|
503
tools/testing/selftests/pidfd/pidfd_file_handle_test.c
Normal file
503
tools/testing/selftests/pidfd/pidfd_file_handle_test.c
Normal file
@ -0,0 +1,503 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#define _GNU_SOURCE
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <limits.h>
|
||||
#include <linux/types.h>
|
||||
#include <poll.h>
|
||||
#include <sched.h>
|
||||
#include <signal.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <syscall.h>
|
||||
#include <sys/prctl.h>
|
||||
#include <sys/wait.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/socket.h>
|
||||
#include <linux/kcmp.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "pidfd.h"
|
||||
#include "../kselftest_harness.h"
|
||||
|
||||
/* Per-test state: the test process itself plus three helper children. */
FIXTURE(file_handle)
{
	/* The test process and a pidfd referring to it. */
	pid_t pid;
	int pidfd;

	/* Child created with CLONE_NEWUSER. */
	pid_t child_pid1;
	int child_pidfd1;

	/* Child created with CLONE_NEWUSER | CLONE_NEWPID. */
	pid_t child_pid2;
	int child_pidfd2;

	/*
	 * Second CLONE_NEWUSER | CLONE_NEWPID child; the exit/reap tests kill
	 * it themselves and set child_pidfd3 to -EBADF.
	 */
	pid_t child_pid3;
	int child_pidfd3;
};
|
||||
|
||||
FIXTURE_SETUP(file_handle)
|
||||
{
|
||||
int ret;
|
||||
int ipc_sockets[2];
|
||||
char c;
|
||||
|
||||
self->pid = getpid();
|
||||
self->pidfd = sys_pidfd_open(self->pid, 0);
|
||||
ASSERT_GE(self->pidfd, 0);
|
||||
|
||||
ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
|
||||
EXPECT_EQ(ret, 0);
|
||||
|
||||
self->child_pid1 = create_child(&self->child_pidfd1, CLONE_NEWUSER);
|
||||
EXPECT_GE(self->child_pid1, 0);
|
||||
|
||||
if (self->child_pid1 == 0) {
|
||||
close(ipc_sockets[0]);
|
||||
|
||||
if (write_nointr(ipc_sockets[1], "1", 1) < 0)
|
||||
_exit(EXIT_FAILURE);
|
||||
|
||||
close(ipc_sockets[1]);
|
||||
|
||||
pause();
|
||||
_exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
close(ipc_sockets[1]);
|
||||
ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
|
||||
close(ipc_sockets[0]);
|
||||
|
||||
ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
|
||||
EXPECT_EQ(ret, 0);
|
||||
|
||||
self->child_pid2 = create_child(&self->child_pidfd2, CLONE_NEWUSER | CLONE_NEWPID);
|
||||
EXPECT_GE(self->child_pid2, 0);
|
||||
|
||||
if (self->child_pid2 == 0) {
|
||||
close(ipc_sockets[0]);
|
||||
|
||||
if (write_nointr(ipc_sockets[1], "1", 1) < 0)
|
||||
_exit(EXIT_FAILURE);
|
||||
|
||||
close(ipc_sockets[1]);
|
||||
|
||||
pause();
|
||||
_exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
close(ipc_sockets[1]);
|
||||
ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
|
||||
close(ipc_sockets[0]);
|
||||
|
||||
ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
|
||||
EXPECT_EQ(ret, 0);
|
||||
|
||||
self->child_pid3 = create_child(&self->child_pidfd3, CLONE_NEWUSER | CLONE_NEWPID);
|
||||
EXPECT_GE(self->child_pid3, 0);
|
||||
|
||||
if (self->child_pid3 == 0) {
|
||||
close(ipc_sockets[0]);
|
||||
|
||||
if (write_nointr(ipc_sockets[1], "1", 1) < 0)
|
||||
_exit(EXIT_FAILURE);
|
||||
|
||||
close(ipc_sockets[1]);
|
||||
|
||||
pause();
|
||||
_exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
close(ipc_sockets[1]);
|
||||
ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
|
||||
close(ipc_sockets[0]);
|
||||
}
|
||||
|
||||
FIXTURE_TEARDOWN(file_handle)
|
||||
{
|
||||
EXPECT_EQ(close(self->pidfd), 0);
|
||||
|
||||
EXPECT_EQ(sys_pidfd_send_signal(self->child_pidfd1, SIGKILL, NULL, 0), 0);
|
||||
if (self->child_pidfd1 >= 0)
|
||||
EXPECT_EQ(0, close(self->child_pidfd1));
|
||||
|
||||
EXPECT_EQ(sys_waitid(P_PID, self->child_pid1, NULL, WEXITED), 0);
|
||||
|
||||
EXPECT_EQ(sys_pidfd_send_signal(self->child_pidfd2, SIGKILL, NULL, 0), 0);
|
||||
if (self->child_pidfd2 >= 0)
|
||||
EXPECT_EQ(0, close(self->child_pidfd2));
|
||||
|
||||
EXPECT_EQ(sys_waitid(P_PID, self->child_pid2, NULL, WEXITED), 0);
|
||||
|
||||
if (self->child_pidfd3 >= 0) {
|
||||
EXPECT_EQ(sys_pidfd_send_signal(self->child_pidfd3, SIGKILL, NULL, 0), 0);
|
||||
EXPECT_EQ(0, close(self->child_pidfd3));
|
||||
EXPECT_EQ(sys_waitid(P_PID, self->child_pid3, NULL, WEXITED), 0);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Test that we can decode a pidfs file handle in the same pid
|
||||
* namespace.
|
||||
*/
|
||||
TEST_F(file_handle, file_handle_same_pidns)
|
||||
{
|
||||
int mnt_id;
|
||||
struct file_handle *fh;
|
||||
int pidfd = -EBADF;
|
||||
struct stat st1, st2;
|
||||
|
||||
fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ);
|
||||
ASSERT_NE(fh, NULL);
|
||||
memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ);
|
||||
fh->handle_bytes = MAX_HANDLE_SZ;
|
||||
|
||||
ASSERT_EQ(name_to_handle_at(self->child_pidfd1, "", fh, &mnt_id, AT_EMPTY_PATH), 0);
|
||||
|
||||
ASSERT_EQ(fstat(self->child_pidfd1, &st1), 0);
|
||||
|
||||
pidfd = open_by_handle_at(self->pidfd, fh, 0);
|
||||
ASSERT_GE(pidfd, 0);
|
||||
|
||||
ASSERT_EQ(fstat(pidfd, &st2), 0);
|
||||
ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino);
|
||||
|
||||
ASSERT_EQ(close(pidfd), 0);
|
||||
|
||||
pidfd = open_by_handle_at(self->pidfd, fh, O_CLOEXEC);
|
||||
ASSERT_GE(pidfd, 0);
|
||||
|
||||
ASSERT_EQ(fstat(pidfd, &st2), 0);
|
||||
ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino);
|
||||
|
||||
ASSERT_EQ(close(pidfd), 0);
|
||||
|
||||
pidfd = open_by_handle_at(self->pidfd, fh, O_NONBLOCK);
|
||||
ASSERT_GE(pidfd, 0);
|
||||
|
||||
ASSERT_EQ(fstat(pidfd, &st2), 0);
|
||||
ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino);
|
||||
|
||||
ASSERT_EQ(close(pidfd), 0);
|
||||
|
||||
free(fh);
|
||||
}
|
||||
|
||||
/*
|
||||
* Test that we can decode a pidfs file handle from a child pid
|
||||
* namespace.
|
||||
*/
|
||||
TEST_F(file_handle, file_handle_child_pidns)
|
||||
{
|
||||
int mnt_id;
|
||||
struct file_handle *fh;
|
||||
int pidfd = -EBADF;
|
||||
struct stat st1, st2;
|
||||
|
||||
fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ);
|
||||
ASSERT_NE(fh, NULL);
|
||||
memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ);
|
||||
fh->handle_bytes = MAX_HANDLE_SZ;
|
||||
|
||||
ASSERT_EQ(name_to_handle_at(self->child_pidfd2, "", fh, &mnt_id, AT_EMPTY_PATH), 0);
|
||||
|
||||
ASSERT_EQ(fstat(self->child_pidfd2, &st1), 0);
|
||||
|
||||
pidfd = open_by_handle_at(self->pidfd, fh, 0);
|
||||
ASSERT_GE(pidfd, 0);
|
||||
|
||||
ASSERT_EQ(fstat(pidfd, &st2), 0);
|
||||
ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino);
|
||||
|
||||
ASSERT_EQ(close(pidfd), 0);
|
||||
|
||||
pidfd = open_by_handle_at(self->pidfd, fh, O_CLOEXEC);
|
||||
ASSERT_GE(pidfd, 0);
|
||||
|
||||
ASSERT_EQ(fstat(pidfd, &st2), 0);
|
||||
ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino);
|
||||
|
||||
ASSERT_EQ(close(pidfd), 0);
|
||||
|
||||
pidfd = open_by_handle_at(self->pidfd, fh, O_NONBLOCK);
|
||||
ASSERT_GE(pidfd, 0);
|
||||
|
||||
ASSERT_EQ(fstat(pidfd, &st2), 0);
|
||||
ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino);
|
||||
|
||||
ASSERT_EQ(close(pidfd), 0);
|
||||
|
||||
free(fh);
|
||||
}
|
||||
|
||||
/*
|
||||
* Test that we fail to decode a pidfs file handle of a process that is
* outside of the caller's pid namespace hierarchy.
|
||||
*/
|
||||
TEST_F(file_handle, file_handle_foreign_pidns)
|
||||
{
|
||||
int mnt_id;
|
||||
struct file_handle *fh;
|
||||
pid_t pid;
|
||||
|
||||
fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ);
|
||||
ASSERT_NE(fh, NULL);
|
||||
memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ);
|
||||
fh->handle_bytes = MAX_HANDLE_SZ;
|
||||
|
||||
ASSERT_EQ(name_to_handle_at(self->pidfd, "", fh, &mnt_id, AT_EMPTY_PATH), 0);
|
||||
|
||||
ASSERT_EQ(setns(self->child_pidfd2, CLONE_NEWUSER | CLONE_NEWPID), 0);
|
||||
|
||||
pid = fork();
|
||||
ASSERT_GE(pid, 0);
|
||||
|
||||
if (pid == 0) {
|
||||
int pidfd = open_by_handle_at(self->pidfd, fh, 0);
|
||||
if (pidfd >= 0) {
|
||||
TH_LOG("Managed to open pidfd outside of the caller's pid namespace hierarchy");
|
||||
_exit(1);
|
||||
}
|
||||
_exit(0);
|
||||
}
|
||||
|
||||
ASSERT_EQ(wait_for_pid(pid), 0);
|
||||
|
||||
free(fh);
|
||||
}
|
||||
|
||||
/*
|
||||
* Test that we can decode a pidfs file handle of a process that has
|
||||
* exited but not been reaped.
|
||||
*/
|
||||
TEST_F(file_handle, pid_has_exited)
|
||||
{
|
||||
int mnt_id, pidfd, child_pidfd3;
|
||||
struct file_handle *fh;
|
||||
struct stat st1, st2;
|
||||
|
||||
fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ);
|
||||
ASSERT_NE(fh, NULL);
|
||||
memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ);
|
||||
fh->handle_bytes = MAX_HANDLE_SZ;
|
||||
|
||||
ASSERT_EQ(name_to_handle_at(self->child_pidfd3, "", fh, &mnt_id, AT_EMPTY_PATH), 0);
|
||||
|
||||
ASSERT_EQ(fstat(self->child_pidfd3, &st1), 0);
|
||||
|
||||
pidfd = open_by_handle_at(self->pidfd, fh, 0);
|
||||
ASSERT_GE(pidfd, 0);
|
||||
|
||||
ASSERT_EQ(fstat(pidfd, &st2), 0);
|
||||
ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino);
|
||||
|
||||
ASSERT_EQ(close(pidfd), 0);
|
||||
|
||||
child_pidfd3 = self->child_pidfd3;
|
||||
self->child_pidfd3 = -EBADF;
|
||||
EXPECT_EQ(sys_pidfd_send_signal(child_pidfd3, SIGKILL, NULL, 0), 0);
|
||||
EXPECT_EQ(close(child_pidfd3), 0);
|
||||
EXPECT_EQ(sys_waitid(P_PID, self->child_pid3, NULL, WEXITED | WNOWAIT), 0);
|
||||
|
||||
pidfd = open_by_handle_at(self->pidfd, fh, 0);
|
||||
ASSERT_GE(pidfd, 0);
|
||||
|
||||
EXPECT_EQ(sys_waitid(P_PID, self->child_pid3, NULL, WEXITED), 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Test that we fail to decode a pidfs file handle of a process that has
|
||||
* already been reaped.
|
||||
*/
|
||||
TEST_F(file_handle, pid_has_been_reaped)
|
||||
{
|
||||
int mnt_id, pidfd, child_pidfd3;
|
||||
struct file_handle *fh;
|
||||
struct stat st1, st2;
|
||||
|
||||
fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ);
|
||||
ASSERT_NE(fh, NULL);
|
||||
memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ);
|
||||
fh->handle_bytes = MAX_HANDLE_SZ;
|
||||
|
||||
ASSERT_EQ(name_to_handle_at(self->child_pidfd3, "", fh, &mnt_id, AT_EMPTY_PATH), 0);
|
||||
|
||||
ASSERT_EQ(fstat(self->child_pidfd3, &st1), 0);
|
||||
|
||||
pidfd = open_by_handle_at(self->pidfd, fh, 0);
|
||||
ASSERT_GE(pidfd, 0);
|
||||
|
||||
ASSERT_EQ(fstat(pidfd, &st2), 0);
|
||||
ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino);
|
||||
|
||||
ASSERT_EQ(close(pidfd), 0);
|
||||
|
||||
child_pidfd3 = self->child_pidfd3;
|
||||
self->child_pidfd3 = -EBADF;
|
||||
EXPECT_EQ(sys_pidfd_send_signal(child_pidfd3, SIGKILL, NULL, 0), 0);
|
||||
EXPECT_EQ(close(child_pidfd3), 0);
|
||||
EXPECT_EQ(sys_waitid(P_PID, self->child_pid3, NULL, WEXITED), 0);
|
||||
|
||||
pidfd = open_by_handle_at(self->pidfd, fh, 0);
|
||||
ASSERT_LT(pidfd, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Test valid flags to open a pidfd file handle. Note, that
|
||||
* PIDFD_NONBLOCK is defined as O_NONBLOCK and O_NONBLOCK is an alias to
|
||||
* O_NDELAY. Also note that PIDFD_THREAD is an alias for O_EXCL.
|
||||
*/
|
||||
TEST_F(file_handle, open_by_handle_at_valid_flags)
|
||||
{
|
||||
int mnt_id;
|
||||
struct file_handle *fh;
|
||||
int pidfd = -EBADF;
|
||||
struct stat st1, st2;
|
||||
|
||||
fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ);
|
||||
ASSERT_NE(fh, NULL);
|
||||
memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ);
|
||||
fh->handle_bytes = MAX_HANDLE_SZ;
|
||||
|
||||
ASSERT_EQ(name_to_handle_at(self->child_pidfd2, "", fh, &mnt_id, AT_EMPTY_PATH), 0);
|
||||
|
||||
ASSERT_EQ(fstat(self->child_pidfd2, &st1), 0);
|
||||
|
||||
pidfd = open_by_handle_at(self->pidfd, fh,
|
||||
O_RDONLY |
|
||||
O_WRONLY |
|
||||
O_RDWR |
|
||||
O_NONBLOCK |
|
||||
O_NDELAY |
|
||||
O_CLOEXEC |
|
||||
O_EXCL);
|
||||
ASSERT_GE(pidfd, 0);
|
||||
|
||||
ASSERT_EQ(fstat(pidfd, &st2), 0);
|
||||
ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino);
|
||||
|
||||
ASSERT_EQ(close(pidfd), 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Test that invalid flags passed to open a pidfd file handle are
|
||||
* rejected.
|
||||
*/
|
||||
TEST_F(file_handle, open_by_handle_at_invalid_flags)
|
||||
{
|
||||
int mnt_id;
|
||||
struct file_handle *fh;
|
||||
int pidfd = -EBADF;
|
||||
static const struct invalid_pidfs_file_handle_flags {
|
||||
int oflag;
|
||||
const char *oflag_name;
|
||||
} invalid_pidfs_file_handle_flags[] = {
|
||||
{ FASYNC, "FASYNC" },
|
||||
{ O_CREAT, "O_CREAT" },
|
||||
{ O_NOCTTY, "O_NOCTTY" },
|
||||
{ O_CREAT, "O_CREAT" },
|
||||
{ O_TRUNC, "O_TRUNC" },
|
||||
{ O_APPEND, "O_APPEND" },
|
||||
{ O_SYNC, "O_SYNC" },
|
||||
{ O_DSYNC, "O_DSYNC" },
|
||||
{ O_DIRECT, "O_DIRECT" },
|
||||
{ O_DIRECTORY, "O_DIRECTORY" },
|
||||
{ O_NOFOLLOW, "O_NOFOLLOW" },
|
||||
{ O_NOATIME, "O_NOATIME" },
|
||||
{ O_PATH, "O_PATH" },
|
||||
{ O_TMPFILE, "O_TMPFILE" },
|
||||
/*
|
||||
* O_LARGEFILE is added implicitly by
|
||||
* open_by_handle_at() so pidfs simply masks it off.
|
||||
*/
|
||||
};
|
||||
|
||||
fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ);
|
||||
ASSERT_NE(fh, NULL);
|
||||
memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ);
|
||||
fh->handle_bytes = MAX_HANDLE_SZ;
|
||||
|
||||
ASSERT_EQ(name_to_handle_at(self->child_pidfd2, "", fh, &mnt_id, AT_EMPTY_PATH), 0);
|
||||
|
||||
for (int i = 0; i < ARRAY_SIZE(invalid_pidfs_file_handle_flags); i++) {
|
||||
pidfd = open_by_handle_at(self->pidfd, fh, invalid_pidfs_file_handle_flags[i].oflag);
|
||||
ASSERT_LT(pidfd, 0) {
|
||||
TH_LOG("open_by_handle_at() succeeded with invalid flags: %s", invalid_pidfs_file_handle_flags[i].oflag_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Test that lookup fails. */
|
||||
TEST_F(file_handle, lookup_must_fail)
|
||||
{
|
||||
int mnt_id;
|
||||
struct file_handle *fh;
|
||||
|
||||
fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ);
|
||||
ASSERT_NE(fh, NULL);
|
||||
memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ);
|
||||
fh->handle_bytes = MAX_HANDLE_SZ;
|
||||
|
||||
ASSERT_NE(name_to_handle_at(self->child_pidfd2, "lookup-is-not-possible-with-pidfs", fh, &mnt_id, AT_EMPTY_PATH), 0);
|
||||
ASSERT_EQ(errno, ENOTDIR);
|
||||
ASSERT_NE(name_to_handle_at(self->child_pidfd2, "lookup-is-not-possible-with-pidfs", fh, &mnt_id, 0), 0);
|
||||
ASSERT_EQ(errno, ENOTDIR);
|
||||
}
|
||||
|
||||
#ifndef AT_HANDLE_CONNECTABLE
|
||||
#define AT_HANDLE_CONNECTABLE 0x002
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Test that AT_HANDLE_CONNECTABLE is rejected. Connectable file handles
|
||||
* don't make sense for pidfs. Note that currently AT_HANDLE_CONNECTABLE
|
||||
* is rejected because it is incompatible with AT_EMPTY_PATH which is
|
||||
* required with pidfds as we don't support lookup.
|
||||
*/
|
||||
TEST_F(file_handle, invalid_name_to_handle_at_flags)
|
||||
{
|
||||
int mnt_id;
|
||||
struct file_handle *fh;
|
||||
|
||||
fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ);
|
||||
ASSERT_NE(fh, NULL);
|
||||
memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ);
|
||||
fh->handle_bytes = MAX_HANDLE_SZ;
|
||||
|
||||
ASSERT_NE(name_to_handle_at(self->child_pidfd2, "", fh, &mnt_id, AT_EMPTY_PATH | AT_HANDLE_CONNECTABLE), 0);
|
||||
}
|
||||
|
||||
#ifndef AT_HANDLE_FID
|
||||
#define AT_HANDLE_FID 0x200
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Test that a request with AT_HANDLE_FID always leads to decodable file
|
||||
* handle as pidfs always provides export operations.
|
||||
*/
|
||||
TEST_F(file_handle, valid_name_to_handle_at_flags)
|
||||
{
|
||||
int mnt_id, pidfd;
|
||||
struct file_handle *fh;
|
||||
struct stat st1, st2;
|
||||
|
||||
fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ);
|
||||
ASSERT_NE(fh, NULL);
|
||||
memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ);
|
||||
fh->handle_bytes = MAX_HANDLE_SZ;
|
||||
|
||||
ASSERT_EQ(name_to_handle_at(self->child_pidfd2, "", fh, &mnt_id, AT_EMPTY_PATH | AT_HANDLE_FID), 0);
|
||||
|
||||
ASSERT_EQ(fstat(self->child_pidfd2, &st1), 0);
|
||||
|
||||
pidfd = open_by_handle_at(self->pidfd, fh, 0);
|
||||
ASSERT_GE(pidfd, 0);
|
||||
|
||||
ASSERT_EQ(fstat(pidfd, &st2), 0);
|
||||
ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino);
|
||||
|
||||
ASSERT_EQ(close(pidfd), 0);
|
||||
}
|
||||
|
||||
TEST_HARNESS_MAIN
|
@ -19,7 +19,6 @@
|
||||
#include <linux/ioctl.h>
|
||||
|
||||
#include "pidfd.h"
|
||||
#include "../clone3/clone3_selftests.h"
|
||||
#include "../kselftest_harness.h"
|
||||
|
||||
#ifndef PIDFS_IOCTL_MAGIC
|
||||
@ -118,22 +117,6 @@ FIXTURE(current_nsset)
|
||||
int child_pidfd_derived_nsfds2[PIDFD_NS_MAX];
|
||||
};
|
||||
|
||||
static int sys_waitid(int which, pid_t pid, int options)
|
||||
{
|
||||
return syscall(__NR_waitid, which, pid, NULL, options, NULL);
|
||||
}
|
||||
|
||||
pid_t create_child(int *pidfd, unsigned flags)
|
||||
{
|
||||
struct __clone_args args = {
|
||||
.flags = CLONE_PIDFD | flags,
|
||||
.exit_signal = SIGCHLD,
|
||||
.pidfd = ptr_to_u64(pidfd),
|
||||
};
|
||||
|
||||
return sys_clone3(&args, sizeof(struct clone_args));
|
||||
}
|
||||
|
||||
static bool switch_timens(void)
|
||||
{
|
||||
int fd, ret;
|
||||
@ -150,28 +133,6 @@ static bool switch_timens(void)
|
||||
return ret == 0;
|
||||
}
|
||||
|
||||
static ssize_t read_nointr(int fd, void *buf, size_t count)
|
||||
{
|
||||
ssize_t ret;
|
||||
|
||||
do {
|
||||
ret = read(fd, buf, count);
|
||||
} while (ret < 0 && errno == EINTR);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t write_nointr(int fd, const void *buf, size_t count)
|
||||
{
|
||||
ssize_t ret;
|
||||
|
||||
do {
|
||||
ret = write(fd, buf, count);
|
||||
} while (ret < 0 && errno == EINTR);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
FIXTURE_SETUP(current_nsset)
|
||||
{
|
||||
int i, proc_fd, ret;
|
||||
@ -229,7 +190,7 @@ FIXTURE_SETUP(current_nsset)
|
||||
_exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED | WNOWAIT), 0);
|
||||
ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, NULL, WEXITED | WNOWAIT), 0);
|
||||
|
||||
self->pidfd = sys_pidfd_open(self->pid, 0);
|
||||
EXPECT_GE(self->pidfd, 0) {
|
||||
@ -432,9 +393,9 @@ FIXTURE_TEARDOWN(current_nsset)
|
||||
EXPECT_EQ(0, close(self->child_pidfd1));
|
||||
if (self->child_pidfd2 >= 0)
|
||||
EXPECT_EQ(0, close(self->child_pidfd2));
|
||||
ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED), 0);
|
||||
ASSERT_EQ(sys_waitid(P_PID, self->child_pid1, WEXITED), 0);
|
||||
ASSERT_EQ(sys_waitid(P_PID, self->child_pid2, WEXITED), 0);
|
||||
ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, NULL, WEXITED), 0);
|
||||
ASSERT_EQ(sys_waitid(P_PID, self->child_pid1, NULL, WEXITED), 0);
|
||||
ASSERT_EQ(sys_waitid(P_PID, self->child_pid2, NULL, WEXITED), 0);
|
||||
}
|
||||
|
||||
static int preserve_ns(const int pid, const char *ns)
|
||||
|
@ -26,22 +26,11 @@
|
||||
#define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
static pid_t sys_clone3(struct clone_args *args)
|
||||
{
|
||||
return syscall(__NR_clone3, args, sizeof(struct clone_args));
|
||||
}
|
||||
|
||||
static int sys_waitid(int which, pid_t pid, siginfo_t *info, int options,
|
||||
struct rusage *ru)
|
||||
{
|
||||
return syscall(__NR_waitid, which, pid, info, options, ru);
|
||||
}
|
||||
|
||||
TEST(wait_simple)
|
||||
{
|
||||
int pidfd = -1;
|
||||
pid_t parent_tid = -1;
|
||||
struct clone_args args = {
|
||||
struct __clone_args args = {
|
||||
.parent_tid = ptr_to_u64(&parent_tid),
|
||||
.pidfd = ptr_to_u64(&pidfd),
|
||||
.flags = CLONE_PIDFD | CLONE_PARENT_SETTID,
|
||||
@ -55,7 +44,7 @@ TEST(wait_simple)
|
||||
pidfd = open("/proc/self", O_DIRECTORY | O_RDONLY | O_CLOEXEC);
|
||||
ASSERT_GE(pidfd, 0);
|
||||
|
||||
pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
|
||||
pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED);
|
||||
ASSERT_NE(pid, 0);
|
||||
EXPECT_EQ(close(pidfd), 0);
|
||||
pidfd = -1;
|
||||
@ -63,18 +52,18 @@ TEST(wait_simple)
|
||||
pidfd = open("/dev/null", O_RDONLY | O_CLOEXEC);
|
||||
ASSERT_GE(pidfd, 0);
|
||||
|
||||
pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
|
||||
pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED);
|
||||
ASSERT_NE(pid, 0);
|
||||
EXPECT_EQ(close(pidfd), 0);
|
||||
pidfd = -1;
|
||||
|
||||
pid = sys_clone3(&args);
|
||||
pid = sys_clone3(&args, sizeof(args));
|
||||
ASSERT_GE(pid, 0);
|
||||
|
||||
if (pid == 0)
|
||||
exit(EXIT_SUCCESS);
|
||||
|
||||
pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
|
||||
pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED);
|
||||
ASSERT_GE(pid, 0);
|
||||
ASSERT_EQ(WIFEXITED(info.si_status), true);
|
||||
ASSERT_EQ(WEXITSTATUS(info.si_status), 0);
|
||||
@ -89,7 +78,7 @@ TEST(wait_states)
|
||||
{
|
||||
int pidfd = -1;
|
||||
pid_t parent_tid = -1;
|
||||
struct clone_args args = {
|
||||
struct __clone_args args = {
|
||||
.parent_tid = ptr_to_u64(&parent_tid),
|
||||
.pidfd = ptr_to_u64(&pidfd),
|
||||
.flags = CLONE_PIDFD | CLONE_PARENT_SETTID,
|
||||
@ -102,7 +91,7 @@ TEST(wait_states)
|
||||
};
|
||||
|
||||
ASSERT_EQ(pipe(pfd), 0);
|
||||
pid = sys_clone3(&args);
|
||||
pid = sys_clone3(&args, sizeof(args));
|
||||
ASSERT_GE(pid, 0);
|
||||
|
||||
if (pid == 0) {
|
||||
@ -117,28 +106,28 @@ TEST(wait_states)
|
||||
}
|
||||
|
||||
close(pfd[0]);
|
||||
ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL), 0);
|
||||
ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED), 0);
|
||||
ASSERT_EQ(info.si_signo, SIGCHLD);
|
||||
ASSERT_EQ(info.si_code, CLD_STOPPED);
|
||||
ASSERT_EQ(info.si_pid, parent_tid);
|
||||
|
||||
ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0), 0);
|
||||
|
||||
ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED, NULL), 0);
|
||||
ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED), 0);
|
||||
ASSERT_EQ(write(pfd[1], "C", 1), 1);
|
||||
close(pfd[1]);
|
||||
ASSERT_EQ(info.si_signo, SIGCHLD);
|
||||
ASSERT_EQ(info.si_code, CLD_CONTINUED);
|
||||
ASSERT_EQ(info.si_pid, parent_tid);
|
||||
|
||||
ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WUNTRACED, NULL), 0);
|
||||
ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WUNTRACED), 0);
|
||||
ASSERT_EQ(info.si_signo, SIGCHLD);
|
||||
ASSERT_EQ(info.si_code, CLD_STOPPED);
|
||||
ASSERT_EQ(info.si_pid, parent_tid);
|
||||
|
||||
ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0), 0);
|
||||
|
||||
ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL), 0);
|
||||
ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED), 0);
|
||||
ASSERT_EQ(info.si_signo, SIGCHLD);
|
||||
ASSERT_EQ(info.si_code, CLD_KILLED);
|
||||
ASSERT_EQ(info.si_pid, parent_tid);
|
||||
@ -151,7 +140,7 @@ TEST(wait_nonblock)
|
||||
int pidfd;
|
||||
unsigned int flags = 0;
|
||||
pid_t parent_tid = -1;
|
||||
struct clone_args args = {
|
||||
struct __clone_args args = {
|
||||
.parent_tid = ptr_to_u64(&parent_tid),
|
||||
.flags = CLONE_PARENT_SETTID,
|
||||
.exit_signal = SIGCHLD,
|
||||
@ -173,12 +162,12 @@ TEST(wait_nonblock)
|
||||
SKIP(return, "Skipping PIDFD_NONBLOCK test");
|
||||
}
|
||||
|
||||
ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
|
||||
ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED);
|
||||
ASSERT_LT(ret, 0);
|
||||
ASSERT_EQ(errno, ECHILD);
|
||||
EXPECT_EQ(close(pidfd), 0);
|
||||
|
||||
pid = sys_clone3(&args);
|
||||
pid = sys_clone3(&args, sizeof(args));
|
||||
ASSERT_GE(pid, 0);
|
||||
|
||||
if (pid == 0) {
|
||||
@ -201,7 +190,7 @@ TEST(wait_nonblock)
|
||||
* Callers need to see EAGAIN/EWOULDBLOCK with non-blocking pidfd when
|
||||
* child processes exist but none have exited.
|
||||
*/
|
||||
ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
|
||||
ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED);
|
||||
ASSERT_LT(ret, 0);
|
||||
ASSERT_EQ(errno, EAGAIN);
|
||||
|
||||
@ -210,19 +199,19 @@ TEST(wait_nonblock)
|
||||
* WNOHANG raised explicitly when child processes exist but none have
|
||||
* exited.
|
||||
*/
|
||||
ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED | WNOHANG, NULL);
|
||||
ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED | WNOHANG);
|
||||
ASSERT_EQ(ret, 0);
|
||||
|
||||
ASSERT_EQ(fcntl(pidfd, F_SETFL, (flags & ~O_NONBLOCK)), 0);
|
||||
|
||||
ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL), 0);
|
||||
ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED), 0);
|
||||
ASSERT_EQ(info.si_signo, SIGCHLD);
|
||||
ASSERT_EQ(info.si_code, CLD_STOPPED);
|
||||
ASSERT_EQ(info.si_pid, parent_tid);
|
||||
|
||||
ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0), 0);
|
||||
|
||||
ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL), 0);
|
||||
ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED), 0);
|
||||
ASSERT_EQ(info.si_signo, SIGCHLD);
|
||||
ASSERT_EQ(info.si_code, CLD_EXITED);
|
||||
ASSERT_EQ(info.si_pid, parent_tid);
|
||||
|
Loading…
x
Reference in New Issue
Block a user