mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-04 04:02:26 +00:00
b28ddcc32d
Moving pidfds from the anonymous inode infrastructure to a separate tiny in-kernel filesystem similar to sockfs, pipefs, and anon_inodefs causes selinux denials, and thus causes various userspace components that make heavy use of pidfds to fail, because pidfds previously used anon_inode_getfile(), which isn't subject to any LSM hooks. But dentry_open() is, and that would cause regressions. The failures that are seen are selinux denials. But the core failure is dbus-broker. That cascades into other services failing that depend on dbus-broker. For example, when dbus-broker fails to start, polkit and all the others won't be able to work because they depend on dbus-broker. dbus-broker fails because it doesn't handle failures for SO_PEERPIDFD correctly. Last kernel release we introduced SO_PEERPIDFD (and SCM_PIDFD). SO_PEERPIDFD allows dbus-broker and polkit and others to receive a pidfd for the peer of an AF_UNIX socket. This is the first time in the history of Linux that we can safely authenticate clients in a race-free manner. dbus-broker immediately made use of this but messed up the error checking. It only allowed EINVAL as a valid failure for SO_PEERPIDFD. That's obviously problematic, not just because of LSM denials but also because of seccomp denials that would prevent SO_PEERPIDFD from working, or any other new error code from there. So this is catching a flawed implementation in dbus-broker as well. It has to fall back to the old pid-based authentication when SO_PEERPIDFD doesn't work, no matter the reason; otherwise it'll always risk such failures. So overall that LSM denial should not have caused dbus-broker to fail. It can never assume that a feature released one kernel ago, like SO_PEERPIDFD, is available. So, the next fix, separate from the selinux policy update, is to try and fix dbus-broker at [3]. That should make it into Fedora as well. In addition, the selinux reference policy should also be updated. See [4] for that.
If Selinux is in enforcing mode in userspace and it encounters anything that it doesn't know about it will deny it by default. And the policy is entirely in userspace including declaring new types for stuff like nsfs or pidfs to allow it. For now we continue to raise S_PRIVATE on the inode if it's a pidfs inode which means things behave exactly like before. Link: https://bugzilla.redhat.com/show_bug.cgi?id=2265630 Link: https://github.com/fedora-selinux/selinux-policy/pull/2050 Link: https://github.com/bus1/dbus-broker/pull/343 [3] Link: https://github.com/SELinuxProject/refpolicy/pull/762 [4] Reported-by: Nathan Chancellor <nathan@kernel.org> Link: https://lore.kernel.org/r/20240222190334.GA412503@dev-arch.thelio-3990X Link: https://lore.kernel.org/r/20240218-neufahrzeuge-brauhaus-fb0eb6459771@brauner Signed-off-by: Christian Brauner <brauner@kernel.org>
251 lines
5.6 KiB
C
251 lines
5.6 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
#include <linux/mount.h>
|
|
#include <linux/pseudo_fs.h>
|
|
#include <linux/file.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/proc_fs.h>
|
|
#include <linux/proc_ns.h>
|
|
#include <linux/magic.h>
|
|
#include <linux/ktime.h>
|
|
#include <linux/seq_file.h>
|
|
#include <linux/user_namespace.h>
|
|
#include <linux/nsfs.h>
|
|
#include <linux/uaccess.h>
|
|
|
|
#include "internal.h"
|
|
|
|
static struct vfsmount *nsfs_mnt;
|
|
|
|
static long ns_ioctl(struct file *filp, unsigned int ioctl,
|
|
unsigned long arg);
|
|
static const struct file_operations ns_file_operations = {
|
|
.llseek = no_llseek,
|
|
.unlocked_ioctl = ns_ioctl,
|
|
.compat_ioctl = compat_ptr_ioctl,
|
|
};
|
|
|
|
static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
|
|
{
|
|
struct inode *inode = d_inode(dentry);
|
|
struct ns_common *ns = inode->i_private;
|
|
const struct proc_ns_operations *ns_ops = ns->ops;
|
|
|
|
return dynamic_dname(buffer, buflen, "%s:[%lu]",
|
|
ns_ops->name, inode->i_ino);
|
|
}
|
|
|
|
static void ns_prune_dentry(struct dentry *dentry)
|
|
{
|
|
struct inode *inode = d_inode(dentry);
|
|
if (inode) {
|
|
struct ns_common *ns = inode->i_private;
|
|
WRITE_ONCE(ns->stashed, NULL);
|
|
}
|
|
}
|
|
|
|
const struct dentry_operations ns_dentry_operations =
|
|
{
|
|
.d_prune = ns_prune_dentry,
|
|
.d_delete = always_delete_dentry,
|
|
.d_dname = ns_dname,
|
|
};
|
|
|
|
static void nsfs_evict(struct inode *inode)
|
|
{
|
|
struct ns_common *ns = inode->i_private;
|
|
clear_inode(inode);
|
|
ns->ops->put(ns);
|
|
}
|
|
|
|
/*
 * Fill @path with an nsfs path for the namespace produced by @ns_get_cb.
 *
 * @ns_get_cb is called with @private_data to obtain the namespace; a NULL
 * result yields -ENOENT. The lookup is repeated (including a fresh call to
 * @ns_get_cb) for as long as path_from_stashed() reports -EAGAIN.
 *
 * Returns 0 on success or a negative errno.
 */
int ns_get_path_cb(struct path *path, ns_get_path_helper_t *ns_get_cb,
		   void *private_data)
{
	for (;;) {
		struct ns_common *nsc;
		int err;

		nsc = ns_get_cb(private_data);
		if (!nsc)
			return -ENOENT;

		err = path_from_stashed(&nsc->stashed, nsc->inum, nsfs_mnt,
					&ns_file_operations, NULL, nsc, path);
		if (err == -EAGAIN)
			continue;

		/*
		 * Our reference is dropped for a zero result and for errors.
		 * NOTE(review): a positive result presumably means the
		 * reference was taken over by path_from_stashed() - confirm
		 * against its definition.
		 */
		if (err <= 0)
			nsc->ops->put(nsc);

		/* Positive results are reported to the caller as plain success. */
		return err < 0 ? err : 0;
	}
}
|
|
|
|
/* Argument bundle passed from ns_get_path() to ns_get_path_task(). */
struct ns_get_path_task_args {
	/* Task handed to ->get(). */
	struct task_struct *task;
	/* Namespace operations whose ->get() is invoked on @task. */
	const struct proc_ns_operations *ns_ops;
};
|
|
|
|
static struct ns_common *ns_get_path_task(void *private_data)
|
|
{
|
|
struct ns_get_path_task_args *args = private_data;
|
|
|
|
return args->ns_ops->get(args->task);
|
|
}
|
|
|
|
int ns_get_path(struct path *path, struct task_struct *task,
|
|
const struct proc_ns_operations *ns_ops)
|
|
{
|
|
struct ns_get_path_task_args args = {
|
|
.ns_ops = ns_ops,
|
|
.task = task,
|
|
};
|
|
|
|
return ns_get_path_cb(path, ns_get_path_task, &args);
|
|
}
|
|
|
|
int open_related_ns(struct ns_common *ns,
|
|
struct ns_common *(*get_ns)(struct ns_common *ns))
|
|
{
|
|
struct path path = {};
|
|
struct file *f;
|
|
int err;
|
|
int fd;
|
|
|
|
fd = get_unused_fd_flags(O_CLOEXEC);
|
|
if (fd < 0)
|
|
return fd;
|
|
|
|
do {
|
|
struct ns_common *relative;
|
|
|
|
relative = get_ns(ns);
|
|
if (IS_ERR(relative)) {
|
|
put_unused_fd(fd);
|
|
return PTR_ERR(relative);
|
|
}
|
|
|
|
err = path_from_stashed(&relative->stashed, relative->inum,
|
|
nsfs_mnt, &ns_file_operations, NULL,
|
|
relative, &path);
|
|
if (err <= 0 && err != -EAGAIN)
|
|
relative->ops->put(relative);
|
|
} while (err == -EAGAIN);
|
|
|
|
if (err < 0) {
|
|
put_unused_fd(fd);
|
|
return err;
|
|
}
|
|
|
|
f = dentry_open(&path, O_RDONLY, current_cred());
|
|
path_put(&path);
|
|
if (IS_ERR(f)) {
|
|
put_unused_fd(fd);
|
|
fd = PTR_ERR(f);
|
|
} else
|
|
fd_install(fd, f);
|
|
|
|
return fd;
|
|
}
|
|
EXPORT_SYMBOL_GPL(open_related_ns);
|
|
|
|
static long ns_ioctl(struct file *filp, unsigned int ioctl,
|
|
unsigned long arg)
|
|
{
|
|
struct user_namespace *user_ns;
|
|
struct ns_common *ns = get_proc_ns(file_inode(filp));
|
|
uid_t __user *argp;
|
|
uid_t uid;
|
|
|
|
switch (ioctl) {
|
|
case NS_GET_USERNS:
|
|
return open_related_ns(ns, ns_get_owner);
|
|
case NS_GET_PARENT:
|
|
if (!ns->ops->get_parent)
|
|
return -EINVAL;
|
|
return open_related_ns(ns, ns->ops->get_parent);
|
|
case NS_GET_NSTYPE:
|
|
return ns->ops->type;
|
|
case NS_GET_OWNER_UID:
|
|
if (ns->ops->type != CLONE_NEWUSER)
|
|
return -EINVAL;
|
|
user_ns = container_of(ns, struct user_namespace, ns);
|
|
argp = (uid_t __user *) arg;
|
|
uid = from_kuid_munged(current_user_ns(), user_ns->owner);
|
|
return put_user(uid, argp);
|
|
default:
|
|
return -ENOTTY;
|
|
}
|
|
}
|
|
|
|
/*
 * Format the name of @task's namespace (as selected by @ns_ops) into @buf
 * as "<name>:[<inum>]", where <name> is ->real_ns_name when set and ->name
 * otherwise.
 *
 * Returns the snprintf() result, or -ENOENT when ->get() yields no namespace.
 */
int ns_get_name(char *buf, size_t size, struct task_struct *task,
		const struct proc_ns_operations *ns_ops)
{
	struct ns_common *nsc = ns_ops->get(task);
	const char *label;
	int len;

	if (!nsc)
		return -ENOENT;

	label = ns_ops->real_ns_name;
	if (!label)
		label = ns_ops->name;

	len = snprintf(buf, size, "%s:[%u]", label, nsc->inum);
	ns_ops->put(nsc);

	return len;
}
|
|
|
|
bool proc_ns_file(const struct file *file)
|
|
{
|
|
return file->f_op == &ns_file_operations;
|
|
}
|
|
|
|
/**
|
|
* ns_match() - Returns true if current namespace matches dev/ino provided.
|
|
* @ns: current namespace
|
|
* @dev: dev_t from nsfs that will be matched against current nsfs
|
|
* @ino: ino_t from nsfs that will be matched against current nsfs
|
|
*
|
|
* Return: true if dev and ino matches the current nsfs.
|
|
*/
|
|
bool ns_match(const struct ns_common *ns, dev_t dev, ino_t ino)
|
|
{
|
|
return (ns->inum == ino) && (nsfs_mnt->mnt_sb->s_dev == dev);
|
|
}
|
|
|
|
|
|
static int nsfs_show_path(struct seq_file *seq, struct dentry *dentry)
|
|
{
|
|
struct inode *inode = d_inode(dentry);
|
|
const struct ns_common *ns = inode->i_private;
|
|
const struct proc_ns_operations *ns_ops = ns->ops;
|
|
|
|
seq_printf(seq, "%s:[%lu]", ns_ops->name, inode->i_ino);
|
|
return 0;
|
|
}
|
|
|
|
static const struct super_operations nsfs_ops = {
|
|
.statfs = simple_statfs,
|
|
.evict_inode = nsfs_evict,
|
|
.show_path = nsfs_show_path,
|
|
};
|
|
|
|
static int nsfs_init_fs_context(struct fs_context *fc)
|
|
{
|
|
struct pseudo_fs_context *ctx = init_pseudo(fc, NSFS_MAGIC);
|
|
if (!ctx)
|
|
return -ENOMEM;
|
|
ctx->ops = &nsfs_ops;
|
|
ctx->dops = &ns_dentry_operations;
|
|
return 0;
|
|
}
|
|
|
|
static struct file_system_type nsfs = {
|
|
.name = "nsfs",
|
|
.init_fs_context = nsfs_init_fs_context,
|
|
.kill_sb = kill_anon_super,
|
|
};
|
|
|
|
/*
 * Boot-time setup: kernel-mount nsfs, record the mount in nsfs_mnt and
 * clear SB_NOUSER on its superblock. Panics if the mount fails.
 */
void __init nsfs_init(void)
{
	struct vfsmount *mnt = kern_mount(&nsfs);

	nsfs_mnt = mnt;
	if (IS_ERR(mnt))
		panic("can't set nsfs up\n");

	mnt->mnt_sb->s_flags &= ~SB_NOUSER;
}
|