mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-12 08:48:48 +00:00
c2e1f2e30d
Applying restrictive seccomp filter programs to large or diverse codebases often requires handling threads which may be started early in the process lifetime (e.g., by code that is linked in). While it is possible to apply permissive programs prior to process start up, it is difficult to further restrict the kernel ABI to those threads after that point. This change adds a new seccomp syscall flag to SECCOMP_SET_MODE_FILTER for synchronizing thread group seccomp filters at filter installation time. When calling seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, filter) an attempt will be made to synchronize all threads in current's threadgroup to its new seccomp filter program. This is possible iff all threads are using a filter that is an ancestor to the filter current is attempting to synchronize to. NULL filters (where the task is running as SECCOMP_MODE_NONE) are also treated as ancestors allowing threads to be transitioned into SECCOMP_MODE_FILTER. If prctrl(PR_SET_NO_NEW_PRIVS, ...) has been set on the calling thread, no_new_privs will be set for all synchronized threads too. On success, 0 is returned. On failure, the pid of one of the failing threads will be returned and no filters will have been applied. The race conditions against another thread are: - requesting TSYNC (already handled by sighand lock) - performing a clone (already handled by sighand lock) - changing its filter (already handled by sighand lock) - calling exec (handled by cred_guard_mutex) The clone case is assisted by the fact that new threads will have their seccomp state duplicated from their parent before appearing on the tasklist. Holding cred_guard_mutex means that seccomp filters cannot be assigned while in the middle of another thread's exec (potentially bypassing no_new_privs or similar). The call to de_thread() may kill threads waiting for the mutex. Changes across threads to the filter pointer includes a barrier. Based on patches by Will Drewry. Suggested-by: Julien Tinnes <jln@chromium.org> Signed-off-by: Kees Cook <keescook@chromium.org> Reviewed-by: Oleg Nesterov <oleg@redhat.com> Reviewed-by: Andy Lutomirski <luto@amacapital.net>
92 lines
2.1 KiB
C
92 lines
2.1 KiB
C
#ifndef _LINUX_SECCOMP_H
|
|
#define _LINUX_SECCOMP_H
|
|
|
|
#include <uapi/linux/seccomp.h>
|
|
|
|
#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC)
|
|
|
|
#ifdef CONFIG_SECCOMP
|
|
|
|
#include <linux/thread_info.h>
|
|
#include <asm/seccomp.h>
|
|
|
|
struct seccomp_filter;
|
|
/**
|
|
* struct seccomp - the state of a seccomp'ed process
|
|
*
|
|
* @mode: indicates one of the valid values above for controlled
|
|
* system calls available to a process.
|
|
* @filter: must always point to a valid seccomp-filter or NULL as it is
|
|
* accessed without locking during system call entry.
|
|
*
|
|
* @filter must only be accessed from the context of current as there
|
|
* is no read locking.
|
|
*/
|
|
struct seccomp {
|
|
int mode;
|
|
struct seccomp_filter *filter;
|
|
};
|
|
|
|
extern int __secure_computing(int);
|
|
static inline int secure_computing(int this_syscall)
|
|
{
|
|
if (unlikely(test_thread_flag(TIF_SECCOMP)))
|
|
return __secure_computing(this_syscall);
|
|
return 0;
|
|
}
|
|
|
|
/* A wrapper for architectures supporting only SECCOMP_MODE_STRICT. */
|
|
static inline void secure_computing_strict(int this_syscall)
|
|
{
|
|
BUG_ON(secure_computing(this_syscall) != 0);
|
|
}
|
|
|
|
extern long prctl_get_seccomp(void);
|
|
extern long prctl_set_seccomp(unsigned long, char __user *);
|
|
|
|
static inline int seccomp_mode(struct seccomp *s)
|
|
{
|
|
return s->mode;
|
|
}
|
|
|
|
#else /* CONFIG_SECCOMP */
|
|
|
|
#include <linux/errno.h>
|
|
|
|
struct seccomp { };
|
|
struct seccomp_filter { };
|
|
|
|
static inline int secure_computing(int this_syscall) { return 0; }
|
|
static inline void secure_computing_strict(int this_syscall) { return; }
|
|
|
|
static inline long prctl_get_seccomp(void)
|
|
{
|
|
return -EINVAL;
|
|
}
|
|
|
|
static inline long prctl_set_seccomp(unsigned long arg2, char __user *arg3)
|
|
{
|
|
return -EINVAL;
|
|
}
|
|
|
|
static inline int seccomp_mode(struct seccomp *s)
|
|
{
|
|
return 0;
|
|
}
|
|
#endif /* CONFIG_SECCOMP */
|
|
|
|
#ifdef CONFIG_SECCOMP_FILTER
|
|
extern void put_seccomp_filter(struct task_struct *tsk);
|
|
extern void get_seccomp_filter(struct task_struct *tsk);
|
|
#else /* CONFIG_SECCOMP_FILTER */
|
|
static inline void put_seccomp_filter(struct task_struct *tsk)
|
|
{
|
|
return;
|
|
}
|
|
static inline void get_seccomp_filter(struct task_struct *tsk)
|
|
{
|
|
return;
|
|
}
|
|
#endif /* CONFIG_SECCOMP_FILTER */
|
|
#endif /* _LINUX_SECCOMP_H */
|