mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-04 04:02:26 +00:00
clone3-v5.3
-----BEGIN PGP SIGNATURE----- iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCXSMhhgAKCRCRxhvAZXjc or7kAP9VzDcQaK/WoDd2ezh2C7Wh5hNy9z/qJVCa6Tb+N+g1UgEAxbhFUg55uGOA JNf7fGar5JF5hBMIXR+NqOi1/sb4swg= =ELWo -----END PGP SIGNATURE----- Merge tag 'clone3-v5.3' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux Pull clone3 system call from Christian Brauner: "This adds the clone3 syscall which is an extensible successor to clone after we snagged the last flag with CLONE_PIDFD during the 5.2 merge window for clone(). It cleanly supports all of the flags from clone() and thus all legacy workloads. There are a few user-visible differences between clone3 and clone. First, CLONE_DETACHED will cause EINVAL with clone3 so we can reuse this flag. Second, the CSIGNAL flag is deprecated and will cause EINVAL to be reported. It is superseded by a dedicated "exit_signal" argument in struct clone_args, thus freeing up even more flags. And third, clone3 gives CLONE_PIDFD a dedicated return argument in struct clone_args instead of abusing CLONE_PARENT_SETTID's parent_tidptr argument. The clone3 uapi is designed to be easy to handle on 32- and 64-bit: /* uapi */ struct clone_args { __aligned_u64 flags; __aligned_u64 pidfd; __aligned_u64 child_tid; __aligned_u64 parent_tid; __aligned_u64 exit_signal; __aligned_u64 stack; __aligned_u64 stack_size; __aligned_u64 tls; }; and a separate kernel struct is used that uses proper kernel typing: /* kernel internal */ struct kernel_clone_args { u64 flags; int __user *pidfd; int __user *child_tid; int __user *parent_tid; int exit_signal; unsigned long stack; unsigned long stack_size; unsigned long tls; }; The system call comes with a size argument which enables the kernel to detect what version of clone_args userspace is passing in. clone3 validates that any additional bytes a given kernel does not know about are set to zero and that the size never exceeds a page. 
A nice feature is that this patchset allowed us to clean up and simplify various core kernel codepaths in kernel/fork.c by making the internal _do_fork() function take struct kernel_clone_args even for legacy clone(). This patch also unblocks the time namespace patchset which wants to introduce a new CLONE_TIMENS flag. Note that clone3 has only been wired up for x86{_32,64}, arm{64}, and xtensa. These were the architectures that did not require special massaging. Other architectures treat fork-like system calls individually, and after some back and forth neither Arnd nor I felt confident that we dared to add clone3 unconditionally to all architectures. We agreed to leave this up to individual architecture maintainers. This is why there's an additional patch that introduces __ARCH_WANT_SYS_CLONE3 which any architecture can set once it has implemented support for clone3. The patch also adds a cond_syscall(clone3) for architectures such as nios2 or h8300 that generate their syscall table by simply including asm-generic/unistd.h. The hope is to get rid of __ARCH_WANT_SYS_CLONE3 and cond_syscall() rather soon" * tag 'clone3-v5.3' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux: arch: handle arches who do not yet define clone3 arch: wire-up clone3() syscall fork: add clone3
This commit is contained in:
commit
8f6ccf6159
@ -37,6 +37,7 @@
|
||||
#define __ARCH_WANT_SYS_FORK
|
||||
#define __ARCH_WANT_SYS_VFORK
|
||||
#define __ARCH_WANT_SYS_CLONE
|
||||
#define __ARCH_WANT_SYS_CLONE3
|
||||
|
||||
/*
|
||||
* Unimplemented (or alternatively implemented) syscalls
|
||||
|
@ -448,3 +448,4 @@
|
||||
432 common fsmount sys_fsmount
|
||||
433 common fspick sys_fspick
|
||||
434 common pidfd_open sys_pidfd_open
|
||||
435 common clone3 sys_clone3
|
||||
|
@ -38,10 +38,11 @@
|
||||
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5)
|
||||
#define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800)
|
||||
|
||||
#define __NR_compat_syscalls 435
|
||||
#define __NR_compat_syscalls 436
|
||||
#endif
|
||||
|
||||
#define __ARCH_WANT_SYS_CLONE
|
||||
#define __ARCH_WANT_SYS_CLONE3
|
||||
|
||||
#ifndef __COMPAT_SYSCALL_NR
|
||||
#include <uapi/asm/unistd.h>
|
||||
|
@ -877,6 +877,8 @@ __SYSCALL(__NR_fsmount, sys_fsmount)
|
||||
__SYSCALL(__NR_fspick, sys_fspick)
|
||||
#define __NR_pidfd_open 434
|
||||
__SYSCALL(__NR_pidfd_open, sys_pidfd_open)
|
||||
#define __NR_clone3 435
|
||||
__SYSCALL(__NR_clone3, sys_clone3)
|
||||
|
||||
/*
|
||||
* Please add new compat syscalls above this comment and update
|
||||
|
@ -440,3 +440,4 @@
|
||||
432 common fsmount sys_fsmount
|
||||
433 common fspick sys_fspick
|
||||
434 common pidfd_open sys_pidfd_open
|
||||
435 common clone3 sys_clone3
|
||||
|
@ -439,3 +439,4 @@
|
||||
432 i386 fsmount sys_fsmount __ia32_sys_fsmount
|
||||
433 i386 fspick sys_fspick __ia32_sys_fspick
|
||||
434 i386 pidfd_open sys_pidfd_open __ia32_sys_pidfd_open
|
||||
435 i386 clone3 sys_clone3 __ia32_sys_clone3
|
||||
|
@ -356,6 +356,7 @@
|
||||
432 common fsmount __x64_sys_fsmount
|
||||
433 common fspick __x64_sys_fspick
|
||||
434 common pidfd_open __x64_sys_pidfd_open
|
||||
435 common clone3 __x64_sys_clone3/ptregs
|
||||
|
||||
#
|
||||
# x32-specific system call numbers start at 512 to avoid cache impact
|
||||
|
@ -237,6 +237,14 @@ COMPAT_SYSCALL_DEFINE5(x86_clone, unsigned long, clone_flags,
|
||||
unsigned long, newsp, int __user *, parent_tidptr,
|
||||
unsigned long, tls_val, int __user *, child_tidptr)
|
||||
{
|
||||
return _do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr,
|
||||
tls_val);
|
||||
struct kernel_clone_args args = {
|
||||
.flags = (clone_flags & ~CSIGNAL),
|
||||
.child_tid = child_tidptr,
|
||||
.parent_tid = parent_tidptr,
|
||||
.exit_signal = (clone_flags & CSIGNAL),
|
||||
.stack = newsp,
|
||||
.tls = tls_val,
|
||||
};
|
||||
|
||||
return _do_fork(&args);
|
||||
}
|
||||
|
@ -54,5 +54,6 @@
|
||||
# define __ARCH_WANT_SYS_FORK
|
||||
# define __ARCH_WANT_SYS_VFORK
|
||||
# define __ARCH_WANT_SYS_CLONE
|
||||
# define __ARCH_WANT_SYS_CLONE3
|
||||
|
||||
#endif /* _ASM_X86_UNISTD_H */
|
||||
|
@ -3,6 +3,7 @@
|
||||
#define _XTENSA_UNISTD_H
|
||||
|
||||
#define __ARCH_WANT_SYS_CLONE
|
||||
#define __ARCH_WANT_SYS_CLONE3
|
||||
#include <uapi/asm/unistd.h>
|
||||
|
||||
#define __ARCH_WANT_NEW_STAT
|
||||
|
@ -405,3 +405,4 @@
|
||||
432 common fsmount sys_fsmount
|
||||
433 common fspick sys_fspick
|
||||
434 common pidfd_open sys_pidfd_open
|
||||
435 common clone3 sys_clone3
|
||||
|
@ -8,11 +8,26 @@
|
||||
*/
|
||||
|
||||
#include <linux/sched.h>
|
||||
#include <linux/uaccess.h>
|
||||
|
||||
struct task_struct;
|
||||
struct rusage;
|
||||
union thread_union;
|
||||
|
||||
/* All the bits taken by the old clone syscall. */
|
||||
#define CLONE_LEGACY_FLAGS 0xffffffffULL
|
||||
|
||||
struct kernel_clone_args {
|
||||
u64 flags;
|
||||
int __user *pidfd;
|
||||
int __user *child_tid;
|
||||
int __user *parent_tid;
|
||||
int exit_signal;
|
||||
unsigned long stack;
|
||||
unsigned long stack_size;
|
||||
unsigned long tls;
|
||||
};
|
||||
|
||||
/*
|
||||
* This serializes "schedule()" and also protects
|
||||
* the run-queue from deletions/modifications (but
|
||||
@ -73,7 +88,7 @@ extern void do_group_exit(int);
|
||||
extern void exit_files(struct task_struct *);
|
||||
extern void exit_itimers(struct signal_struct *);
|
||||
|
||||
extern long _do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *, unsigned long);
|
||||
extern long _do_fork(struct kernel_clone_args *kargs);
|
||||
extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *);
|
||||
struct task_struct *fork_idle(int);
|
||||
struct mm_struct *copy_init_mm(void);
|
||||
|
@ -68,6 +68,7 @@ struct sigaltstack;
|
||||
struct rseq;
|
||||
union bpf_attr;
|
||||
struct io_uring_params;
|
||||
struct clone_args;
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/aio_abi.h>
|
||||
@ -850,6 +851,9 @@ asmlinkage long sys_clone(unsigned long, unsigned long, int __user *,
|
||||
int __user *, unsigned long);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
asmlinkage long sys_clone3(struct clone_args __user *uargs, size_t size);
|
||||
|
||||
asmlinkage long sys_execve(const char __user *filename,
|
||||
const char __user *const __user *argv,
|
||||
const char __user *const __user *envp);
|
||||
|
@ -846,9 +846,11 @@ __SYSCALL(__NR_fsmount, sys_fsmount)
|
||||
__SYSCALL(__NR_fspick, sys_fspick)
|
||||
#define __NR_pidfd_open 434
|
||||
__SYSCALL(__NR_pidfd_open, sys_pidfd_open)
|
||||
#define __NR_clone3 435
|
||||
__SYSCALL(__NR_clone3, sys_clone3)
|
||||
|
||||
#undef __NR_syscalls
|
||||
#define __NR_syscalls 435
|
||||
#define __NR_syscalls 436
|
||||
|
||||
/*
|
||||
* 32 bit systems traditionally used different
|
||||
|
@ -2,6 +2,8 @@
|
||||
#ifndef _UAPI_LINUX_SCHED_H
|
||||
#define _UAPI_LINUX_SCHED_H
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
/*
|
||||
* cloning flags:
|
||||
*/
|
||||
@ -31,6 +33,20 @@
|
||||
#define CLONE_NEWNET 0x40000000 /* New network namespace */
|
||||
#define CLONE_IO 0x80000000 /* Clone io context */
|
||||
|
||||
/*
|
||||
* Arguments for the clone3 syscall
|
||||
*/
|
||||
struct clone_args {
|
||||
__aligned_u64 flags;
|
||||
__aligned_u64 pidfd;
|
||||
__aligned_u64 child_tid;
|
||||
__aligned_u64 parent_tid;
|
||||
__aligned_u64 exit_signal;
|
||||
__aligned_u64 stack;
|
||||
__aligned_u64 stack_size;
|
||||
__aligned_u64 tls;
|
||||
};
|
||||
|
||||
/*
|
||||
* Scheduling policies
|
||||
*/
|
||||
|
191
kernel/fork.c
191
kernel/fork.c
@ -1768,20 +1768,16 @@ static __always_inline void delayed_free_task(struct task_struct *tsk)
|
||||
* flags). The actual kick-off is left to the caller.
|
||||
*/
|
||||
static __latent_entropy struct task_struct *copy_process(
|
||||
unsigned long clone_flags,
|
||||
unsigned long stack_start,
|
||||
unsigned long stack_size,
|
||||
int __user *parent_tidptr,
|
||||
int __user *child_tidptr,
|
||||
struct pid *pid,
|
||||
int trace,
|
||||
unsigned long tls,
|
||||
int node)
|
||||
int node,
|
||||
struct kernel_clone_args *args)
|
||||
{
|
||||
int pidfd = -1, retval;
|
||||
struct task_struct *p;
|
||||
struct multiprocess_signals delayed;
|
||||
struct file *pidfile = NULL;
|
||||
u64 clone_flags = args->flags;
|
||||
|
||||
/*
|
||||
* Don't allow sharing the root directory with processes in a different
|
||||
@ -1831,14 +1827,11 @@ static __latent_entropy struct task_struct *copy_process(
|
||||
|
||||
if (clone_flags & CLONE_PIDFD) {
|
||||
/*
|
||||
* - CLONE_PARENT_SETTID is useless for pidfds and also
|
||||
* parent_tidptr is used to return pidfds.
|
||||
* - CLONE_DETACHED is blocked so that we can potentially
|
||||
* reuse it later for CLONE_PIDFD.
|
||||
* - CLONE_THREAD is blocked until someone really needs it.
|
||||
*/
|
||||
if (clone_flags &
|
||||
(CLONE_DETACHED | CLONE_PARENT_SETTID | CLONE_THREAD))
|
||||
if (clone_flags & (CLONE_DETACHED | CLONE_THREAD))
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
@ -1871,11 +1864,11 @@ static __latent_entropy struct task_struct *copy_process(
|
||||
* p->set_child_tid which is (ab)used as a kthread's data pointer for
|
||||
* kernel threads (PF_KTHREAD).
|
||||
*/
|
||||
p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
|
||||
p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? args->child_tid : NULL;
|
||||
/*
|
||||
* Clear TID on mm_release()?
|
||||
*/
|
||||
p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;
|
||||
p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? args->child_tid : NULL;
|
||||
|
||||
ftrace_graph_init_task(p);
|
||||
|
||||
@ -2031,7 +2024,8 @@ static __latent_entropy struct task_struct *copy_process(
|
||||
retval = copy_io(clone_flags, p);
|
||||
if (retval)
|
||||
goto bad_fork_cleanup_namespaces;
|
||||
retval = copy_thread_tls(clone_flags, stack_start, stack_size, p, tls);
|
||||
retval = copy_thread_tls(clone_flags, args->stack, args->stack_size, p,
|
||||
args->tls);
|
||||
if (retval)
|
||||
goto bad_fork_cleanup_io;
|
||||
|
||||
@ -2066,7 +2060,7 @@ static __latent_entropy struct task_struct *copy_process(
|
||||
}
|
||||
get_pid(pid); /* held by pidfile now */
|
||||
|
||||
retval = put_user(pidfd, parent_tidptr);
|
||||
retval = put_user(pidfd, args->pidfd);
|
||||
if (retval)
|
||||
goto bad_fork_put_pidfd;
|
||||
}
|
||||
@ -2109,7 +2103,7 @@ static __latent_entropy struct task_struct *copy_process(
|
||||
if (clone_flags & CLONE_PARENT)
|
||||
p->exit_signal = current->group_leader->exit_signal;
|
||||
else
|
||||
p->exit_signal = (clone_flags & CSIGNAL);
|
||||
p->exit_signal = args->exit_signal;
|
||||
p->group_leader = p;
|
||||
p->tgid = p->pid;
|
||||
}
|
||||
@ -2322,8 +2316,11 @@ static inline void init_idle_pids(struct task_struct *idle)
|
||||
struct task_struct *fork_idle(int cpu)
|
||||
{
|
||||
struct task_struct *task;
|
||||
task = copy_process(CLONE_VM, 0, 0, NULL, NULL, &init_struct_pid, 0, 0,
|
||||
cpu_to_node(cpu));
|
||||
struct kernel_clone_args args = {
|
||||
.flags = CLONE_VM,
|
||||
};
|
||||
|
||||
task = copy_process(&init_struct_pid, 0, cpu_to_node(cpu), &args);
|
||||
if (!IS_ERR(task)) {
|
||||
init_idle_pids(task);
|
||||
init_idle(task, cpu);
|
||||
@ -2343,13 +2340,9 @@ struct mm_struct *copy_init_mm(void)
|
||||
* It copies the process, and if successful kick-starts
|
||||
* it and waits for it to finish using the VM if required.
|
||||
*/
|
||||
long _do_fork(unsigned long clone_flags,
|
||||
unsigned long stack_start,
|
||||
unsigned long stack_size,
|
||||
int __user *parent_tidptr,
|
||||
int __user *child_tidptr,
|
||||
unsigned long tls)
|
||||
long _do_fork(struct kernel_clone_args *args)
|
||||
{
|
||||
u64 clone_flags = args->flags;
|
||||
struct completion vfork;
|
||||
struct pid *pid;
|
||||
struct task_struct *p;
|
||||
@ -2365,7 +2358,7 @@ long _do_fork(unsigned long clone_flags,
|
||||
if (!(clone_flags & CLONE_UNTRACED)) {
|
||||
if (clone_flags & CLONE_VFORK)
|
||||
trace = PTRACE_EVENT_VFORK;
|
||||
else if ((clone_flags & CSIGNAL) != SIGCHLD)
|
||||
else if (args->exit_signal != SIGCHLD)
|
||||
trace = PTRACE_EVENT_CLONE;
|
||||
else
|
||||
trace = PTRACE_EVENT_FORK;
|
||||
@ -2374,8 +2367,7 @@ long _do_fork(unsigned long clone_flags,
|
||||
trace = 0;
|
||||
}
|
||||
|
||||
p = copy_process(clone_flags, stack_start, stack_size, parent_tidptr,
|
||||
child_tidptr, NULL, trace, tls, NUMA_NO_NODE);
|
||||
p = copy_process(NULL, trace, NUMA_NO_NODE, args);
|
||||
add_latent_entropy();
|
||||
|
||||
if (IS_ERR(p))
|
||||
@ -2391,7 +2383,7 @@ long _do_fork(unsigned long clone_flags,
|
||||
nr = pid_vnr(pid);
|
||||
|
||||
if (clone_flags & CLONE_PARENT_SETTID)
|
||||
put_user(nr, parent_tidptr);
|
||||
put_user(nr, args->parent_tid);
|
||||
|
||||
if (clone_flags & CLONE_VFORK) {
|
||||
p->vfork_done = &vfork;
|
||||
@ -2423,8 +2415,16 @@ long do_fork(unsigned long clone_flags,
|
||||
int __user *parent_tidptr,
|
||||
int __user *child_tidptr)
|
||||
{
|
||||
return _do_fork(clone_flags, stack_start, stack_size,
|
||||
parent_tidptr, child_tidptr, 0);
|
||||
struct kernel_clone_args args = {
|
||||
.flags = (clone_flags & ~CSIGNAL),
|
||||
.child_tid = child_tidptr,
|
||||
.parent_tid = parent_tidptr,
|
||||
.exit_signal = (clone_flags & CSIGNAL),
|
||||
.stack = stack_start,
|
||||
.stack_size = stack_size,
|
||||
};
|
||||
|
||||
return _do_fork(&args);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -2433,15 +2433,25 @@ long do_fork(unsigned long clone_flags,
|
||||
*/
|
||||
pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
|
||||
{
|
||||
return _do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn,
|
||||
(unsigned long)arg, NULL, NULL, 0);
|
||||
struct kernel_clone_args args = {
|
||||
.flags = ((flags | CLONE_VM | CLONE_UNTRACED) & ~CSIGNAL),
|
||||
.exit_signal = (flags & CSIGNAL),
|
||||
.stack = (unsigned long)fn,
|
||||
.stack_size = (unsigned long)arg,
|
||||
};
|
||||
|
||||
return _do_fork(&args);
|
||||
}
|
||||
|
||||
#ifdef __ARCH_WANT_SYS_FORK
|
||||
SYSCALL_DEFINE0(fork)
|
||||
{
|
||||
#ifdef CONFIG_MMU
|
||||
return _do_fork(SIGCHLD, 0, 0, NULL, NULL, 0);
|
||||
struct kernel_clone_args args = {
|
||||
.exit_signal = SIGCHLD,
|
||||
};
|
||||
|
||||
return _do_fork(&args);
|
||||
#else
|
||||
/* can not support in nommu mode */
|
||||
return -EINVAL;
|
||||
@ -2452,8 +2462,12 @@ SYSCALL_DEFINE0(fork)
|
||||
#ifdef __ARCH_WANT_SYS_VFORK
|
||||
SYSCALL_DEFINE0(vfork)
|
||||
{
|
||||
return _do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0,
|
||||
0, NULL, NULL, 0);
|
||||
struct kernel_clone_args args = {
|
||||
.flags = CLONE_VFORK | CLONE_VM,
|
||||
.exit_signal = SIGCHLD,
|
||||
};
|
||||
|
||||
return _do_fork(&args);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -2481,7 +2495,112 @@ SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
|
||||
unsigned long, tls)
|
||||
#endif
|
||||
{
|
||||
return _do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr, tls);
|
||||
struct kernel_clone_args args = {
|
||||
.flags = (clone_flags & ~CSIGNAL),
|
||||
.pidfd = parent_tidptr,
|
||||
.child_tid = child_tidptr,
|
||||
.parent_tid = parent_tidptr,
|
||||
.exit_signal = (clone_flags & CSIGNAL),
|
||||
.stack = newsp,
|
||||
.tls = tls,
|
||||
};
|
||||
|
||||
/* clone(CLONE_PIDFD) uses parent_tidptr to return a pidfd */
|
||||
if ((clone_flags & CLONE_PIDFD) && (clone_flags & CLONE_PARENT_SETTID))
|
||||
return -EINVAL;
|
||||
|
||||
return _do_fork(&args);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __ARCH_WANT_SYS_CLONE3
|
||||
noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs,
|
||||
struct clone_args __user *uargs,
|
||||
size_t size)
|
||||
{
|
||||
struct clone_args args;
|
||||
|
||||
if (unlikely(size > PAGE_SIZE))
|
||||
return -E2BIG;
|
||||
|
||||
if (unlikely(size < sizeof(struct clone_args)))
|
||||
return -EINVAL;
|
||||
|
||||
if (unlikely(!access_ok(uargs, size)))
|
||||
return -EFAULT;
|
||||
|
||||
if (size > sizeof(struct clone_args)) {
|
||||
unsigned char __user *addr;
|
||||
unsigned char __user *end;
|
||||
unsigned char val;
|
||||
|
||||
addr = (void __user *)uargs + sizeof(struct clone_args);
|
||||
end = (void __user *)uargs + size;
|
||||
|
||||
for (; addr < end; addr++) {
|
||||
if (get_user(val, addr))
|
||||
return -EFAULT;
|
||||
if (val)
|
||||
return -E2BIG;
|
||||
}
|
||||
|
||||
size = sizeof(struct clone_args);
|
||||
}
|
||||
|
||||
if (copy_from_user(&args, uargs, size))
|
||||
return -EFAULT;
|
||||
|
||||
*kargs = (struct kernel_clone_args){
|
||||
.flags = args.flags,
|
||||
.pidfd = u64_to_user_ptr(args.pidfd),
|
||||
.child_tid = u64_to_user_ptr(args.child_tid),
|
||||
.parent_tid = u64_to_user_ptr(args.parent_tid),
|
||||
.exit_signal = args.exit_signal,
|
||||
.stack = args.stack,
|
||||
.stack_size = args.stack_size,
|
||||
.tls = args.tls,
|
||||
};
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool clone3_args_valid(const struct kernel_clone_args *kargs)
|
||||
{
|
||||
/*
|
||||
* All lower bits of the flag word are taken.
|
||||
* Verify that no other unknown flags are passed along.
|
||||
*/
|
||||
if (kargs->flags & ~CLONE_LEGACY_FLAGS)
|
||||
return false;
|
||||
|
||||
/*
|
||||
* - make the CLONE_DETACHED bit reuseable for clone3
|
||||
* - make the CSIGNAL bits reuseable for clone3
|
||||
*/
|
||||
if (kargs->flags & (CLONE_DETACHED | CSIGNAL))
|
||||
return false;
|
||||
|
||||
if ((kargs->flags & (CLONE_THREAD | CLONE_PARENT)) &&
|
||||
kargs->exit_signal)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
 * clone3() entry point: import the user-supplied arguments, validate them
 * and hand the resulting kernel argument block to _do_fork().
 */
SYSCALL_DEFINE2(clone3, struct clone_args __user *, uargs, size_t, size)
{
	struct kernel_clone_args kargs;
	int ret;

	ret = copy_clone_args_from_user(&kargs, uargs, size);
	if (ret)
		return ret;

	return clone3_args_valid(&kargs) ? _do_fork(&kargs) : -EINVAL;
}
|
||||
#endif
|
||||
|
||||
|
@ -137,6 +137,8 @@ COND_SYSCALL(capset);
|
||||
/* kernel/exit.c */
|
||||
|
||||
/* kernel/fork.c */
|
||||
/* __ARCH_WANT_SYS_CLONE3 */
|
||||
COND_SYSCALL(clone3);
|
||||
|
||||
/* kernel/futex.c */
|
||||
COND_SYSCALL(futex);
|
||||
|
Loading…
Reference in New Issue
Block a user