mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-17 22:05:08 +00:00
1ec6574a3c
ordinary user mode tasks. Commit 40966e316f86 ("kthread: Ensure struct kthread is present for all kthreads") caused init and the user mode helper threads that call kernel_execve to have struct kthread allocated for them. This struct kthread going away during execve in turn made a use after free of struct kthread possible. Commit 343f4c49f243 ("kthread: Don't allocate kthread_struct for init and umh") is enough to fix the use after free and is simple enough to be backportable. The rest of the changes pass struct kernel_clone_args to clean things up and cause the code to make sense. In making init and the user mode helper tasks purely user mode tasks I ran into two complications. The function task_tick_numa was detecting tasks without an mm by testing for the presence of PF_KTHREAD. The initramfs code in populate_initrd_image was using flush_delayed_fput to ensure the closing of all its file descriptors was complete, and flush_delayed_fput does not work in a userspace thread. I have looked and looked for more complications and in my code review I have not found any, and neither has anyone else with the code sitting in linux-next. Link: https://lkml.kernel.org/r/87mtfu4up3.fsf@email.froward.int.ebiederm.org Eric W.
Biederman (8): kthread: Don't allocate kthread_struct for init and umh fork: Pass struct kernel_clone_args into copy_thread fork: Explicity test for idle tasks in copy_thread fork: Generalize PF_IO_WORKER handling init: Deal with the init process being a user mode process fork: Explicitly set PF_KTHREAD fork: Stop allowing kthreads to call execve sched: Update task_tick_numa to ignore tasks without an mm arch/alpha/kernel/process.c | 13 ++++++------ arch/arc/kernel/process.c | 13 ++++++------ arch/arm/kernel/process.c | 12 ++++++----- arch/arm64/kernel/process.c | 12 ++++++----- arch/csky/kernel/process.c | 15 ++++++------- arch/h8300/kernel/process.c | 10 ++++----- arch/hexagon/kernel/process.c | 12 ++++++----- arch/ia64/kernel/process.c | 15 +++++++------ arch/m68k/kernel/process.c | 12 ++++++----- arch/microblaze/kernel/process.c | 12 ++++++----- arch/mips/kernel/process.c | 13 ++++++------ arch/nios2/kernel/process.c | 12 ++++++----- arch/openrisc/kernel/process.c | 12 ++++++----- arch/parisc/kernel/process.c | 18 +++++++++------- arch/powerpc/kernel/process.c | 15 +++++++------ arch/riscv/kernel/process.c | 12 ++++++----- arch/s390/kernel/process.c | 12 ++++++----- arch/sh/kernel/process_32.c | 12 ++++++----- arch/sparc/kernel/process_32.c | 12 ++++++----- arch/sparc/kernel/process_64.c | 12 ++++++----- arch/um/kernel/process.c | 15 +++++++------ arch/x86/include/asm/fpu/sched.h | 2 +- arch/x86/include/asm/switch_to.h | 8 +++---- arch/x86/kernel/fpu/core.c | 4 ++-- arch/x86/kernel/process.c | 18 +++++++++------- arch/xtensa/kernel/process.c | 17 ++++++++------- fs/exec.c | 8 ++++--- include/linux/sched/task.h | 8 +++++-- init/initramfs.c | 2 ++ init/main.c | 2 +- kernel/fork.c | 46 +++++++++++++++++++++++++++++++++------- kernel/sched/fair.c | 2 +- kernel/umh.c | 6 +++--- 33 files changed, 234 insertions(+), 160 deletions(-) Signed-off-by: "Eric W. 
Biederman" <ebiederm@xmission.com> -----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEEgjlraLDcwBA2B+6cC/v6Eiajj0AFAmKaR/MACgkQC/v6Eiaj j0Aayg/7Bx66872d9c6igkJ+MPCTuh+v9QKCGwiYEmiU4Q5sVAFB0HPJO27qC14u 630X0RFNZTkPzNNEJNIW4kw6Dj8s8YRKf+FgQAVt4SzdRwT7eIPDjk1nGraopPJ3 O04pjvuTmUyidyViRyFcf2ptx/pnkrwP8jUSc+bGTgfASAKAgAokqKE5ecjewbBc Y/EAkQ6QW7KxPjeSmpAHwI+t3BpBev9WEC4PbhRhsBCQFO2+PJiklvqdhVNBnIjv qUezll/1xv9UYgniB15Q4Nb722SmnWSU3r8as1eFPugzTHizKhufrrpyP+KMK1A0 tdtEJNs5t2DZF7ZbGTFSPqJWmyTYLrghZdO+lOmnaSjHxK4Nda1d4NzbefJ0u+FE tutewowvHtBX6AFIbx+H3O+DOJM2IgNMf+ReQDU/TyNyVf3wBrTbsr9cLxypIJIp zze8npoLMlB7B4yxVo5ES5e63EXfi3iHl0L3/1EhoGwriRz1kWgVLUX/VZOUpscL RkJHsW6bT8sqxPWAA5kyWjEN+wNR2PxbXi8OE4arT0uJrEBMUgDCzydzOv5tJB00 mSQdytxH9LVdsmxBKAOBp5X6WOLGA4yb1cZ6E/mEhlqXMpBDF1DaMfwbWqxSYi4q sp5zU3SBAW0qceiZSsWZXInfbjrcQXNV/DkDRDO9OmzEZP4m1j0= =x6fy -----END PGP SIGNATURE----- Merge tag 'kthread-cleanups-for-v5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace Pull kthread updates from Eric Biederman: "This updates init and user mode helper tasks to be ordinary user mode tasks. Commit 40966e316f86 ("kthread: Ensure struct kthread is present for all kthreads") caused init and the user mode helper threads that call kernel_execve to have struct kthread allocated for them. This struct kthread going away during execve in turned made a use after free of struct kthread possible. Here, commit 343f4c49f243 ("kthread: Don't allocate kthread_struct for init and umh") is enough to fix the use after free and is simple enough to be backportable. The rest of the changes pass struct kernel_clone_args to clean things up and cause the code to make sense. In making init and the user mode helpers tasks purely user mode tasks I ran into two complications. The function task_tick_numa was detecting tasks without an mm by testing for the presence of PF_KTHREAD. 
The initramfs code in populate_initrd_image was using flush_delayed_fput to ensure the closing of all its file descriptors was complete, and flush_delayed_fput does not work in a userspace thread. I have looked and looked for more complications and in my code review I have not found any, and neither has anyone else with the code sitting in linux-next" * tag 'kthread-cleanups-for-v5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace: sched: Update task_tick_numa to ignore tasks without an mm fork: Stop allowing kthreads to call execve fork: Explicitly set PF_KTHREAD init: Deal with the init process being a user mode process fork: Generalize PF_IO_WORKER handling fork: Explicity test for idle tasks in copy_thread fork: Pass struct kernel_clone_args into copy_thread kthread: Don't allocate kthread_struct for init and umh
190 lines
5.0 KiB
C
190 lines
5.0 KiB
C
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
/*
|
|
* Copyright (C) 2009 Sunplus Core Technology Co., Ltd.
|
|
* Chen Liqin <liqin.chen@sunplusct.com>
|
|
* Lennox Wu <lennox.wu@sunplusct.com>
|
|
* Copyright (C) 2012 Regents of the University of California
|
|
* Copyright (C) 2017 SiFive
|
|
*/
|
|
|
|
#include <linux/cpu.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/sched/debug.h>
|
|
#include <linux/sched/task_stack.h>
|
|
#include <linux/tick.h>
|
|
#include <linux/ptrace.h>
|
|
#include <linux/uaccess.h>
|
|
|
|
#include <asm/unistd.h>
|
|
#include <asm/processor.h>
|
|
#include <asm/csr.h>
|
|
#include <asm/stacktrace.h>
|
|
#include <asm/string.h>
|
|
#include <asm/switch_to.h>
|
|
#include <asm/thread_info.h>
|
|
#include <asm/cpuidle.h>
|
|
|
|
/*
 * Global register variable bound to the "gp" register. copy_thread() copies
 * its value into the saved gp of a newly created kernel thread.
 */
register unsigned long gp_in_global __asm__("gp");
|
|
|
|
#ifdef CONFIG_STACKPROTECTOR
#ifndef CONFIG_STACKPROTECTOR_PER_TASK
#include <linux/stackprotector.h>
/*
 * Single, file-defined guard value. It is only built when the stack
 * protector is enabled without the per-task variant, and is exported so
 * that code outside this file can reference the symbol.
 */
unsigned long __stack_chk_guard __read_mostly;
EXPORT_SYMBOL(__stack_chk_guard);
#endif /* !CONFIG_STACKPROTECTOR_PER_TASK */
#endif /* CONFIG_STACKPROTECTOR */
|
|
|
|
/*
 * Entry points defined outside this file. copy_thread() stores one of them
 * in the new task's saved return address (thread.ra).
 */
asmlinkage void ret_from_fork(void);
asmlinkage void ret_from_kernel_thread(void);
|
|
|
|
/*
 * Architecture idle hook: run the CPU idle routine, then re-enable
 * interrupts with the raw (untraced) primitive before returning.
 */
void arch_cpu_idle(void)
{
	/* Idle first ... */
	cpu_do_idle();

	/* ... and only afterwards switch local interrupts back on. */
	raw_local_irq_enable();
}
|
|
|
|
void __show_regs(struct pt_regs *regs)
|
|
{
|
|
show_regs_print_info(KERN_DEFAULT);
|
|
|
|
if (!user_mode(regs)) {
|
|
pr_cont("epc : %pS\n", (void *)regs->epc);
|
|
pr_cont(" ra : %pS\n", (void *)regs->ra);
|
|
}
|
|
|
|
pr_cont("epc : " REG_FMT " ra : " REG_FMT " sp : " REG_FMT "\n",
|
|
regs->epc, regs->ra, regs->sp);
|
|
pr_cont(" gp : " REG_FMT " tp : " REG_FMT " t0 : " REG_FMT "\n",
|
|
regs->gp, regs->tp, regs->t0);
|
|
pr_cont(" t1 : " REG_FMT " t2 : " REG_FMT " s0 : " REG_FMT "\n",
|
|
regs->t1, regs->t2, regs->s0);
|
|
pr_cont(" s1 : " REG_FMT " a0 : " REG_FMT " a1 : " REG_FMT "\n",
|
|
regs->s1, regs->a0, regs->a1);
|
|
pr_cont(" a2 : " REG_FMT " a3 : " REG_FMT " a4 : " REG_FMT "\n",
|
|
regs->a2, regs->a3, regs->a4);
|
|
pr_cont(" a5 : " REG_FMT " a6 : " REG_FMT " a7 : " REG_FMT "\n",
|
|
regs->a5, regs->a6, regs->a7);
|
|
pr_cont(" s2 : " REG_FMT " s3 : " REG_FMT " s4 : " REG_FMT "\n",
|
|
regs->s2, regs->s3, regs->s4);
|
|
pr_cont(" s5 : " REG_FMT " s6 : " REG_FMT " s7 : " REG_FMT "\n",
|
|
regs->s5, regs->s6, regs->s7);
|
|
pr_cont(" s8 : " REG_FMT " s9 : " REG_FMT " s10: " REG_FMT "\n",
|
|
regs->s8, regs->s9, regs->s10);
|
|
pr_cont(" s11: " REG_FMT " t3 : " REG_FMT " t4 : " REG_FMT "\n",
|
|
regs->s11, regs->t3, regs->t4);
|
|
pr_cont(" t5 : " REG_FMT " t6 : " REG_FMT "\n",
|
|
regs->t5, regs->t6);
|
|
|
|
pr_cont("status: " REG_FMT " badaddr: " REG_FMT " cause: " REG_FMT "\n",
|
|
regs->status, regs->badaddr, regs->cause);
|
|
}
|
|
void show_regs(struct pt_regs *regs)
|
|
{
|
|
__show_regs(regs);
|
|
if (!user_mode(regs))
|
|
dump_backtrace(regs, NULL, KERN_DEFAULT);
|
|
}
|
|
|
|
#ifdef CONFIG_COMPAT
|
|
/*
 * Written once by compat_mode_detect(): true when the UXL field of CSR_STATUS
 * read back as SR_UXL_32 after being set to it. Read by compat_elf_check_arch().
 */
static bool compat_mode_supported __read_mostly;
|
|
|
|
/*
 * Decide whether the ELF image described by @hdr may run as a compat task.
 *
 * Returns true only if compat mode was detected as supported, the machine
 * type is EM_RISCV and the ELF class is ELFCLASS32.
 */
bool compat_elf_check_arch(Elf32_Ehdr *hdr)
{
	if (!compat_mode_supported)
		return false;

	if (hdr->e_machine != EM_RISCV)
		return false;

	return hdr->e_ident[EI_CLASS] == ELFCLASS32;
}
|
|
|
|
/*
 * Probe whether the UXL field of CSR_STATUS can be switched to 32-bit.
 *
 * The current CSR_STATUS value is saved, the UXL field is rewritten to
 * SR_UXL_32 and the register is read back: if the field still holds
 * SR_UXL_32 the setting was accepted and compat mode is recorded as
 * supported. The original CSR_STATUS value is restored before returning.
 *
 * Registered as an early initcall. Always returns 0.
 */
static int __init compat_mode_detect(void)
{
	unsigned long saved_status = csr_read(CSR_STATUS);

	/* Try to select the 32-bit setting and see whether it sticks. */
	csr_write(CSR_STATUS, (saved_status & ~SR_UXL) | SR_UXL_32);
	compat_mode_supported =
		(csr_read(CSR_STATUS) & SR_UXL) == SR_UXL_32;

	/* Put the register back exactly as it was found. */
	csr_write(CSR_STATUS, saved_status);

	/*
	 * Terminate the message with a newline so the log line is complete
	 * and cannot be run together with a later continuation print.
	 */
	pr_info("riscv: ELF compat mode %s\n",
		compat_mode_supported ? "supported" : "failed");

	return 0;
}
early_initcall(compat_mode_detect);
|
|
#endif
|
|
|
|
void start_thread(struct pt_regs *regs, unsigned long pc,
|
|
unsigned long sp)
|
|
{
|
|
regs->status = SR_PIE;
|
|
if (has_fpu()) {
|
|
regs->status |= SR_FS_INITIAL;
|
|
/*
|
|
* Restore the initial value to the FP register
|
|
* before starting the user program.
|
|
*/
|
|
fstate_restore(current, regs);
|
|
}
|
|
regs->epc = pc;
|
|
regs->sp = sp;
|
|
|
|
#ifdef CONFIG_64BIT
|
|
regs->status &= ~SR_UXL;
|
|
|
|
if (is_compat_task())
|
|
regs->status |= SR_UXL_32;
|
|
else
|
|
regs->status |= SR_UXL_64;
|
|
#endif
|
|
}
|
|
|
|
/*
 * Reset the per-thread floating-point state of the current task.
 *
 * Does nothing unless CONFIG_FPU is enabled.
 */
void flush_thread(void)
{
#ifdef CONFIG_FPU
	/*
	 * Reset FPU state and context
	 *	frm: round to nearest, ties to even (IEEE default)
	 *	fflags: accrued exceptions cleared
	 */
	fstate_off(current, task_pt_regs(current));
	/* Zero the saved FP context; must take the address of current's fstate. */
	memset(&current->thread.fstate, 0, sizeof(current->thread.fstate));
#endif
}
|
|
|
|
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
|
|
{
|
|
fstate_save(src, task_pt_regs(src));
|
|
*dst = *src;
|
|
return 0;
|
|
}
|
|
|
|
int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
|
|
{
|
|
unsigned long clone_flags = args->flags;
|
|
unsigned long usp = args->stack;
|
|
unsigned long tls = args->tls;
|
|
struct pt_regs *childregs = task_pt_regs(p);
|
|
|
|
/* p->thread holds context to be restored by __switch_to() */
|
|
if (unlikely(args->fn)) {
|
|
/* Kernel thread */
|
|
memset(childregs, 0, sizeof(struct pt_regs));
|
|
childregs->gp = gp_in_global;
|
|
/* Supervisor/Machine, irqs on: */
|
|
childregs->status = SR_PP | SR_PIE;
|
|
|
|
p->thread.ra = (unsigned long)ret_from_kernel_thread;
|
|
p->thread.s[0] = (unsigned long)args->fn;
|
|
p->thread.s[1] = (unsigned long)args->fn_arg;
|
|
} else {
|
|
*childregs = *(current_pt_regs());
|
|
if (usp) /* User fork */
|
|
childregs->sp = usp;
|
|
if (clone_flags & CLONE_SETTLS)
|
|
childregs->tp = tls;
|
|
childregs->a0 = 0; /* Return value of fork() */
|
|
p->thread.ra = (unsigned long)ret_from_fork;
|
|
}
|
|
p->thread.sp = (unsigned long)childregs; /* kernel sp */
|
|
return 0;
|
|
}
|