Stefan O'Rear d14fa1fcf6
riscv: process: Fix kernel gp leakage
childregs represents the registers which are active for the new thread
in user context. For a kernel thread, childregs->gp is never used since
the kernel gp is not touched by switch_to. For a user mode helper, the
gp value can be observed in user space after execve or possibly by other
means.

[From the email thread]

The /* Kernel thread */ comment is somewhat inaccurate in that it is also used
for user_mode_helper threads, which exec a user process, e.g. /sbin/init or
when /proc/sys/kernel/core_pattern is a pipe. Such threads do not have
PF_KTHREAD set and are valid targets for ptrace etc. even before they exec.

childregs is the *user* context during syscall execution and it is observable
from userspace in at least five ways:

1. kernel_execve does not currently clear integer registers, so the starting
   register state for PID 1 and other user processes started by the kernel has
   sp = user stack, gp = kernel __global_pointer$, all other integer registers
   zeroed by the memset in the patch comment.

   This is a bug in its own right, but I'm unwilling to bet that it is the only
   way to exploit the issue addressed by this patch.

2. ptrace(PTRACE_GETREGSET): you can PTRACE_ATTACH to a user_mode_helper thread
   before it execs, but ptrace requires SIGSTOP to be delivered which can only
   happen at user/kernel boundaries.

3. /proc/*/task/*/syscall: this is perfectly happy to read pt_regs for
   user_mode_helpers before the exec completes, but gp is not one of the
   registers it returns.

4. PERF_SAMPLE_REGS_USER: LOCKDOWN_PERF normally prevents access to kernel
   addresses via PERF_SAMPLE_REGS_INTR, but due to this bug kernel addresses
   are also exposed via PERF_SAMPLE_REGS_USER which is permitted under
   LOCKDOWN_PERF. I have not attempted to write exploit code.

5. Much of the tracing infrastructure allows access to user registers. I have
   not attempted to determine which forms of tracing allow access to user
   registers without already allowing access to kernel registers.

Fixes: 7db91e57a0ac ("RISC-V: Task implementation")
Cc: stable@vger.kernel.org
Signed-off-by: Stefan O'Rear <sorear@fastmail.com>
Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Link: https://lore.kernel.org/r/20240327061258.2370291-1-sorear@fastmail.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
2024-04-04 12:35:05 -07:00

236 lines
6.2 KiB
C

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2009 Sunplus Core Technology Co., Ltd.
* Chen Liqin <liqin.chen@sunplusct.com>
* Lennox Wu <lennox.wu@sunplusct.com>
* Copyright (C) 2012 Regents of the University of California
* Copyright (C) 2017 SiFive
*/
#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/task_stack.h>
#include <linux/tick.h>
#include <linux/ptrace.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
#include <asm/processor.h>
#include <asm/csr.h>
#include <asm/stacktrace.h>
#include <asm/string.h>
#include <asm/switch_to.h>
#include <asm/thread_info.h>
#include <asm/cpuidle.h>
#include <asm/vector.h>
#include <asm/cpufeature.h>
#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
#include <linux/stackprotector.h>
unsigned long __stack_chk_guard __read_mostly;
EXPORT_SYMBOL(__stack_chk_guard);
#endif
extern asmlinkage void ret_from_fork(void);
void noinstr arch_cpu_idle(void)
{
cpu_do_idle();
}
int set_unalign_ctl(struct task_struct *tsk, unsigned int val)
{
if (!unaligned_ctl_available())
return -EINVAL;
tsk->thread.align_ctl = val;
return 0;
}
int get_unalign_ctl(struct task_struct *tsk, unsigned long adr)
{
if (!unaligned_ctl_available())
return -EINVAL;
return put_user(tsk->thread.align_ctl, (unsigned long __user *)adr);
}
void __show_regs(struct pt_regs *regs)
{
show_regs_print_info(KERN_DEFAULT);
if (!user_mode(regs)) {
pr_cont("epc : %pS\n", (void *)regs->epc);
pr_cont(" ra : %pS\n", (void *)regs->ra);
}
pr_cont("epc : " REG_FMT " ra : " REG_FMT " sp : " REG_FMT "\n",
regs->epc, regs->ra, regs->sp);
pr_cont(" gp : " REG_FMT " tp : " REG_FMT " t0 : " REG_FMT "\n",
regs->gp, regs->tp, regs->t0);
pr_cont(" t1 : " REG_FMT " t2 : " REG_FMT " s0 : " REG_FMT "\n",
regs->t1, regs->t2, regs->s0);
pr_cont(" s1 : " REG_FMT " a0 : " REG_FMT " a1 : " REG_FMT "\n",
regs->s1, regs->a0, regs->a1);
pr_cont(" a2 : " REG_FMT " a3 : " REG_FMT " a4 : " REG_FMT "\n",
regs->a2, regs->a3, regs->a4);
pr_cont(" a5 : " REG_FMT " a6 : " REG_FMT " a7 : " REG_FMT "\n",
regs->a5, regs->a6, regs->a7);
pr_cont(" s2 : " REG_FMT " s3 : " REG_FMT " s4 : " REG_FMT "\n",
regs->s2, regs->s3, regs->s4);
pr_cont(" s5 : " REG_FMT " s6 : " REG_FMT " s7 : " REG_FMT "\n",
regs->s5, regs->s6, regs->s7);
pr_cont(" s8 : " REG_FMT " s9 : " REG_FMT " s10: " REG_FMT "\n",
regs->s8, regs->s9, regs->s10);
pr_cont(" s11: " REG_FMT " t3 : " REG_FMT " t4 : " REG_FMT "\n",
regs->s11, regs->t3, regs->t4);
pr_cont(" t5 : " REG_FMT " t6 : " REG_FMT "\n",
regs->t5, regs->t6);
pr_cont("status: " REG_FMT " badaddr: " REG_FMT " cause: " REG_FMT "\n",
regs->status, regs->badaddr, regs->cause);
}
void show_regs(struct pt_regs *regs)
{
__show_regs(regs);
if (!user_mode(regs))
dump_backtrace(regs, NULL, KERN_DEFAULT);
}
#ifdef CONFIG_COMPAT
static bool compat_mode_supported __read_mostly;
bool compat_elf_check_arch(Elf32_Ehdr *hdr)
{
return compat_mode_supported &&
hdr->e_machine == EM_RISCV &&
hdr->e_ident[EI_CLASS] == ELFCLASS32;
}
static int __init compat_mode_detect(void)
{
unsigned long tmp = csr_read(CSR_STATUS);
csr_write(CSR_STATUS, (tmp & ~SR_UXL) | SR_UXL_32);
compat_mode_supported =
(csr_read(CSR_STATUS) & SR_UXL) == SR_UXL_32;
csr_write(CSR_STATUS, tmp);
pr_info("riscv: ELF compat mode %s",
compat_mode_supported ? "supported" : "unsupported");
return 0;
}
early_initcall(compat_mode_detect);
#endif
void start_thread(struct pt_regs *regs, unsigned long pc,
unsigned long sp)
{
regs->status = SR_PIE;
if (has_fpu()) {
regs->status |= SR_FS_INITIAL;
/*
* Restore the initial value to the FP register
* before starting the user program.
*/
fstate_restore(current, regs);
}
regs->epc = pc;
regs->sp = sp;
#ifdef CONFIG_64BIT
regs->status &= ~SR_UXL;
if (is_compat_task())
regs->status |= SR_UXL_32;
else
regs->status |= SR_UXL_64;
#endif
}
void flush_thread(void)
{
#ifdef CONFIG_FPU
/*
* Reset FPU state and context
* frm: round to nearest, ties to even (IEEE default)
* fflags: accrued exceptions cleared
*/
fstate_off(current, task_pt_regs(current));
memset(&current->thread.fstate, 0, sizeof(current->thread.fstate));
#endif
#ifdef CONFIG_RISCV_ISA_V
/* Reset vector state */
riscv_v_vstate_ctrl_init(current);
riscv_v_vstate_off(task_pt_regs(current));
kfree(current->thread.vstate.datap);
memset(&current->thread.vstate, 0, sizeof(struct __riscv_v_ext_state));
clear_tsk_thread_flag(current, TIF_RISCV_V_DEFER_RESTORE);
#endif
}
void arch_release_task_struct(struct task_struct *tsk)
{
/* Free the vector context of datap. */
if (has_vector())
riscv_v_thread_free(tsk);
}
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
fstate_save(src, task_pt_regs(src));
*dst = *src;
/* clear entire V context, including datap for a new task */
memset(&dst->thread.vstate, 0, sizeof(struct __riscv_v_ext_state));
memset(&dst->thread.kernel_vstate, 0, sizeof(struct __riscv_v_ext_state));
clear_tsk_thread_flag(dst, TIF_RISCV_V_DEFER_RESTORE);
return 0;
}
int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
unsigned long clone_flags = args->flags;
unsigned long usp = args->stack;
unsigned long tls = args->tls;
struct pt_regs *childregs = task_pt_regs(p);
memset(&p->thread.s, 0, sizeof(p->thread.s));
/* p->thread holds context to be restored by __switch_to() */
if (unlikely(args->fn)) {
/* Kernel thread */
memset(childregs, 0, sizeof(struct pt_regs));
/* Supervisor/Machine, irqs on: */
childregs->status = SR_PP | SR_PIE;
p->thread.s[0] = (unsigned long)args->fn;
p->thread.s[1] = (unsigned long)args->fn_arg;
} else {
*childregs = *(current_pt_regs());
/* Turn off status.VS */
riscv_v_vstate_off(childregs);
if (usp) /* User fork */
childregs->sp = usp;
if (clone_flags & CLONE_SETTLS)
childregs->tp = tls;
childregs->a0 = 0; /* Return value of fork() */
p->thread.s[0] = 0;
}
p->thread.riscv_v_flags = 0;
if (has_vector())
riscv_v_thread_alloc(p);
p->thread.ra = (unsigned long)ret_from_fork;
p->thread.sp = (unsigned long)childregs; /* kernel sp */
return 0;
}
void __init arch_task_cache_init(void)
{
riscv_v_setup_ctx_cache();
}