Jason Baron 909af768e8 coredump: remove VM_ALWAYSDUMP flag
The motivation for this patchset was that I was looking at a way for a
qemu-kvm process, to exclude the guest memory from its core dump, which
can be quite large.  There are already a number of filter flags in
/proc/<pid>/coredump_filter, however, these allow one to specify 'types'
of kernel memory, not specific address ranges (which is needed in this
case).

Since there are no more vma flags available, the first patch eliminates
the need for the 'VM_ALWAYSDUMP' flag.  The flag is used internally by
the kernel to mark vdso and vsyscall pages.  However, it is simple
enough to check if a vma covers a vdso or vsyscall page without the need
for this flag.

The second patch then replaces the 'VM_ALWAYSDUMP' flag with a new
'VM_NODUMP' flag, which can be set by userspace using new madvise flags:
'MADV_DONTDUMP', and unset via 'MADV_DODUMP'.  The core dump filters
continue to work the same as before unless 'MADV_DONTDUMP' is set on the
region.

The qemu code which implements this features is at:

  http://people.redhat.com/~jbaron/qemu-dump/qemu-dump.patch

In my testing the qemu core dump shrunk from 383MB -> 13MB with this
patch.

I also believe that the 'MADV_DONTDUMP' flag might be useful for
security sensitive apps, which might want to select which areas are
dumped.

This patch:

The VM_ALWAYSDUMP flag is currently used by the coredump code to
indicate that a vma is part of a vsyscall or vdso section.  However, we
can determine if a vma is in one these sections by checking it against
the gate_vma and checking for a non-NULL return value from
arch_vma_name().  Thus, freeing a valuable vma bit.

Signed-off-by: Jason Baron <jbaron@redhat.com>
Acked-by: Roland McGrath <roland@hack.frob.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Avi Kivity <avi@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2012-03-23 16:58:42 -07:00

545 lines
13 KiB
C

/*
* linux/arch/arm/kernel/process.c
*
* Copyright (C) 1996-2000 Russell King - Converted to ARM.
* Original Copyright (C) 1995 Linus Torvalds
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <stdarg.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/user.h>
#include <linux/delay.h>
#include <linux/reboot.h>
#include <linux/interrupt.h>
#include <linux/kallsyms.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/elfcore.h>
#include <linux/pm.h>
#include <linux/tick.h>
#include <linux/utsname.h>
#include <linux/uaccess.h>
#include <linux/random.h>
#include <linux/hw_breakpoint.h>
#include <linux/cpuidle.h>
#include <asm/cacheflush.h>
#include <asm/leds.h>
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/thread_notify.h>
#include <asm/stacktrace.h>
#include <asm/mach/time.h>
#ifdef CONFIG_CC_STACKPROTECTOR
#include <linux/stackprotector.h>
unsigned long __stack_chk_guard __read_mostly;
EXPORT_SYMBOL(__stack_chk_guard);
#endif
static const char *processor_modes[] = {
"USER_26", "FIQ_26" , "IRQ_26" , "SVC_26" , "UK4_26" , "UK5_26" , "UK6_26" , "UK7_26" ,
"UK8_26" , "UK9_26" , "UK10_26", "UK11_26", "UK12_26", "UK13_26", "UK14_26", "UK15_26",
"USER_32", "FIQ_32" , "IRQ_32" , "SVC_32" , "UK4_32" , "UK5_32" , "UK6_32" , "ABT_32" ,
"UK8_32" , "UK9_32" , "UK10_32", "UND_32" , "UK12_32", "UK13_32", "UK14_32", "SYS_32"
};
static const char *isa_modes[] = {
"ARM" , "Thumb" , "Jazelle", "ThumbEE"
};
extern void setup_mm_for_reboot(void);
static volatile int hlt_counter;
#include <mach/system.h>
void disable_hlt(void)
{
hlt_counter++;
}
EXPORT_SYMBOL(disable_hlt);
void enable_hlt(void)
{
hlt_counter--;
}
EXPORT_SYMBOL(enable_hlt);
static int __init nohlt_setup(char *__unused)
{
hlt_counter = 1;
return 1;
}
static int __init hlt_setup(char *__unused)
{
hlt_counter = 0;
return 1;
}
__setup("nohlt", nohlt_setup);
__setup("hlt", hlt_setup);
extern void call_with_stack(void (*fn)(void *), void *arg, void *sp);
typedef void (*phys_reset_t)(unsigned long);
/*
* A temporary stack to use for CPU reset. This is static so that we
* don't clobber it with the identity mapping. When running with this
* stack, any references to the current task *will not work* so you
* should really do as little as possible before jumping to your reset
* code.
*/
static u64 soft_restart_stack[16];
static void __soft_restart(void *addr)
{
phys_reset_t phys_reset;
/* Take out a flat memory mapping. */
setup_mm_for_reboot();
/* Clean and invalidate caches */
flush_cache_all();
/* Turn off caching */
cpu_proc_fin();
/* Push out any further dirty data, and ensure cache is empty */
flush_cache_all();
/* Switch to the identity mapping. */
phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset);
phys_reset((unsigned long)addr);
/* Should never get here. */
BUG();
}
void soft_restart(unsigned long addr)
{
u64 *stack = soft_restart_stack + ARRAY_SIZE(soft_restart_stack);
/* Disable interrupts first */
local_irq_disable();
local_fiq_disable();
/* Disable the L2 if we're the last man standing. */
if (num_online_cpus() == 1)
outer_disable();
/* Change to the new stack and continue with the reset. */
call_with_stack(__soft_restart, (void *)addr, (void *)stack);
/* Should never get here. */
BUG();
}
static void null_restart(char mode, const char *cmd)
{
}
/*
* Function pointers to optional machine specific functions
*/
void (*pm_power_off)(void);
EXPORT_SYMBOL(pm_power_off);
void (*arm_pm_restart)(char str, const char *cmd) = null_restart;
EXPORT_SYMBOL_GPL(arm_pm_restart);
static void do_nothing(void *unused)
{
}
/*
* cpu_idle_wait - Used to ensure that all the CPUs discard old value of
* pm_idle and update to new pm_idle value. Required while changing pm_idle
* handler on SMP systems.
*
* Caller must have changed pm_idle to the new value before the call. Old
* pm_idle value will not be used by any CPU after the return of this function.
*/
void cpu_idle_wait(void)
{
smp_mb();
/* kick all the CPUs so that they exit out of pm_idle */
smp_call_function(do_nothing, NULL, 1);
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);
/*
* This is our default idle handler. We need to disable
* interrupts here to ensure we don't miss a wakeup call.
*/
static void default_idle(void)
{
if (!need_resched())
arch_idle();
local_irq_enable();
}
void (*pm_idle)(void) = default_idle;
EXPORT_SYMBOL(pm_idle);
/*
* The idle thread, has rather strange semantics for calling pm_idle,
* but this is what x86 does and we need to do the same, so that
* things like cpuidle get called in the same way. The only difference
* is that we always respect 'hlt_counter' to prevent low power idle.
*/
void cpu_idle(void)
{
local_fiq_enable();
/* endless idle loop with no priority at all */
while (1) {
tick_nohz_idle_enter();
rcu_idle_enter();
leds_event(led_idle_start);
while (!need_resched()) {
#ifdef CONFIG_HOTPLUG_CPU
if (cpu_is_offline(smp_processor_id()))
cpu_die();
#endif
local_irq_disable();
#ifdef CONFIG_PL310_ERRATA_769419
wmb();
#endif
if (hlt_counter) {
local_irq_enable();
cpu_relax();
} else {
stop_critical_timings();
if (cpuidle_idle_call())
pm_idle();
start_critical_timings();
/*
* This will eventually be removed - pm_idle
* functions should always return with IRQs
* enabled.
*/
WARN_ON(irqs_disabled());
local_irq_enable();
}
}
leds_event(led_idle_end);
rcu_idle_exit();
tick_nohz_idle_exit();
schedule_preempt_disabled();
}
}
static char reboot_mode = 'h';
int __init reboot_setup(char *str)
{
reboot_mode = str[0];
return 1;
}
__setup("reboot=", reboot_setup);
void machine_shutdown(void)
{
#ifdef CONFIG_SMP
smp_send_stop();
#endif
}
void machine_halt(void)
{
machine_shutdown();
while (1);
}
void machine_power_off(void)
{
machine_shutdown();
if (pm_power_off)
pm_power_off();
}
void machine_restart(char *cmd)
{
machine_shutdown();
arm_pm_restart(reboot_mode, cmd);
/* Give a grace period for failure to restart of 1s */
mdelay(1000);
/* Whoops - the platform was unable to reboot. Tell the user! */
printk("Reboot failed -- System halted\n");
while (1);
}
void __show_regs(struct pt_regs *regs)
{
unsigned long flags;
char buf[64];
printk("CPU: %d %s (%s %.*s)\n",
raw_smp_processor_id(), print_tainted(),
init_utsname()->release,
(int)strcspn(init_utsname()->version, " "),
init_utsname()->version);
print_symbol("PC is at %s\n", instruction_pointer(regs));
print_symbol("LR is at %s\n", regs->ARM_lr);
printk("pc : [<%08lx>] lr : [<%08lx>] psr: %08lx\n"
"sp : %08lx ip : %08lx fp : %08lx\n",
regs->ARM_pc, regs->ARM_lr, regs->ARM_cpsr,
regs->ARM_sp, regs->ARM_ip, regs->ARM_fp);
printk("r10: %08lx r9 : %08lx r8 : %08lx\n",
regs->ARM_r10, regs->ARM_r9,
regs->ARM_r8);
printk("r7 : %08lx r6 : %08lx r5 : %08lx r4 : %08lx\n",
regs->ARM_r7, regs->ARM_r6,
regs->ARM_r5, regs->ARM_r4);
printk("r3 : %08lx r2 : %08lx r1 : %08lx r0 : %08lx\n",
regs->ARM_r3, regs->ARM_r2,
regs->ARM_r1, regs->ARM_r0);
flags = regs->ARM_cpsr;
buf[0] = flags & PSR_N_BIT ? 'N' : 'n';
buf[1] = flags & PSR_Z_BIT ? 'Z' : 'z';
buf[2] = flags & PSR_C_BIT ? 'C' : 'c';
buf[3] = flags & PSR_V_BIT ? 'V' : 'v';
buf[4] = '\0';
printk("Flags: %s IRQs o%s FIQs o%s Mode %s ISA %s Segment %s\n",
buf, interrupts_enabled(regs) ? "n" : "ff",
fast_interrupts_enabled(regs) ? "n" : "ff",
processor_modes[processor_mode(regs)],
isa_modes[isa_mode(regs)],
get_fs() == get_ds() ? "kernel" : "user");
#ifdef CONFIG_CPU_CP15
{
unsigned int ctrl;
buf[0] = '\0';
#ifdef CONFIG_CPU_CP15_MMU
{
unsigned int transbase, dac;
asm("mrc p15, 0, %0, c2, c0\n\t"
"mrc p15, 0, %1, c3, c0\n"
: "=r" (transbase), "=r" (dac));
snprintf(buf, sizeof(buf), " Table: %08x DAC: %08x",
transbase, dac);
}
#endif
asm("mrc p15, 0, %0, c1, c0\n" : "=r" (ctrl));
printk("Control: %08x%s\n", ctrl, buf);
}
#endif
}
void show_regs(struct pt_regs * regs)
{
printk("\n");
printk("Pid: %d, comm: %20s\n", task_pid_nr(current), current->comm);
__show_regs(regs);
dump_stack();
}
ATOMIC_NOTIFIER_HEAD(thread_notify_head);
EXPORT_SYMBOL_GPL(thread_notify_head);
/*
* Free current thread data structures etc..
*/
void exit_thread(void)
{
thread_notify(THREAD_NOTIFY_EXIT, current_thread_info());
}
void flush_thread(void)
{
struct thread_info *thread = current_thread_info();
struct task_struct *tsk = current;
flush_ptrace_hw_breakpoint(tsk);
memset(thread->used_cp, 0, sizeof(thread->used_cp));
memset(&tsk->thread.debug, 0, sizeof(struct debug_info));
memset(&thread->fpstate, 0, sizeof(union fp_state));
thread_notify(THREAD_NOTIFY_FLUSH, thread);
}
void release_thread(struct task_struct *dead_task)
{
}
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
int
copy_thread(unsigned long clone_flags, unsigned long stack_start,
unsigned long stk_sz, struct task_struct *p, struct pt_regs *regs)
{
struct thread_info *thread = task_thread_info(p);
struct pt_regs *childregs = task_pt_regs(p);
*childregs = *regs;
childregs->ARM_r0 = 0;
childregs->ARM_sp = stack_start;
memset(&thread->cpu_context, 0, sizeof(struct cpu_context_save));
thread->cpu_context.sp = (unsigned long)childregs;
thread->cpu_context.pc = (unsigned long)ret_from_fork;
clear_ptrace_hw_breakpoint(p);
if (clone_flags & CLONE_SETTLS)
thread->tp_value = regs->ARM_r3;
thread_notify(THREAD_NOTIFY_COPY, thread);
return 0;
}
/*
* Fill in the task's elfregs structure for a core dump.
*/
int dump_task_regs(struct task_struct *t, elf_gregset_t *elfregs)
{
elf_core_copy_regs(elfregs, task_pt_regs(t));
return 1;
}
/*
* fill in the fpe structure for a core dump...
*/
int dump_fpu (struct pt_regs *regs, struct user_fp *fp)
{
struct thread_info *thread = current_thread_info();
int used_math = thread->used_cp[1] | thread->used_cp[2];
if (used_math)
memcpy(fp, &thread->fpstate.soft, sizeof (*fp));
return used_math != 0;
}
EXPORT_SYMBOL(dump_fpu);
/*
* Shuffle the argument into the correct register before calling the
* thread function. r4 is the thread argument, r5 is the pointer to
* the thread function, and r6 points to the exit function.
*/
extern void kernel_thread_helper(void);
asm( ".pushsection .text\n"
" .align\n"
" .type kernel_thread_helper, #function\n"
"kernel_thread_helper:\n"
#ifdef CONFIG_TRACE_IRQFLAGS
" bl trace_hardirqs_on\n"
#endif
" msr cpsr_c, r7\n"
" mov r0, r4\n"
" mov lr, r6\n"
" mov pc, r5\n"
" .size kernel_thread_helper, . - kernel_thread_helper\n"
" .popsection");
#ifdef CONFIG_ARM_UNWIND
extern void kernel_thread_exit(long code);
asm( ".pushsection .text\n"
" .align\n"
" .type kernel_thread_exit, #function\n"
"kernel_thread_exit:\n"
" .fnstart\n"
" .cantunwind\n"
" bl do_exit\n"
" nop\n"
" .fnend\n"
" .size kernel_thread_exit, . - kernel_thread_exit\n"
" .popsection");
#else
#define kernel_thread_exit do_exit
#endif
/*
* Create a kernel thread.
*/
pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
{
struct pt_regs regs;
memset(&regs, 0, sizeof(regs));
regs.ARM_r4 = (unsigned long)arg;
regs.ARM_r5 = (unsigned long)fn;
regs.ARM_r6 = (unsigned long)kernel_thread_exit;
regs.ARM_r7 = SVC_MODE | PSR_ENDSTATE | PSR_ISETSTATE;
regs.ARM_pc = (unsigned long)kernel_thread_helper;
regs.ARM_cpsr = regs.ARM_r7 | PSR_I_BIT;
return do_fork(flags|CLONE_VM|CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
}
EXPORT_SYMBOL(kernel_thread);
unsigned long get_wchan(struct task_struct *p)
{
struct stackframe frame;
int count = 0;
if (!p || p == current || p->state == TASK_RUNNING)
return 0;
frame.fp = thread_saved_fp(p);
frame.sp = thread_saved_sp(p);
frame.lr = 0; /* recovered from the stack */
frame.pc = thread_saved_pc(p);
do {
int ret = unwind_frame(&frame);
if (ret < 0)
return 0;
if (!in_sched_functions(frame.pc))
return frame.pc;
} while (count ++ < 16);
return 0;
}
unsigned long arch_randomize_brk(struct mm_struct *mm)
{
unsigned long range_end = mm->brk + 0x02000000;
return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}
#ifdef CONFIG_MMU
/*
* The vectors page is always readable from user space for the
* atomic helpers and the signal restart code. Let's declare a mapping
* for it so it is visible through ptrace and /proc/<pid>/mem.
*/
int vectors_user_mapping(void)
{
struct mm_struct *mm = current->mm;
return install_special_mapping(mm, 0xffff0000, PAGE_SIZE,
VM_READ | VM_EXEC |
VM_MAYREAD | VM_MAYEXEC | VM_RESERVED,
NULL);
}
const char *arch_vma_name(struct vm_area_struct *vma)
{
return (vma->vm_start == 0xffff0000) ? "[vectors]" : NULL;
}
#endif