Shaohua Li 45cac65b0f readahead: fault retry breaks mmap file read random detection
.fault now can retry.  The retry can break state machine of .fault.  In
filemap_fault, if page is miss, ra->mmap_miss is increased.  In the second
try, since the page is in page cache now, ra->mmap_miss is decreased.  And
these are done in one fault, so we can't detect random mmap file access.

Add a new flag to indicate .fault is tried once.  In the second try, skip
ra->mmap_miss decreasing.  The filemap_fault state machine is ok with it.

I only tested x86, didn't test other archs, but looks the change for other
archs is obvious, but who knows :)

Signed-off-by: Shaohua Li <shaohua.li@fusionio.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2012-10-09 16:22:47 +09:00

265 lines
6.1 KiB
C

/*
* Copyright (C) 2004-2006 Atmel Corporation
*
* Based on linux/arch/sh/mm/fault.c:
* Copyright (C) 1999 Niibe Yutaka
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/pagemap.h>
#include <linux/kdebug.h>
#include <linux/kprobes.h>
#include <asm/mmu_context.h>
#include <asm/sysreg.h>
#include <asm/tlb.h>
#include <asm/uaccess.h>
#ifdef CONFIG_KPROBES
static inline int notify_page_fault(struct pt_regs *regs, int trap)
{
int ret = 0;
if (!user_mode(regs)) {
if (kprobe_running() && kprobe_fault_handler(regs, trap))
ret = 1;
}
return ret;
}
#else
static inline int notify_page_fault(struct pt_regs *regs, int trap)
{
return 0;
}
#endif
int exception_trace = 1;
/*
* This routine handles page faults. It determines the address and the
* problem, and then passes it off to one of the appropriate routines.
*
* ecr is the Exception Cause Register. Possible values are:
* 6: Protection fault (instruction access)
* 15: Protection fault (read access)
* 16: Protection fault (write access)
* 20: Page not found (instruction access)
* 24: Page not found (read access)
* 28: Page not found (write access)
*/
asmlinkage void do_page_fault(unsigned long ecr, struct pt_regs *regs)
{
struct task_struct *tsk;
struct mm_struct *mm;
struct vm_area_struct *vma;
const struct exception_table_entry *fixup;
unsigned long address;
unsigned long page;
long signr;
int code;
int fault;
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
if (notify_page_fault(regs, ecr))
return;
address = sysreg_read(TLBEAR);
tsk = current;
mm = tsk->mm;
signr = SIGSEGV;
code = SEGV_MAPERR;
/*
* If we're in an interrupt or have no user context, we must
* not take the fault...
*/
if (in_atomic() || !mm || regs->sr & SYSREG_BIT(GM))
goto no_context;
local_irq_enable();
retry:
down_read(&mm->mmap_sem);
vma = find_vma(mm, address);
if (!vma)
goto bad_area;
if (vma->vm_start <= address)
goto good_area;
if (!(vma->vm_flags & VM_GROWSDOWN))
goto bad_area;
if (expand_stack(vma, address))
goto bad_area;
/*
* Ok, we have a good vm_area for this memory access, so we
* can handle it...
*/
good_area:
code = SEGV_ACCERR;
switch (ecr) {
case ECR_PROTECTION_X:
case ECR_TLB_MISS_X:
if (!(vma->vm_flags & VM_EXEC))
goto bad_area;
break;
case ECR_PROTECTION_R:
case ECR_TLB_MISS_R:
if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)))
goto bad_area;
break;
case ECR_PROTECTION_W:
case ECR_TLB_MISS_W:
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
flags |= FAULT_FLAG_WRITE;
break;
default:
panic("Unhandled case %lu in do_page_fault!", ecr);
}
/*
* If for any reason at all we couldn't handle the fault, make
* sure we exit gracefully rather than endlessly redo the
* fault.
*/
fault = handle_mm_fault(mm, vma, address, flags);
if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
return;
if (unlikely(fault & VM_FAULT_ERROR)) {
if (fault & VM_FAULT_OOM)
goto out_of_memory;
else if (fault & VM_FAULT_SIGBUS)
goto do_sigbus;
BUG();
}
if (flags & FAULT_FLAG_ALLOW_RETRY) {
if (fault & VM_FAULT_MAJOR)
tsk->maj_flt++;
else
tsk->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags &= ~FAULT_FLAG_ALLOW_RETRY;
flags |= FAULT_FLAG_TRIED;
/*
* No need to up_read(&mm->mmap_sem) as we would have
* already released it in __lock_page_or_retry() in
* mm/filemap.c.
*/
goto retry;
}
}
up_read(&mm->mmap_sem);
return;
/*
* Something tried to access memory that isn't in our memory
* map. Fix it, but check if it's kernel or user first...
*/
bad_area:
up_read(&mm->mmap_sem);
if (user_mode(regs)) {
if (exception_trace && printk_ratelimit())
printk("%s%s[%d]: segfault at %08lx pc %08lx "
"sp %08lx ecr %lu\n",
is_global_init(tsk) ? KERN_EMERG : KERN_INFO,
tsk->comm, tsk->pid, address, regs->pc,
regs->sp, ecr);
_exception(SIGSEGV, regs, code, address);
return;
}
no_context:
/* Are we prepared to handle this kernel fault? */
fixup = search_exception_tables(regs->pc);
if (fixup) {
regs->pc = fixup->fixup;
return;
}
/*
* Oops. The kernel tried to access some bad page. We'll have
* to terminate things with extreme prejudice.
*/
if (address < PAGE_SIZE)
printk(KERN_ALERT
"Unable to handle kernel NULL pointer dereference");
else
printk(KERN_ALERT
"Unable to handle kernel paging request");
printk(" at virtual address %08lx\n", address);
page = sysreg_read(PTBR);
printk(KERN_ALERT "ptbr = %08lx", page);
if (address >= TASK_SIZE)
page = (unsigned long)swapper_pg_dir;
if (page) {
page = ((unsigned long *)page)[address >> 22];
printk(" pgd = %08lx", page);
if (page & _PAGE_PRESENT) {
page &= PAGE_MASK;
address &= 0x003ff000;
page = ((unsigned long *)__va(page))[address >> PAGE_SHIFT];
printk(" pte = %08lx", page);
}
}
printk("\n");
die("Kernel access of bad area", regs, signr);
return;
/*
* We ran out of memory, or some other thing happened to us
* that made us unable to handle the page fault gracefully.
*/
out_of_memory:
up_read(&mm->mmap_sem);
pagefault_out_of_memory();
if (!user_mode(regs))
goto no_context;
return;
do_sigbus:
up_read(&mm->mmap_sem);
/* Kernel mode? Handle exceptions or die */
signr = SIGBUS;
code = BUS_ADRERR;
if (!user_mode(regs))
goto no_context;
if (exception_trace)
printk("%s%s[%d]: bus error at %08lx pc %08lx "
"sp %08lx ecr %lu\n",
is_global_init(tsk) ? KERN_EMERG : KERN_INFO,
tsk->comm, tsk->pid, address, regs->pc,
regs->sp, ecr);
_exception(SIGBUS, regs, BUS_ADRERR, address);
}
asmlinkage void do_bus_error(unsigned long addr, int write_access,
struct pt_regs *regs)
{
printk(KERN_ALERT
"Bus error at physical address 0x%08lx (%s access)\n",
addr, write_access ? "write" : "read");
printk(KERN_INFO "DTLB dump:\n");
dump_dtlb();
die("Bus Error", regs, SIGKILL);
}