mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-08 14:13:53 +00:00
70ffdb9393
Introduce faulthandler_disabled() and use it to check for irq context and disabled pagefaults (via pagefault_disable()) in the pagefault handlers. Please note that we keep the in_atomic() checks in place - to detect whether in irq context (in which case preemption is always properly disabled). In contrast, preempt_disable() should never be used to disable pagefaults. With !CONFIG_PREEMPT_COUNT, preempt_disable() doesn't modify the preempt counter, and therefore the result of in_atomic() differs. We validate that condition by using might_fault() checks when calling might_sleep(). Therefore, add a comment to faulthandler_disabled(), describing why this is needed. faulthandler_disabled() and pagefault_disable() are defined in linux/uaccess.h, so let's properly add that include to all relevant files. This patch is based on a patch from Thomas Gleixner. Reviewed-and-tested-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: David.Laight@ACULAB.COM Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: airlied@linux.ie Cc: akpm@linux-foundation.org Cc: benh@kernel.crashing.org Cc: bigeasy@linutronix.de Cc: borntraeger@de.ibm.com Cc: daniel.vetter@intel.com Cc: heiko.carstens@de.ibm.com Cc: herbert@gondor.apana.org.au Cc: hocko@suse.cz Cc: hughd@google.com Cc: mst@redhat.com Cc: paulus@samba.org Cc: ralf@linux-mips.org Cc: schwidefsky@de.ibm.com Cc: yang.shi@windriver.com Link: http://lkml.kernel.org/r/1431359540-32227-7-git-send-email-dahi@linux.vnet.ibm.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
145 lines
4.4 KiB
C
145 lines
4.4 KiB
C
#ifndef __LINUX_UACCESS_H__
|
|
#define __LINUX_UACCESS_H__
|
|
|
|
#include <linux/preempt.h>
|
|
#include <linux/sched.h>
|
|
#include <asm/uaccess.h>
|
|
|
|
static __always_inline void pagefault_disabled_inc(void)
|
|
{
|
|
current->pagefault_disabled++;
|
|
}
|
|
|
|
static __always_inline void pagefault_disabled_dec(void)
|
|
{
|
|
current->pagefault_disabled--;
|
|
WARN_ON(current->pagefault_disabled < 0);
|
|
}
|
|
|
|
/*
 * These routines enable/disable the pagefault handler. If disabled, it will
 * not take any locks and go straight to the fixup table.
 *
 * We increase the preempt and the pagefault count, to be able to distinguish
 * whether we run in simple atomic context or in a real pagefault_disable()
 * context.
 *
 * For now, after pagefault_disable() has been called, we run in atomic
 * context. User access methods will not sleep.
 */
static inline void pagefault_disable(void)
{
	preempt_count_inc();
	pagefault_disabled_inc();
	/*
	 * make sure to have issued the store before a pagefault
	 * can hit.
	 */
	barrier();
}
|
|
|
|
/*
 * Undo one pagefault_disable(): drop the pagefault-disabled count first and
 * the preempt count afterwards.
 */
static inline void pagefault_enable(void)
{
	/*
	 * make sure to issue those last loads/stores before enabling
	 * the pagefault handler again.
	 */
	barrier();
	pagefault_disabled_dec();
	/*
	 * NOTE(review): presumably preempt_enable() is used with CONFIG_PREEMPT
	 * so that a pending reschedule is honoured once the count drops, while
	 * the plain decrement suffices otherwise - confirm in linux/preempt.h.
	 */
#ifndef CONFIG_PREEMPT
	preempt_count_dec();
#else
	preempt_enable();
#endif
}
|
|
|
|
/*
 * Is the pagefault handler disabled? If so, user access methods will not sleep.
 *
 * Evaluates to non-zero while the current task's pagefault_disabled counter
 * is non-zero.
 */
#define pagefault_disabled() (current->pagefault_disabled != 0)
|
|
|
|
/*
 * The pagefault handler is in general disabled by pagefault_disable() or
 * when in irq context (via in_atomic()).
 *
 * This function should only be used by the fault handlers. Other users should
 * stick to pagefault_disabled().
 *
 * Please NEVER use preempt_disable() to disable the fault handler. With
 * !CONFIG_PREEMPT_COUNT, this is like a NOP. So the handler won't be disabled.
 * in_atomic() will report different values based on !CONFIG_PREEMPT_COUNT.
 */
#define faulthandler_disabled() (pagefault_disabled() || in_atomic())
|
|
|
|
#ifndef ARCH_HAS_NOCACHE_UACCESS
|
|
|
|
static inline unsigned long __copy_from_user_inatomic_nocache(void *to,
|
|
const void __user *from, unsigned long n)
|
|
{
|
|
return __copy_from_user_inatomic(to, from, n);
|
|
}
|
|
|
|
static inline unsigned long __copy_from_user_nocache(void *to,
|
|
const void __user *from, unsigned long n)
|
|
{
|
|
return __copy_from_user(to, from, n);
|
|
}
|
|
|
|
#endif /* ARCH_HAS_NOCACHE_UACCESS */
|
|
|
|
/**
 * probe_kernel_address(): safely attempt to read from a location
 * @addr: address to read from - its type is typeof(retval)*
 * @retval: read into this variable
 *
 * Safely read sizeof(@retval) bytes from address @addr into variable @retval.
 * The macro evaluates to the result of __copy_from_user_inatomic().
 * NOTE(review): the previous comment promised -EFAULT on a fault, but the
 * result is passed through unchanged; it is presumably zero on success and
 * the number of bytes left uncopied otherwise, so callers should only test
 * for non-zero - confirm against the arch implementation.
 *
 * The copy is executed between pagefault_disable() and pagefault_enable(),
 * i.e. in atomic context, so that do_page_fault() doesn't attempt to take
 * mmap_sem.  This makes probe_kernel_address() suitable for use within
 * regions where the caller already holds mmap_sem, or other locks which nest
 * inside mmap_sem.
 *
 * This must be a macro because the pointer type and the access size are
 * derived from the type of @retval.
 *
 * The address limit is switched to KERNEL_DS around the access and restored
 * afterwards, so the caller does not have to call set_fs() itself.
 *
 * The macro-local variables carry a __pka_ prefix so that they cannot
 * capture identifiers used in the @addr or @retval expressions (e.g. a
 * caller variable that happens to be named "ret").
 */
#define probe_kernel_address(addr, retval)				\
	({								\
		long __pka_ret;						\
		mm_segment_t __pka_old_fs = get_fs();			\
									\
		set_fs(KERNEL_DS);					\
		pagefault_disable();					\
		__pka_ret = __copy_from_user_inatomic(&(retval),	\
				(__force typeof(retval) __user *)(addr), \
				sizeof(retval));			\
		pagefault_enable();					\
		set_fs(__pka_old_fs);					\
		__pka_ret;						\
	})
|
|
|
|
/*
 * probe_kernel_read(): safely attempt to read from a location
 * @dst: pointer to the buffer that shall take the data
 * @src: address to read from
 * @size: size of the data chunk
 *
 * Safely read from address @src to the buffer at @dst.  If a kernel fault
 * happens, handle that and return -EFAULT.
 *
 * NOTE(review): __probe_kernel_read() takes the same arguments; it is
 * presumably the generic implementation behind probe_kernel_read() - confirm
 * against the definition, which is not part of this header.
 */
extern long probe_kernel_read(void *dst, const void *src, size_t size);
extern long __probe_kernel_read(void *dst, const void *src, size_t size);
|
|
|
|
/*
|
|
* probe_kernel_write(): safely attempt to write to a location
|
|
* @dst: address to write to
|
|
* @src: pointer to the data that shall be written
|
|
* @size: size of the data chunk
|
|
*
|
|
* Safely write to address @dst from the buffer at @src. If a kernel fault
|
|
* happens, handle that and return -EFAULT.
|
|
*/
|
|
extern long notrace probe_kernel_write(void *dst, const void *src, size_t size);
|
|
extern long notrace __probe_kernel_write(void *dst, const void *src, size_t size);
|
|
|
|
#endif /* __LINUX_UACCESS_H__ */
|