mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-11 00:08:50 +00:00
17666f02b1
Impact: fix crashes that can occur in NMI handlers, if their code is modified. Modifying code is something that needs special care. On SMP boxes, if code that is being modified is also being executed on another CPU, that CPU will have undefined results. The dynamic ftrace uses kstop_machine to make the system act like a uniprocessor system. But this does not address NMIs, that can still run on other CPUs. One approach to handle this is to make all code that is used by NMIs not be traced. But NMIs can call notifiers that spread throughout the kernel and this will be very hard to maintain, and the chance of missing a function is very high. The approach that this patch takes is to have the NMIs modify the code if the modification is taking place. The way this works is that just writing to code executing on another CPU is not harmful if what is written is the same as what exists. Two buffers are used: an IP buffer and a "code" buffer. The steps that the patcher takes are: 1) Put the instruction pointer into the IP buffer and the new code into the "code" buffer. 2) Set a flag that says we are modifying code 3) Wait for any running NMIs to finish. 4) Write the code 5) clear the flag. 6) Wait for any running NMIs to finish. If an NMI is executed, it will also write the pending code. Multiple writes are OK, because what is being written is the same. Then the patcher must wait for all running NMIs to finish before going to the next line that must be patched. This is basically the RCU approach to code modification. Thanks to Ingo Molnar for suggesting the idea, and to Arjan van de Ven for his guidance on what is safe and what is not. Signed-off-by: Steven Rostedt <srostedt@redhat.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
179 lines
4.6 KiB
C
179 lines
4.6 KiB
C
#ifndef LINUX_HARDIRQ_H
|
|
#define LINUX_HARDIRQ_H
|
|
|
|
#include <linux/preempt.h>
|
|
#include <linux/smp_lock.h>
|
|
#include <linux/lockdep.h>
|
|
#include <asm/hardirq.h>
|
|
#include <asm/ftrace.h>
|
|
#include <asm/system.h>
|
|
|
|
/*
|
|
* We put the hardirq and softirq counter into the preemption
|
|
* counter. The bitmask has the following meaning:
|
|
*
|
|
* - bits 0-7 are the preemption count (max preemption depth: 256)
|
|
* - bits 8-15 are the softirq count (max # of softirqs: 256)
|
|
*
|
|
* The hardirq count can be overridden per architecture, the default is:
|
|
*
|
|
* - bits 16-27 are the hardirq count (max # of hardirqs: 4096)
|
|
* - ( bit 28 is the PREEMPT_ACTIVE flag. )
|
|
*
|
|
* PREEMPT_MASK: 0x000000ff
|
|
* SOFTIRQ_MASK: 0x0000ff00
|
|
* HARDIRQ_MASK: 0x0fff0000
|
|
*/
|
|
/* Widths, in bits, of the preemption-depth and softirq fields of the preempt counter. */
#define PREEMPT_BITS 8
|
|
#define SOFTIRQ_BITS 8
|
|
|
|
/*
 * Default width of the hardirq field. Everything down to the matching
 * #endif, including the size sanity check, is skipped when HARDIRQ_BITS
 * has already been defined before this point.
 */
#ifndef HARDIRQ_BITS
|
|
#define HARDIRQ_BITS 12
|
|
|
|
/* Unless already defined, the nesting bound used by the check below is NR_IRQS. */
#ifndef MAX_HARDIRQS_PER_CPU
|
|
#define MAX_HARDIRQS_PER_CPU NR_IRQS
|
|
#endif /* MAX_HARDIRQS_PER_CPU */
|
|
|
|
/*
|
|
* The hardirq mask has to be large enough to have space for potentially
|
|
* all IRQ sources in the system nesting on a single CPU.
|
|
*/
|
|
#if (1 << HARDIRQ_BITS) < MAX_HARDIRQS_PER_CPU
|
|
# error HARDIRQ_BITS is too low!
|
|
#endif
|
|
#endif /* HARDIRQ_BITS */
|
|
|
|
/*
 * Bit position of the lowest bit of each field. The fields are packed back
 * to back starting at bit 0: preempt, then softirq, then hardirq.
 */
#define PREEMPT_SHIFT 0
|
|
#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS)
|
|
#define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS)
|
|
|
|
/*
 * Unsigned long value with the low x bits set. x must be smaller than the
 * bit width of unsigned long, otherwise the shift is undefined.
 */
#define __IRQ_MASK(x) ((1UL << (x))-1)
|
|
|
|
/* In-place masks selecting each field of the preempt counter. */
#define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT)
|
|
#define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT)
|
|
#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
|
|
|
|
/*
 * The lowest bit of each field: adding one of these to the preempt counter
 * increments that field by one (see the HARDIRQ_OFFSET use in __irq_enter()).
 */
#define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT)
|
|
#define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT)
|
|
#define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT)
|
|
|
|
/*
 * Build-time check that PREEMPT_ACTIVE (defined outside this file) is not
 * smaller than the first bit above the hardirq field, so it cannot fall
 * inside the preempt/softirq/hardirq counters.
 */
#if PREEMPT_ACTIVE < (1 << (HARDIRQ_SHIFT + HARDIRQ_BITS))
|
|
#error PREEMPT_ACTIVE is too low!
|
|
#endif
|
|
|
|
/*
 * Current value of the hardirq field, the softirq field, or both, taken
 * from preempt_count(). The result is masked but not shifted down, so it is
 * zero when the field is zero and some non-zero multiple of the field's
 * offset otherwise.
 */
#define hardirq_count() (preempt_count() & HARDIRQ_MASK)
|
|
#define softirq_count() (preempt_count() & SOFTIRQ_MASK)
|
|
#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK))
|
|
|
|
/*
|
|
* Are we doing bottom half or hardware interrupt processing?
|
|
* Are we in a softirq context? Interrupt context?
|
|
*/
|
|
/*
 * Each predicate expands to the masked counter itself, so the result is
 * zero / non-zero rather than strictly 0 / 1.
 */
#define in_irq() (hardirq_count())
|
|
#define in_softirq() (softirq_count())
|
|
#define in_interrupt() (irq_count())
|
|
|
|
/*
 * Baseline values that in_atomic() and in_atomic_preempt_off() compare the
 * preempt counter against. With CONFIG_PREEMPT the in_atomic() baseline is
 * whatever kernel_locked() returns and the preempt-off baseline is 1;
 * without it both are 0.
 * NOTE(review): kernel_locked() is defined elsewhere (presumably via
 * <linux/smp_lock.h>) - confirm its return values.
 */
#if defined(CONFIG_PREEMPT)
|
|
# define PREEMPT_INATOMIC_BASE kernel_locked()
|
|
# define PREEMPT_CHECK_OFFSET 1
|
|
#else
|
|
# define PREEMPT_INATOMIC_BASE 0
|
|
# define PREEMPT_CHECK_OFFSET 0
|
|
#endif /* CONFIG_PREEMPT */
|
|
|
|
/*
|
|
* Are we running in atomic context? WARNING: this macro cannot
|
|
* always detect atomic context; in particular, it cannot know about
|
|
* held spinlocks in non-preemptible kernels. Thus it should not be
|
|
* used in the general case to determine whether sleeping is possible.
|
|
* Do not use in_atomic() in driver code.
|
|
*/
|
|
/* Non-zero when the preempt counter, ignoring PREEMPT_ACTIVE, differs from PREEMPT_INATOMIC_BASE. */
#define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_INATOMIC_BASE)
|
|
|
|
/*
|
|
* Check whether we were atomic before we did preempt_disable():
|
|
* (used by the scheduler, *after* releasing the kernel lock)
|
|
*/
|
|
/* Non-zero when the preempt counter, ignoring PREEMPT_ACTIVE, differs from PREEMPT_CHECK_OFFSET. */
#define in_atomic_preempt_off() \
|
|
((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET)
|
|
|
|
/*
 * preemptible(): with CONFIG_PREEMPT, true only when the whole preempt
 * counter is zero and interrupts are not disabled; without CONFIG_PREEMPT
 * it is the constant 0.
 *
 * IRQ_EXIT_OFFSET: HARDIRQ_OFFSET-1 with CONFIG_PREEMPT, HARDIRQ_OFFSET
 * otherwise.
 * NOTE(review): IRQ_EXIT_OFFSET is not used in this file; presumably it is
 * consumed by the out-of-line irq_exit() - confirm against the caller.
 */
#ifdef CONFIG_PREEMPT
|
|
# define preemptible() (preempt_count() == 0 && !irqs_disabled())
|
|
# define IRQ_EXIT_OFFSET (HARDIRQ_OFFSET-1)
|
|
#else
|
|
# define preemptible() 0
|
|
# define IRQ_EXIT_OFFSET HARDIRQ_OFFSET
|
|
#endif /* CONFIG_PREEMPT */
|
|
|
|
/*
 * synchronize_irq(): an out-of-line function on SMP builds. On non-SMP
 * builds it expands to barrier() and the irq argument is not evaluated.
 */
#ifdef CONFIG_SMP
|
|
extern void synchronize_irq(unsigned int irq);
|
|
#else
|
|
# define synchronize_irq(irq) barrier()
|
|
#endif /* CONFIG_SMP */
|
|
|
|
struct task_struct;
|
|
|
|
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
/*
 * No-op stand-in used when CONFIG_VIRT_CPU_ACCOUNTING is not set, so that
 * __irq_enter() / __irq_exit() can call account_system_vtime()
 * unconditionally. The tsk argument is ignored.
 */
static inline void account_system_vtime(struct task_struct *tsk) { }
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
|
|
|
|
/*
 * RCU hooks called from __irq_enter() / __irq_exit(). They are real
 * out-of-line functions only when both CONFIG_PREEMPT_RCU and CONFIG_NO_HZ
 * are set; otherwise they expand to empty statements.
 */
#if defined(CONFIG_PREEMPT_RCU) && defined(CONFIG_NO_HZ)
|
|
extern void rcu_irq_enter(void);
|
|
extern void rcu_irq_exit(void);
|
|
#else
|
|
# define rcu_irq_enter() do { } while (0)
|
|
# define rcu_irq_exit() do { } while (0)
|
|
#endif /* CONFIG_PREEMPT_RCU && CONFIG_NO_HZ */
|
|
|
|
/*
|
|
* It is safe to do non-atomic ops on ->hardirq_context,
|
|
* because NMI handlers may not preempt and the ops are
|
|
* always balanced, so the interrupted value of ->hardirq_context
|
|
* will always be restored.
|
|
*/
|
|
/*
 * __irq_enter(): enter hardirq context. In order: call rcu_irq_enter(),
 * call account_system_vtime() for current, add HARDIRQ_OFFSET to the
 * preempt counter, then call trace_hardirq_enter(). __irq_exit() performs
 * the matching steps in reverse order. Used by nmi_enter().
 */
#define __irq_enter() \
|
|
do { \
|
|
rcu_irq_enter(); \
|
|
account_system_vtime(current); \
|
|
add_preempt_count(HARDIRQ_OFFSET); \
|
|
trace_hardirq_enter(); \
|
|
} while (0)
|
|
|
|
/*
|
|
* Enter irq context (on NO_HZ, update jiffies):
|
|
*/
|
|
extern void irq_enter(void);
|
|
|
|
/*
|
|
* Exit irq context without processing softirqs:
|
|
*/
|
|
/*
 * __irq_exit(): leave hardirq context; the mirror image of __irq_enter().
 * In order: call trace_hardirq_exit(), call account_system_vtime() for
 * current, subtract HARDIRQ_OFFSET from the preempt counter, then call
 * rcu_irq_exit(). No softirq processing is done here. Used by nmi_exit().
 */
#define __irq_exit() \
|
|
do { \
|
|
trace_hardirq_exit(); \
|
|
account_system_vtime(current); \
|
|
sub_preempt_count(HARDIRQ_OFFSET); \
|
|
rcu_irq_exit(); \
|
|
} while (0)
|
|
|
|
/*
|
|
* Exit irq context and process softirqs if needed:
|
|
*/
|
|
extern void irq_exit(void);
|
|
|
|
/*
 * nmi_enter(): bracket the start of NMI handling. Calls ftrace_nmi_enter()
 * first, then lockdep_off(), then enters hardirq context via __irq_enter().
 * nmi_exit() undoes these in the opposite order.
 * NOTE(review): ftrace_nmi_enter() is defined elsewhere (presumably
 * reachable through <asm/ftrace.h>); its behaviour is not visible here.
 */
#define nmi_enter() \
|
|
do { \
|
|
ftrace_nmi_enter(); \
|
|
lockdep_off(); \
|
|
__irq_enter(); \
|
|
} while (0)
|
|
/*
 * nmi_exit(): bracket the end of NMI handling. Leaves hardirq context via
 * __irq_exit(), then calls lockdep_on(), and calls ftrace_nmi_exit() last,
 * the reverse of the order used by nmi_enter().
 * NOTE(review): ftrace_nmi_exit() is defined elsewhere (presumably
 * reachable through <asm/ftrace.h>); its behaviour is not visible here.
 */
#define nmi_exit() \
|
|
do { \
|
|
__irq_exit(); \
|
|
lockdep_on(); \
|
|
ftrace_nmi_exit(); \
|
|
} while (0)
|
|
|
|
#endif /* LINUX_HARDIRQ_H */
|