mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-09 23:39:18 +00:00
9b1d82fa16
This patch is a version of RCU designed for !SMP, provided as a small-footprint RCU implementation. In particular, the implementation of synchronize_rcu() is extremely lightweight and high performance. It passes rcutorture testing in each of the four relevant configurations (combinations of NO_HZ and PREEMPT) on x86. This saves about 1K bytes compared to old Classic RCU (which is no longer in mainline), and more than three kilobytes compared to Hierarchical RCU (updated to 2.6.30):

CONFIG_TREE_RCU:

   text    data     bss     dec filename
    183       4       0     187 kernel/rcupdate.o
   2783     520      36    3339 kernel/rcutree.o
                           3526 Total (vs 4565 for v7)

CONFIG_TREE_PREEMPT_RCU:

   text    data     bss     dec filename
    263       4       0     267 kernel/rcupdate.o
   4594     776      52    5422 kernel/rcutree.o
                           5689 Total (6155 for v7)

CONFIG_TINY_RCU:

   text    data     bss     dec filename
     96       4       0     100 kernel/rcupdate.o
    734      24       0     758 kernel/rcutiny.o
                            858 Total (vs 848 for v7)

The above is for x86. Your mileage may vary on other platforms. Further compression is possible, but is being procrastinated.

Changes from v7 (http://lkml.org/lkml/2009/10/9/388):

o Apply Lai Jiangshan's review comments (aside from might_sleep() in synchronize_sched(), which is covered by SMP builds).
o Fix up expedited primitives.

Changes from v6 (http://lkml.org/lkml/2009/9/23/293):

o Forward ported to put it into the 2.6.33 stream.
o Added lockdep support.
o Make lightweight rcu_barrier.

Changes from v5 (http://lkml.org/lkml/2009/6/23/12):

o Ported to latest pre-2.6.32 merge window kernel.
  - Renamed rcu_qsctr_inc() to rcu_sched_qs().
  - Renamed rcu_bh_qsctr_inc() to rcu_bh_qs().
  - Provided trivial rcu_cpu_notify().
  - Provided trivial exit_rcu().
  - Provided trivial rcu_needs_cpu().
  - Fixed up the rcu_*_enter/exit() functions in linux/hardirq.h.
o Removed the dependence on EMBEDDED, with a view to making TINY_RCU default for !SMP at some time in the future.
o Added (trivial) support for expedited grace periods.

Changes from v4 (http://lkml.org/lkml/2009/5/2/91) include:

o Squeeze the size down a bit further by removing the ->completed field from struct rcu_ctrlblk.
o This permits synchronize_rcu() to become the empty function. Previous concerns about rcutorture were unfounded, as rcutorture correctly handles a constant value from rcu_batches_completed() and rcu_batches_completed_bh().

Changes from v3 (http://lkml.org/lkml/2009/3/29/221) include:

o Changed rcu_batches_completed(), rcu_batches_completed_bh(), rcu_enter_nohz(), rcu_exit_nohz(), rcu_nmi_enter(), and rcu_nmi_exit(), to be static inlines, as suggested by David Howells. Doing this saves about 100 bytes from rcutiny.o. (The numbers between v3 and this v4 of the patch are not directly comparable, since they are against different versions of Linux.)

Changes from v2 (http://lkml.org/lkml/2009/2/3/333) include:

o Fix whitespace issues.
o Change short-circuit "||" operator to instead be "+" in order to fix performance bug noted by "kraai" on LWN. (http://lwn.net/Articles/324348/)

Changes from v1 (http://lkml.org/lkml/2009/1/13/440) include:

o This version depends on EMBEDDED as well as !SMP, as suggested by Ingo.
o Updated rcu_needs_cpu() to unconditionally return zero, permitting the CPU to enter dynticks-idle mode at any time. This works because callbacks can be invoked upon entry to dynticks-idle mode.
o Paul is now OK with this being included, based on a poll at the Kernel Miniconf at linux.conf.au, where about ten people said that they cared about saving 900 bytes on single-CPU systems.
o Applies to both mainline and tip/core/rcu.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: David Howells <dhowells@redhat.com>
Acked-by: Josh Triplett <josh@joshtriplett.org>
Reviewed-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: avi@redhat.com
Cc: mtosatti@redhat.com
LKML-Reference: <12565226351355-git-send-email->
Signed-off-by: Ingo Molnar <mingo@elte.hu>
231 lines
5.9 KiB
C
231 lines
5.9 KiB
C
#ifndef LINUX_HARDIRQ_H
#define LINUX_HARDIRQ_H

#include <linux/preempt.h>
#ifdef CONFIG_PREEMPT
#include <linux/smp_lock.h>
#endif
#include <linux/lockdep.h>
#include <linux/ftrace_irq.h>
#include <asm/hardirq.h>
#include <asm/system.h>

/*
 * We put the hardirq and softirq counter into the preemption
 * counter. The bitmask has the following meaning:
 *
 * - bits 0-7 are the preemption count (max preemption depth: 256)
 * - bits 8-15 are the softirq count (max # of softirqs: 256)
 *
 * The hardirq count can in theory reach the same as NR_IRQS.
 * In reality, the number of nested IRQS is limited to the stack
 * size as well. For archs with over 1000 IRQS it is not practical
 * to expect that they will all nest. We give a max of 10 bits for
 * hardirq nesting. An arch may choose to give less than 10 bits.
 * m68k expects it to be 8.
 *
 * - bits 16-25 are the hardirq count (max # of nested hardirqs: 1024)
 * - bit 26 is the NMI_MASK
 * - bit 27 is the PREEMPT_ACTIVE flag (default; an arch may predefine
 *   PREEMPT_ACTIVE, but never below the bit that follows NMI_MASK)
 *
 * The positions above (and the masks below) assume the default
 * HARDIRQ_BITS of 10; with fewer hardirq bits the NMI and default
 * PREEMPT_ACTIVE bits move down accordingly.
 *
 * PREEMPT_MASK:   0x000000ff
 * SOFTIRQ_MASK:   0x0000ff00
 * HARDIRQ_MASK:   0x03ff0000
 *     NMI_MASK:   0x04000000
 * PREEMPT_ACTIVE: 0x08000000 (default)
 */
#define PREEMPT_BITS	8
#define SOFTIRQ_BITS	8
#define NMI_BITS	1

#define MAX_HARDIRQ_BITS 10

/* An arch may define a smaller HARDIRQ_BITS before this point. */
#ifndef HARDIRQ_BITS
# define HARDIRQ_BITS	MAX_HARDIRQ_BITS
#endif

#if HARDIRQ_BITS > MAX_HARDIRQ_BITS
#error HARDIRQ_BITS too high!
#endif

/* Each field starts where the previous one ends. */
#define PREEMPT_SHIFT	0
#define SOFTIRQ_SHIFT	(PREEMPT_SHIFT + PREEMPT_BITS)
#define HARDIRQ_SHIFT	(SOFTIRQ_SHIFT + SOFTIRQ_BITS)
#define NMI_SHIFT	(HARDIRQ_SHIFT + HARDIRQ_BITS)

/* Mask with the low x bits set. */
#define __IRQ_MASK(x)	((1UL << (x))-1)

#define PREEMPT_MASK	(__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT)
#define SOFTIRQ_MASK	(__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT)
#define HARDIRQ_MASK	(__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
#define NMI_MASK	(__IRQ_MASK(NMI_BITS)     << NMI_SHIFT)

/* Value of "one" in each field, i.e. the increment for one nesting level. */
#define PREEMPT_OFFSET	(1UL << PREEMPT_SHIFT)
#define SOFTIRQ_OFFSET	(1UL << SOFTIRQ_SHIFT)
#define HARDIRQ_OFFSET	(1UL << HARDIRQ_SHIFT)
#define NMI_OFFSET	(1UL << NMI_SHIFT)

/* Default PREEMPT_ACTIVE: the single bit directly above the NMI field. */
#ifndef PREEMPT_ACTIVE
#define PREEMPT_ACTIVE_BITS	1
#define PREEMPT_ACTIVE_SHIFT	(NMI_SHIFT + NMI_BITS)
#define PREEMPT_ACTIVE	(__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT)
#endif

/* An arch-provided PREEMPT_ACTIVE must not overlap the fields above. */
#if PREEMPT_ACTIVE < (1 << (NMI_SHIFT + NMI_BITS))
#error PREEMPT_ACTIVE is too low!
#endif

/*
 * Extract the nesting fields from preempt_count().  Each expression is
 * the masked (unshifted) field, so it is non-zero exactly when the
 * corresponding count is non-zero.  irq_count() combines the hardirq,
 * softirq and NMI fields.
 */
#define hardirq_count()	(preempt_count() & HARDIRQ_MASK)
#define softirq_count()	(preempt_count() & SOFTIRQ_MASK)
#define irq_count()	(preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
				 | NMI_MASK))

/*
 * Are we doing bottom half or hardware interrupt processing?
 * Are we in a softirq context? Interrupt context?
 *
 * in_irq():       non-zero when hardirq_count() is non-zero
 * in_softirq():   non-zero when softirq_count() is non-zero
 * in_interrupt(): non-zero when irq_count() is non-zero
 */
#define in_irq()		(hardirq_count())
#define in_softirq()		(softirq_count())
#define in_interrupt()		(irq_count())

/*
 * Are we in NMI context?
 *
 * Non-zero when the NMI field of preempt_count() is set.
 */
#define in_nmi()	(preempt_count() & NMI_MASK)

/*
 * Baseline values that in_atomic() and in_atomic_preempt_off() compare
 * preempt_count() against.  With CONFIG_PREEMPT the in_atomic() baseline
 * is kernel_locked() and the preempt-off baseline is 1; otherwise both
 * are 0.
 */
#if defined(CONFIG_PREEMPT)
# define PREEMPT_INATOMIC_BASE kernel_locked()
# define PREEMPT_CHECK_OFFSET 1
#else
# define PREEMPT_INATOMIC_BASE 0
# define PREEMPT_CHECK_OFFSET 0
#endif

/*
 * Are we running in atomic context?  WARNING: this macro cannot
 * always detect atomic context; in particular, it cannot know about
 * held spinlocks in non-preemptible kernels.  Thus it should not be
 * used in the general case to determine whether sleeping is possible.
 * Do not use in_atomic() in driver code.
 *
 * The PREEMPT_ACTIVE bit is ignored; everything else in preempt_count()
 * is compared against PREEMPT_INATOMIC_BASE.
 */
#define in_atomic()	((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_INATOMIC_BASE)

/*
 * Check whether we were atomic before we did preempt_disable():
 * (used by the scheduler, *after* releasing the kernel lock)
 *
 * As with in_atomic(), the PREEMPT_ACTIVE bit is ignored; the rest of
 * preempt_count() is compared against PREEMPT_CHECK_OFFSET.
 */
#define in_atomic_preempt_off() \
		((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET)

/*
 * preemptible(): with CONFIG_PREEMPT, true only when preempt_count() is
 * zero and interrupts are enabled; without it, always 0.
 *
 * IRQ_EXIT_OFFSET: HARDIRQ_OFFSET-1 with CONFIG_PREEMPT, HARDIRQ_OFFSET
 * otherwise.  NOTE(review): presumably consumed by irq_exit(), which is
 * defined outside this header -- confirm against the caller.
 */
#ifdef CONFIG_PREEMPT
# define preemptible()	(preempt_count() == 0 && !irqs_disabled())
# define IRQ_EXIT_OFFSET (HARDIRQ_OFFSET-1)
#else
# define preemptible()	0
# define IRQ_EXIT_OFFSET HARDIRQ_OFFSET
#endif

/*
 * synchronize_irq(irq): a real function (defined elsewhere) on
 * CONFIG_SMP or CONFIG_GENERIC_HARDIRQS builds.  On all other builds it
 * is just barrier() and the irq argument is not evaluated.
 */
#if defined(CONFIG_SMP) || defined(CONFIG_GENERIC_HARDIRQS)
extern void synchronize_irq(unsigned int irq);
#else
# define synchronize_irq(irq)	barrier()
#endif

struct task_struct;

/*
 * Without CONFIG_VIRT_CPU_ACCOUNTING, account_system_vtime() is an empty
 * inline stub so that callers need no #ifdefs.  With that option set, no
 * definition is provided here.
 */
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
static inline void account_system_vtime(struct task_struct *tsk)
{
}
#endif

/*
 * RCU hooks for interrupt and NMI entry/exit.
 *
 * - CONFIG_NO_HZ with CONFIG_TINY_RCU: rcu_irq_enter()/rcu_irq_exit()
 *   are inline wrappers around rcu_exit_nohz()/rcu_enter_nohz(), and
 *   the NMI hooks are empty inlines.
 * - CONFIG_NO_HZ without CONFIG_TINY_RCU: all four hooks are external
 *   functions.
 * - No CONFIG_NO_HZ: all four hooks compile away to empty statements.
 */
#if defined(CONFIG_NO_HZ)
#if defined(CONFIG_TINY_RCU)
extern void rcu_enter_nohz(void);
extern void rcu_exit_nohz(void);

/* Entering an interrupt: call rcu_exit_nohz(). */
static inline void rcu_irq_enter(void)
{
	rcu_exit_nohz();
}

/* Leaving an interrupt: call rcu_enter_nohz(). */
static inline void rcu_irq_exit(void)
{
	rcu_enter_nohz();
}

static inline void rcu_nmi_enter(void)
{
}

static inline void rcu_nmi_exit(void)
{
}

#else
extern void rcu_irq_enter(void);
extern void rcu_irq_exit(void);
extern void rcu_nmi_enter(void);
extern void rcu_nmi_exit(void);
#endif
#else
# define rcu_irq_enter() do { } while (0)
# define rcu_irq_exit() do { } while (0)
# define rcu_nmi_enter() do { } while (0)
# define rcu_nmi_exit() do { } while (0)
#endif /* #if defined(CONFIG_NO_HZ) */

/*
 * It is safe to do non-atomic ops on ->hardirq_context,
 * because NMI handlers may not preempt and the ops are
 * always balanced, so the interrupted value of ->hardirq_context
 * will always be restored.
 *
 * __irq_enter() performs, in this order: account_system_vtime(current),
 * add_preempt_count(HARDIRQ_OFFSET), trace_hardirq_enter().
 */
#define __irq_enter()					\
	do {						\
		account_system_vtime(current);		\
		add_preempt_count(HARDIRQ_OFFSET);	\
		trace_hardirq_enter();			\
	} while (0)

/*
 * Enter irq context (on NO_HZ, update jiffies):
 */
extern void irq_enter(void);

/*
 * Exit irq context without processing softirqs:
 *
 * Performs, in this order: trace_hardirq_exit(),
 * account_system_vtime(current), sub_preempt_count(HARDIRQ_OFFSET).
 */
#define __irq_exit()					\
	do {						\
		trace_hardirq_exit();			\
		account_system_vtime(current);		\
		sub_preempt_count(HARDIRQ_OFFSET);	\
	} while (0)

/*
 * Exit irq context and process softirqs if needed:
 */
extern void irq_exit(void);

/*
 * Enter NMI context.
 *
 * BUG()s if already in NMI context (the check runs before the NMI bit
 * is set), then adds NMI_OFFSET + HARDIRQ_OFFSET to the preempt count,
 * so hardirq_count() is non-zero as well.  Around that it calls
 * ftrace_nmi_enter() first, then lockdep_off(), rcu_nmi_enter() and
 * trace_hardirq_enter().  nmi_exit() undoes these steps in reverse.
 */
#define nmi_enter()						\
	do {							\
		ftrace_nmi_enter();				\
		BUG_ON(in_nmi());				\
		add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET);	\
		lockdep_off();					\
		rcu_nmi_enter();				\
		trace_hardirq_enter();				\
	} while (0)

/*
 * Leave NMI context: the reverse of nmi_enter().
 *
 * Calls trace_hardirq_exit(), rcu_nmi_exit() and lockdep_on(), BUG()s if
 * we are not in NMI context (checked while the NMI bit is still set),
 * subtracts NMI_OFFSET + HARDIRQ_OFFSET from the preempt count, and
 * finally calls ftrace_nmi_exit().
 */
#define nmi_exit()						\
	do {							\
		trace_hardirq_exit();				\
		rcu_nmi_exit();					\
		lockdep_on();					\
		BUG_ON(!in_nmi());				\
		sub_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET);	\
		ftrace_nmi_exit();				\
	} while (0)

#endif /* LINUX_HARDIRQ_H */