timers: Keep the pinned timers separate from the others

Separate the storage space for pinned timers. Deferrable timers (regardless
of whether they are pinned or non-pinned) are still enqueued into their own base.

This is preparatory work for changing the NOHZ timer placement from a
push-at-enqueue-time model to a pull-at-expiry-time model.

Originally-by: Richard Cochran (linutronix GmbH) <richardcochran@gmail.com>
Signed-off-by: Anna-Maria Behnsen <anna-maria@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Link: https://lore.kernel.org/r/20240221090548.36600-11-anna-maria@linutronix.de
This commit is contained in:
Anna-Maria Behnsen 2024-02-21 10:05:38 +01:00 committed by Thomas Gleixner
parent 9f6a3c602c
commit 83a665dc99

View File

@ -187,12 +187,18 @@ EXPORT_SYMBOL(jiffies_64);
#define WHEEL_SIZE (LVL_SIZE * LVL_DEPTH) #define WHEEL_SIZE (LVL_SIZE * LVL_DEPTH)
#ifdef CONFIG_NO_HZ_COMMON #ifdef CONFIG_NO_HZ_COMMON
# define NR_BASES 2 /*
# define BASE_STD 0 * If multiple bases need to be locked, use the base ordering for lock
# define BASE_DEF 1 * nesting, i.e. lowest number first.
*/
# define NR_BASES 3
# define BASE_LOCAL 0
# define BASE_GLOBAL 1
# define BASE_DEF 2
#else #else
# define NR_BASES 1 # define NR_BASES 1
# define BASE_STD 0 # define BASE_LOCAL 0
# define BASE_GLOBAL 0
# define BASE_DEF 0 # define BASE_DEF 0
#endif #endif
@ -944,7 +950,10 @@ static int detach_if_pending(struct timer_list *timer, struct timer_base *base,
static inline struct timer_base *get_timer_cpu_base(u32 tflags, u32 cpu) static inline struct timer_base *get_timer_cpu_base(u32 tflags, u32 cpu)
{ {
struct timer_base *base = per_cpu_ptr(&timer_bases[BASE_STD], cpu); int index = tflags & TIMER_PINNED ? BASE_LOCAL : BASE_GLOBAL;
struct timer_base *base;
base = per_cpu_ptr(&timer_bases[index], cpu);
/* /*
* If the timer is deferrable and NO_HZ_COMMON is set then we need * If the timer is deferrable and NO_HZ_COMMON is set then we need
@ -957,7 +966,10 @@ static inline struct timer_base *get_timer_cpu_base(u32 tflags, u32 cpu)
static inline struct timer_base *get_timer_this_cpu_base(u32 tflags) static inline struct timer_base *get_timer_this_cpu_base(u32 tflags)
{ {
struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); int index = tflags & TIMER_PINNED ? BASE_LOCAL : BASE_GLOBAL;
struct timer_base *base;
base = this_cpu_ptr(&timer_bases[index]);
/* /*
* If the timer is deferrable and NO_HZ_COMMON is set then we need * If the timer is deferrable and NO_HZ_COMMON is set then we need
@ -2006,6 +2018,9 @@ static unsigned long next_timer_interrupt(struct timer_base *base,
* Move next_expiry for the empty base into the future to prevent an * Move next_expiry for the empty base into the future to prevent an
* unnecessary raise of the timer softirq when the next_expiry value * unnecessary raise of the timer softirq when the next_expiry value
* will be reached even if there is no timer pending. * will be reached even if there is no timer pending.
*
* This update is also required to make timer_base::next_expiry values
* easily comparable to find out which base holds the first pending timer.
*/ */
if (!base->timers_pending) if (!base->timers_pending)
base->next_expiry = basej + NEXT_TIMER_MAX_DELTA; base->next_expiry = basej + NEXT_TIMER_MAX_DELTA;
@ -2016,9 +2031,10 @@ static unsigned long next_timer_interrupt(struct timer_base *base,
static inline u64 __get_next_timer_interrupt(unsigned long basej, u64 basem, static inline u64 __get_next_timer_interrupt(unsigned long basej, u64 basem,
bool *idle) bool *idle)
{ {
struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); unsigned long nextevt, nextevt_local, nextevt_global;
struct timer_base *base_local, *base_global;
u64 expires = KTIME_MAX; u64 expires = KTIME_MAX;
unsigned long nextevt; bool local_first;
/* /*
* Pretend that there is no timer pending if the cpu is offline. * Pretend that there is no timer pending if the cpu is offline.
@ -2030,10 +2046,20 @@ static inline u64 __get_next_timer_interrupt(unsigned long basej, u64 basem,
return expires; return expires;
} }
raw_spin_lock(&base->lock); base_local = this_cpu_ptr(&timer_bases[BASE_LOCAL]);
nextevt = next_timer_interrupt(base, basej); base_global = this_cpu_ptr(&timer_bases[BASE_GLOBAL]);
if (base->timers_pending) { raw_spin_lock(&base_local->lock);
raw_spin_lock_nested(&base_global->lock, SINGLE_DEPTH_NESTING);
nextevt_local = next_timer_interrupt(base_local, basej);
nextevt_global = next_timer_interrupt(base_global, basej);
local_first = time_before_eq(nextevt_local, nextevt_global);
nextevt = local_first ? nextevt_local : nextevt_global;
if (base_local->timers_pending || base_global->timers_pending) {
/* If we missed a tick already, force 0 delta */ /* If we missed a tick already, force 0 delta */
if (time_before(nextevt, basej)) if (time_before(nextevt, basej))
nextevt = basej; nextevt = basej;
@ -2044,31 +2070,31 @@ static inline u64 __get_next_timer_interrupt(unsigned long basej, u64 basem,
* We have a fresh next event. Check whether we can forward the * We have a fresh next event. Check whether we can forward the
* base. * base.
*/ */
__forward_timer_base(base, basej); __forward_timer_base(base_local, basej);
__forward_timer_base(base_global, basej);
/* /*
* Set base->is_idle only when caller is timer_base_try_to_set_idle() * Set base->is_idle only when caller is timer_base_try_to_set_idle()
*/ */
if (idle) { if (idle) {
/* /*
* Base is idle if the next event is more than a tick away. * Bases are idle if the next event is more than a tick away.
* *
* If the base is marked idle then any timer add operation must * If the base is marked idle then any timer add operation must
* forward the base clk itself to keep granularity small. This * forward the base clk itself to keep granularity small. This
* idle logic is only maintained for the BASE_STD base, * idle logic is only maintained for the BASE_LOCAL and
* deferrable timers may still see large granularity skew (by * BASE_GLOBAL base, deferrable timers may still see large
* design). * granularity skew (by design).
*/ */
if (!base->is_idle) { if (!base_local->is_idle && time_after(nextevt, basej + 1)) {
if (time_after(nextevt, basej + 1)) { base_local->is_idle = base_global->is_idle = true;
base->is_idle = true; trace_timer_base_idle(true, base_local->cpu);
trace_timer_base_idle(true, base->cpu);
}
} }
*idle = base->is_idle; *idle = base_local->is_idle;
} }
raw_spin_unlock(&base->lock); raw_spin_unlock(&base_global->lock);
raw_spin_unlock(&base_local->lock);
return cmp_next_hrtimer_event(basem, expires); return cmp_next_hrtimer_event(basem, expires);
} }
@ -2112,15 +2138,14 @@ u64 timer_base_try_to_set_idle(unsigned long basej, u64 basem, bool *idle)
*/ */
void timer_clear_idle(void) void timer_clear_idle(void)
{ {
struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
/* /*
* We do this unlocked. The worst outcome is a remote enqueue sending * We do this unlocked. The worst outcome is a remote enqueue sending
* a pointless IPI, but taking the lock would just make the window for * a pointless IPI, but taking the lock would just make the window for
* sending the IPI a few instructions smaller for the cost of taking * sending the IPI a few instructions smaller for the cost of taking
* the lock in the exit from idle path. * the lock in the exit from idle path.
*/ */
base->is_idle = false; __this_cpu_write(timer_bases[BASE_LOCAL].is_idle, false);
__this_cpu_write(timer_bases[BASE_GLOBAL].is_idle, false);
trace_timer_base_idle(false, smp_processor_id()); trace_timer_base_idle(false, smp_processor_id());
} }
#endif #endif
@ -2171,11 +2196,13 @@ static inline void __run_timers(struct timer_base *base)
*/ */
static __latent_entropy void run_timer_softirq(struct softirq_action *h) static __latent_entropy void run_timer_softirq(struct softirq_action *h)
{ {
struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_LOCAL]);
__run_timers(base); __run_timers(base);
if (IS_ENABLED(CONFIG_NO_HZ_COMMON)) if (IS_ENABLED(CONFIG_NO_HZ_COMMON)) {
__run_timers(this_cpu_ptr(&timer_bases[BASE_GLOBAL]));
__run_timers(this_cpu_ptr(&timer_bases[BASE_DEF])); __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));
}
} }
/* /*
@ -2183,7 +2210,7 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h)
*/ */
static void run_local_timers(void) static void run_local_timers(void)
{ {
struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_LOCAL]);
hrtimer_run_queues(); hrtimer_run_queues();