sched: Add Lazy preemption model

Change fair to use resched_curr_lazy(), which, when the lazy
preemption model is selected, will set TIF_NEED_RESCHED_LAZY.

This LAZY bit will be promoted to the full NEED_RESCHED bit on tick.
As such, the average delay between setting LAZY and actually
rescheduling will be TICK_NSEC/2: the bit gets set at an arbitrary
point within a tick period, so on average half a period passes before
the next tick promotes it. (A schematic model follows the changelog.)

In short, Lazy preemption delays preemption for the fair class but
functions as Full preemption for all the other classes, most notably
the realtime (RR/FIFO/DEADLINE) classes.

The goal is to bridge the performance gap with Voluntary, such that we
might eventually remove that option entirely.

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: https://lkml.kernel.org/r/20241007075055.331243614@infradead.org
commit 7c70cb94d2 (parent 26baa1f1c4)
Peter Zijlstra, 2024-10-04 14:46:58 +02:00
6 changed files with 107 additions and 8 deletions
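In outline, the mechanism reduces to two operations. Below is a
minimal userspace model of just the flag handling (illustrative, not
the kernel implementation: the names mirror the kernel's, but the
single tif_flags word is a stand-in for each task's thread_info flags,
and the real promotion runs from sched_tick() under the runqueue
lock):

#include <stdio.h>

/* Bit positions standing in for TIF_NEED_RESCHED{,_LAZY}. */
#define NEED_RESCHED      (1UL << 0)
#define NEED_RESCHED_LAZY (1UL << 1)

static unsigned long tif_flags;    /* stand-in for thread_info::flags */

/* resched_curr_lazy(): under the Lazy model only the LAZY bit is set;
 * the IRQ-return preemption path does not act on it, so the current
 * task keeps running for now. */
static void resched_curr_lazy(void)
{
        tif_flags |= NEED_RESCHED_LAZY;
}

/* sched_tick(): promote a pending LAZY request to a full NEED_RESCHED,
 * bounding the extra preemption delay to at most one tick. */
static void sched_tick(void)
{
        if (tif_flags & NEED_RESCHED_LAZY)
                tif_flags |= NEED_RESCHED;
}

int main(void)
{
        resched_curr_lazy();
        printf("after lazy request: need_resched=%d\n",
               !!(tif_flags & NEED_RESCHED));   /* 0 - still running */
        sched_tick();
        printf("after tick:         need_resched=%d\n",
               !!(tif_flags & NEED_RESCHED));   /* 1 - preempt now */
        return 0;
}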

include/linux/preempt.h

@@ -486,6 +486,7 @@ DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable())
 extern bool preempt_model_none(void);
 extern bool preempt_model_voluntary(void);
 extern bool preempt_model_full(void);
+extern bool preempt_model_lazy(void);

 #else
@@ -502,6 +503,11 @@ static inline bool preempt_model_full(void)
 	return IS_ENABLED(CONFIG_PREEMPT);
 }

+static inline bool preempt_model_lazy(void)
+{
+	return IS_ENABLED(CONFIG_PREEMPT_LAZY);
+}
+
 #endif

 static inline bool preempt_model_rt(void)
@@ -519,7 +525,7 @@ static inline bool preempt_model_rt(void)
  */
 static inline bool preempt_model_preemptible(void)
 {
-	return preempt_model_full() || preempt_model_rt();
+	return preempt_model_full() || preempt_model_lazy() || preempt_model_rt();
 }

 #endif /* __LINUX_PREEMPT_H */
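Note the last hunk: with Lazy selected, preempt_model_preemptible()
now reports true. That is consistent with the mode table further down
in core.c, where lazy keeps preempt_schedule(),
preempt_schedule_notrace() and irqentry_exit_cond_resched() fully
enabled; only the fair class becomes less eager to request the
reschedule in the first place.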

kernel/Kconfig.preempt

@@ -11,6 +11,9 @@ config PREEMPT_BUILD
 	select PREEMPTION
 	select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK

+config ARCH_HAS_PREEMPT_LAZY
+	bool
+
 choice
 	prompt "Preemption Model"
 	default PREEMPT_NONE
@@ -67,6 +70,18 @@ config PREEMPT
 	  embedded system with latency requirements in the milliseconds
 	  range.

+config PREEMPT_LAZY
+	bool "Scheduler controlled preemption model"
+	depends on !ARCH_NO_PREEMPT
+	depends on ARCH_HAS_PREEMPT_LAZY
+	select PREEMPT_BUILD
+	help
+	  This option provides a scheduler driven preemption model that
+	  is fundamentally similar to full preemption, but is less
+	  eager to preempt SCHED_NORMAL tasks in an attempt to
+	  reduce lock holder preemption and recover some of the performance
+	  gains seen from using Voluntary preemption.
+
 config PREEMPT_RT
 	bool "Fully Preemptible Kernel (Real-Time)"
 	depends on EXPERT && ARCH_SUPPORTS_RT
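ARCH_HAS_PREEMPT_LAZY carries no prompt, so it is not user-selectable:
an architecture opts in with "select ARCH_HAS_PREEMPT_LAZY" once its
entry code knows about TIF_NEED_RESCHED_LAZY (defining the bit and
honouring it on the exit-to-user path). Until then, neither
CONFIG_PREEMPT_LAZY nor the dynamic "lazy" mode is offered.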

kernel/sched/core.c

@@ -1083,6 +1083,13 @@ static void __resched_curr(struct rq *rq, int tif)

 	lockdep_assert_rq_held(rq);

+	/*
+	 * Always immediately preempt the idle task; no point in delaying doing
+	 * actual work.
+	 */
+	if (is_idle_task(curr) && tif == TIF_NEED_RESCHED_LAZY)
+		tif = TIF_NEED_RESCHED;
+
 	if (cti->flags & ((1 << tif) | _TIF_NEED_RESCHED))
 		return;

@@ -1108,6 +1115,32 @@ void resched_curr(struct rq *rq)
 	__resched_curr(rq, TIF_NEED_RESCHED);
 }

+#ifdef CONFIG_PREEMPT_DYNAMIC
+static DEFINE_STATIC_KEY_FALSE(sk_dynamic_preempt_lazy);
+static __always_inline bool dynamic_preempt_lazy(void)
+{
+	return static_branch_unlikely(&sk_dynamic_preempt_lazy);
+}
+#else
+static __always_inline bool dynamic_preempt_lazy(void)
+{
+	return IS_ENABLED(CONFIG_PREEMPT_LAZY);
+}
+#endif
+
+static __always_inline int get_lazy_tif_bit(void)
+{
+	if (dynamic_preempt_lazy())
+		return TIF_NEED_RESCHED_LAZY;
+
+	return TIF_NEED_RESCHED;
+}
+
+void resched_curr_lazy(struct rq *rq)
+{
+	__resched_curr(rq, get_lazy_tif_bit());
+}
+
 void resched_cpu(int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
@@ -5612,6 +5645,10 @@ void sched_tick(void)
 	update_rq_clock(rq);
 	hw_pressure = arch_scale_hw_pressure(cpu_of(rq));
 	update_hw_load_avg(rq_clock_task(rq), rq, hw_pressure);
+
+	if (dynamic_preempt_lazy() && tif_test_bit(TIF_NEED_RESCHED_LAZY))
+		resched_curr(rq);
+
 	donor->sched_class->task_tick(rq, donor, 0);
 	if (sched_feat(LATENCY_WARN))
 		resched_latency = cpu_resched_latency(rq);
@@ -7374,6 +7411,7 @@ EXPORT_SYMBOL(__cond_resched_rwlock_write);
  *   preempt_schedule           <- NOP
  *   preempt_schedule_notrace   <- NOP
  *   irqentry_exit_cond_resched <- NOP
+ *   dynamic_preempt_lazy       <- false
  *
  * VOLUNTARY:
  *   cond_resched               <- __cond_resched
@@ -7381,6 +7419,7 @@ EXPORT_SYMBOL(__cond_resched_rwlock_write);
  *   preempt_schedule           <- NOP
  *   preempt_schedule_notrace   <- NOP
  *   irqentry_exit_cond_resched <- NOP
+ *   dynamic_preempt_lazy       <- false
  *
  * FULL:
  *   cond_resched               <- RET0
@@ -7388,6 +7427,15 @@ EXPORT_SYMBOL(__cond_resched_rwlock_write);
  *   preempt_schedule           <- preempt_schedule
  *   preempt_schedule_notrace   <- preempt_schedule_notrace
  *   irqentry_exit_cond_resched <- irqentry_exit_cond_resched
+ *   dynamic_preempt_lazy       <- false
+ *
+ * LAZY:
+ *   cond_resched               <- RET0
+ *   might_resched              <- RET0
+ *   preempt_schedule           <- preempt_schedule
+ *   preempt_schedule_notrace   <- preempt_schedule_notrace
+ *   irqentry_exit_cond_resched <- irqentry_exit_cond_resched
+ *   dynamic_preempt_lazy       <- true
  */

@@ -7395,6 +7443,7 @@ enum {
 	preempt_dynamic_none,
 	preempt_dynamic_voluntary,
 	preempt_dynamic_full,
+	preempt_dynamic_lazy,
 };

 int preempt_dynamic_mode = preempt_dynamic_undefined;
@@ -7410,15 +7459,23 @@ int sched_dynamic_mode(const char *str)
 	if (!strcmp(str, "full"))
 		return preempt_dynamic_full;

+#ifdef CONFIG_ARCH_HAS_PREEMPT_LAZY
+	if (!strcmp(str, "lazy"))
+		return preempt_dynamic_lazy;
+#endif
+
 	return -EINVAL;
 }

+#define preempt_dynamic_key_enable(f)	static_key_enable(&sk_dynamic_##f.key)
+#define preempt_dynamic_key_disable(f)	static_key_disable(&sk_dynamic_##f.key)
+
 #if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
 #define preempt_dynamic_enable(f)	static_call_update(f, f##_dynamic_enabled)
 #define preempt_dynamic_disable(f)	static_call_update(f, f##_dynamic_disabled)
 #elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
-#define preempt_dynamic_enable(f)	static_key_enable(&sk_dynamic_##f.key)
-#define preempt_dynamic_disable(f)	static_key_disable(&sk_dynamic_##f.key)
+#define preempt_dynamic_enable(f)	preempt_dynamic_key_enable(f)
+#define preempt_dynamic_disable(f)	preempt_dynamic_key_disable(f)
 #else
 #error "Unsupported PREEMPT_DYNAMIC mechanism"
 #endif
@@ -7438,6 +7495,7 @@ static void __sched_dynamic_update(int mode)
 	preempt_dynamic_enable(preempt_schedule);
 	preempt_dynamic_enable(preempt_schedule_notrace);
 	preempt_dynamic_enable(irqentry_exit_cond_resched);
+	preempt_dynamic_key_disable(preempt_lazy);

 	switch (mode) {
 	case preempt_dynamic_none:
@@ -7447,6 +7505,7 @@ static void __sched_dynamic_update(int mode)
 		preempt_dynamic_disable(preempt_schedule);
 		preempt_dynamic_disable(preempt_schedule_notrace);
 		preempt_dynamic_disable(irqentry_exit_cond_resched);
+		preempt_dynamic_key_disable(preempt_lazy);
 		if (mode != preempt_dynamic_mode)
 			pr_info("Dynamic Preempt: none\n");
 		break;
@@ -7458,6 +7517,7 @@ static void __sched_dynamic_update(int mode)
 		preempt_dynamic_disable(preempt_schedule);
 		preempt_dynamic_disable(preempt_schedule_notrace);
 		preempt_dynamic_disable(irqentry_exit_cond_resched);
+		preempt_dynamic_key_disable(preempt_lazy);
 		if (mode != preempt_dynamic_mode)
 			pr_info("Dynamic Preempt: voluntary\n");
 		break;
@@ -7469,9 +7529,22 @@ static void __sched_dynamic_update(int mode)
 		preempt_dynamic_enable(preempt_schedule);
 		preempt_dynamic_enable(preempt_schedule_notrace);
 		preempt_dynamic_enable(irqentry_exit_cond_resched);
+		preempt_dynamic_key_disable(preempt_lazy);
 		if (mode != preempt_dynamic_mode)
 			pr_info("Dynamic Preempt: full\n");
 		break;
+
+	case preempt_dynamic_lazy:
+		if (!klp_override)
+			preempt_dynamic_disable(cond_resched);
+		preempt_dynamic_disable(might_resched);
+		preempt_dynamic_enable(preempt_schedule);
+		preempt_dynamic_enable(preempt_schedule_notrace);
+		preempt_dynamic_enable(irqentry_exit_cond_resched);
+		preempt_dynamic_key_enable(preempt_lazy);
+		if (mode != preempt_dynamic_mode)
+			pr_info("Dynamic Preempt: lazy\n");
+		break;
 	}

 	preempt_dynamic_mode = mode;
@@ -7534,6 +7607,8 @@ static void __init preempt_dynamic_init(void)
 		sched_dynamic_update(preempt_dynamic_none);
 	} else if (IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY)) {
 		sched_dynamic_update(preempt_dynamic_voluntary);
+	} else if (IS_ENABLED(CONFIG_PREEMPT_LAZY)) {
+		sched_dynamic_update(preempt_dynamic_lazy);
 	} else {
 		/* Default static call setting, nothing to do */
 		WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT));
@@ -7554,6 +7629,7 @@ static void __init preempt_dynamic_init(void)
 PREEMPT_MODEL_ACCESSOR(none);
 PREEMPT_MODEL_ACCESSOR(voluntary);
 PREEMPT_MODEL_ACCESSOR(full);
+PREEMPT_MODEL_ACCESSOR(lazy);

 #else /* !CONFIG_PREEMPT_DYNAMIC: */
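Two details in the __sched_dynamic_update() changes are easy to miss:
every pre-existing mode now explicitly disables the new preempt_lazy
static key, so switching away from lazy at runtime leaves no stale
state; and because sched_dynamic_mode() also backs the preempt= boot
parameter, booting with preempt=lazy should select this mode on
ARCH_HAS_PREEMPT_LAZY kernels, while the #ifdef keeps "lazy" returning
-EINVAL everywhere else.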

kernel/sched/debug.c

@@ -245,11 +245,12 @@ static ssize_t sched_dynamic_write(struct file *filp, const char __user *ubuf,
 static int sched_dynamic_show(struct seq_file *m, void *v)
 {
 	static const char * preempt_modes[] = {
-		"none", "voluntary", "full"
+		"none", "voluntary", "full", "lazy",
 	};
+	int j = ARRAY_SIZE(preempt_modes) - !IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY);
 	int i;

-	for (i = 0; i < ARRAY_SIZE(preempt_modes); i++) {
+	for (i = 0; i < j; i++) {
 		if (preempt_dynamic_mode == i)
 			seq_puts(m, "(");
 		seq_puts(m, preempt_modes[i]);
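This is the read side of the debugfs knob: reading
/sys/kernel/debug/sched/preempt lists the available modes with the
current one in parentheses, and writing one of the strings switches
the model at runtime. The j clamp hides "lazy" from that listing on
architectures without ARCH_HAS_PREEMPT_LAZY, matching the write
side's -EINVAL.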

kernel/sched/fair.c

@@ -1251,7 +1251,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
 		return;

 	if (resched || did_preempt_short(cfs_rq, curr)) {
-		resched_curr(rq);
+		resched_curr_lazy(rq);
 		clear_buddies(cfs_rq, curr);
 	}
 }
@@ -5677,7 +5677,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
 	 * validating it and just reschedule.
 	 */
 	if (queued) {
-		resched_curr(rq_of(cfs_rq));
+		resched_curr_lazy(rq_of(cfs_rq));
 		return;
 	}
 #endif
@@ -8829,7 +8829,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
 		return;

 preempt:
-	resched_curr(rq);
+	resched_curr_lazy(rq);
 }

 static struct task_struct *pick_task_fair(struct rq *rq)
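These three call sites cover the ways the fair class asks for a
reschedule: slice expiry in update_curr(), the hrtick-queued
entity_tick() path, and wakeup preemption in
check_preempt_wakeup_fair(). The RT and deadline classes keep calling
resched_curr() directly, which is how they retain Full-preemption
behaviour under the Lazy model.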

kernel/sched/sched.h

@@ -2689,6 +2689,7 @@ extern void init_sched_rt_class(void);
 extern void init_sched_fair_class(void);

 extern void resched_curr(struct rq *rq);
+extern void resched_curr_lazy(struct rq *rq);
 extern void resched_cpu(int cpu);

 extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);