sched_ext: Fixes for v6.13-rc6

Merge tag 'sched_ext-for-6.13-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext

Pull sched_ext fixes from Tejun Heo:

 - Fix a corner case bug where ops.dispatch() couldn't extend the
   execution of the current task if SCX_OPS_ENQ_LAST is set (a BPF-side
   sketch of the affected pattern follows the commit list below).

 - Fix ops.cpu_release() not being called when an SCX task is preempted
   by a higher priority sched class task.

 - Fix builtin idle mask being incorrectly left as busy after an idle
   CPU is picked and kicked.

 - scx_ops_bypass() was unnecessarily using rq_lock(), which comes with
   rq-pinning sanity checks that could trigger spuriously. Switch to
   raw_spin_rq_lock().

* tag 'sched_ext-for-6.13-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext:
  sched_ext: idle: Refresh idle masks during idle-to-idle transitions
  sched_ext: switch class when preempted by higher priority scheduler
  sched_ext: Replace rq_lock() to raw_spin_rq_lock() in scx_ops_bypass()
  sched_ext: keep running prev when prev->scx.slice != 0
Merged by Linus Torvalds on 2025-01-10 15:11:58 -08:00 in commit 2e3f3090bd.
3 changed files with 74 additions and 26 deletions.
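
The SCX_OPS_ENQ_LAST corner case is easiest to see from the BPF side.
Below is a minimal, hypothetical sketch (the names sketch_*, SHARED_DSQ
and the omitted ops.init()/ops.enqueue() boilerplate are assumptions,
modeled on the in-tree tools/sched_ext examples): a scheduler that sets
SCX_OPS_ENQ_LAST and whose ops.dispatch() extends the current task by
refilling prev->scx.slice. Before the balance_one() fix in the first
diff below, such a refill could be ignored and @prev descheduled even
though its slice was non-zero.

#include <scx/common.bpf.h>

char _license[] SEC("license") = "GPL";

#define SHARED_DSQ 0	/* hypothetical custom DSQ, created in ops.init() */

void BPF_STRUCT_OPS(sketch_dispatch, s32 cpu, struct task_struct *prev)
{
	/* Prefer a task queued on the shared DSQ, if any. */
	if (scx_bpf_dsq_move_to_local(SHARED_DSQ))
		return;

	/*
	 * Nothing else to run: keep running @prev by refilling its
	 * slice. The fixed balance_one() honors this refill even when
	 * SCX_OPS_ENQ_LAST is set.
	 */
	if (prev && (prev->scx.flags & SCX_TASK_QUEUED))
		prev->scx.slice = SCX_SLICE_DFL;
}

SEC(".struct_ops.link")
struct sched_ext_ops sketch_ops = {
	.dispatch	= (void *)sketch_dispatch,
	.flags		= SCX_OPS_ENQ_LAST,
	.name		= "sketch",
};

With SCX_OPS_ENQ_LAST, the last runnable task is enqueued to the BPF
scheduler instead of being kept on the CPU by default, which is why the
slice refill is the only way for ops.dispatch() to extend @prev here.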

diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c

@@ -2747,6 +2747,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
 {
 	struct scx_dsp_ctx *dspc = this_cpu_ptr(scx_dsp_ctx);
 	bool prev_on_scx = prev->sched_class == &ext_sched_class;
+	bool prev_on_rq = prev->scx.flags & SCX_TASK_QUEUED;
 	int nr_loops = SCX_DSP_MAX_LOOPS;

 	lockdep_assert_rq_held(rq);
@@ -2779,8 +2780,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
 	 * See scx_ops_disable_workfn() for the explanation on the
 	 * bypassing test.
 	 */
-	if ((prev->scx.flags & SCX_TASK_QUEUED) &&
-	    prev->scx.slice && !scx_rq_bypassing(rq)) {
+	if (prev_on_rq && prev->scx.slice && !scx_rq_bypassing(rq)) {
 		rq->scx.flags |= SCX_RQ_BAL_KEEP;
 		goto has_tasks;
 	}
@@ -2813,6 +2813,10 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
 	flush_dispatch_buf(rq);

+	if (prev_on_rq && prev->scx.slice) {
+		rq->scx.flags |= SCX_RQ_BAL_KEEP;
+		goto has_tasks;
+	}
+
 	if (rq->scx.local_dsq.nr)
 		goto has_tasks;
 	if (consume_global_dsq(rq))
 		goto has_tasks;
@@ -2838,8 +2842,7 @@ no_tasks:
 	 * Didn't find another task to run. Keep running @prev unless
 	 * %SCX_OPS_ENQ_LAST is in effect.
 	 */
-	if ((prev->scx.flags & SCX_TASK_QUEUED) &&
-	    (!static_branch_unlikely(&scx_ops_enq_last) ||
+	if (prev_on_rq && (!static_branch_unlikely(&scx_ops_enq_last) ||
 	     scx_rq_bypassing(rq))) {
 		rq->scx.flags |= SCX_RQ_BAL_KEEP;
 		goto has_tasks;
@@ -3034,7 +3037,7 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p,
 	 */
 	if (p->scx.slice && !scx_rq_bypassing(rq)) {
 		dispatch_enqueue(&rq->scx.local_dsq, p, SCX_ENQ_HEAD);
-		return;
+		goto switch_class;
 	}

 	/*
@@ -3051,6 +3054,7 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p,
 		}
 	}

+switch_class:
 	if (next && next->sched_class != &ext_sched_class)
 		switch_class(rq, next);
 }
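
The new switch_class: label ensures ops.cpu_release() also fires when
the preempted SCX task still has slice remaining and is requeued at the
head of the local DSQ. Schedulers use that callback to reclaim tasks
left behind on the CPU; a minimal sketch of such a handler (hypothetical
name, same assumed boilerplate as above, following the pattern of the
in-tree scx_qmap example):

void BPF_STRUCT_OPS(sketch_cpu_release, s32 cpu,
		    struct scx_cpu_release_args *args)
{
	/*
	 * A higher priority sched class took the CPU: punt everything
	 * still queued on its local DSQ back to ops.enqueue() so it can
	 * be placed on a CPU that is still running SCX tasks.
	 */
	scx_bpf_reenqueue_local();
}

Without the fix, a preemption that hit the slice-remaining path returned
early and this callback was silently skipped.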
@@ -3586,16 +3590,8 @@ static void reset_idle_masks(void)
 	cpumask_copy(idle_masks.smt, cpu_online_mask);
 }

-void __scx_update_idle(struct rq *rq, bool idle)
+static void update_builtin_idle(int cpu, bool idle)
 {
-	int cpu = cpu_of(rq);
-
-	if (SCX_HAS_OP(update_idle) && !scx_rq_bypassing(rq)) {
-		SCX_CALL_OP(SCX_KF_REST, update_idle, cpu_of(rq), idle);
-		if (!static_branch_unlikely(&scx_builtin_idle_enabled))
-			return;
-	}
-
 	if (idle)
 		cpumask_set_cpu(cpu, idle_masks.cpu);
 	else
@@ -3622,6 +3618,57 @@
 #endif
 }

+/*
+ * Update the idle state of a CPU to @idle.
+ *
+ * If @do_notify is true, ops.update_idle() is invoked to notify the scx
+ * scheduler of an actual idle state transition (idle to busy or vice
+ * versa). If @do_notify is false, only the idle state in the idle masks is
+ * refreshed without invoking ops.update_idle().
+ *
+ * This distinction is necessary, because an idle CPU can be "reserved" and
+ * awakened via scx_bpf_pick_idle_cpu() + scx_bpf_kick_cpu(), marking it as
+ * busy even if no tasks are dispatched. In this case, the CPU may return
+ * to idle without a true state transition. Refreshing the idle masks
+ * without invoking ops.update_idle() ensures accurate idle state tracking
+ * while avoiding unnecessary updates and maintaining balanced state
+ * transitions.
+ */
+void __scx_update_idle(struct rq *rq, bool idle, bool do_notify)
+{
+	int cpu = cpu_of(rq);
+
+	lockdep_assert_rq_held(rq);
+
+	/*
+	 * Trigger ops.update_idle() only when transitioning from a task to
+	 * the idle thread and vice versa.
+	 *
+	 * Idle transitions are indicated by do_notify being set to true,
+	 * managed by put_prev_task_idle()/set_next_task_idle().
+	 */
+	if (SCX_HAS_OP(update_idle) && do_notify && !scx_rq_bypassing(rq))
+		SCX_CALL_OP(SCX_KF_REST, update_idle, cpu_of(rq), idle);
+
+	/*
+	 * Update the idle masks:
+	 * - for real idle transitions (do_notify == true)
+	 * - for idle-to-idle transitions (indicated by the previous task
+	 *   being the idle thread, managed by pick_task_idle())
+	 *
+	 * Skip updating idle masks if the previous task is not the idle
+	 * thread, since set_next_task_idle() has already handled it when
+	 * transitioning from a task to the idle thread (calling this
+	 * function with do_notify == true).
+	 *
+	 * In this way we can avoid updating the idle masks twice,
+	 * unnecessarily.
+	 */
+	if (static_branch_likely(&scx_builtin_idle_enabled))
+		if (do_notify || is_idle_task(rq->curr))
+			update_builtin_idle(cpu, idle);
+}
+
 static void handle_hotplug(struct rq *rq, bool online)
 {
 	int cpu = cpu_of(rq);
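
The "reserved" scenario described in the comment above typically comes
from an ops.select_cpu() implementation. A minimal, hypothetical sketch
(same assumptions as the earlier sketches):

s32 BPF_STRUCT_OPS(sketch_select_cpu, struct task_struct *p,
		   s32 prev_cpu, u64 wake_flags)
{
	s32 cpu;

	/*
	 * Claiming an idle CPU clears it from the builtin idle masks,
	 * i.e. marks it busy before anything has actually run on it.
	 */
	cpu = scx_bpf_pick_idle_cpu(p->cpus_ptr, 0);
	if (cpu >= 0) {
		/*
		 * Kick the CPU awake. If ops.enqueue() never dispatches
		 * a task to it, it goes straight back to idle: an
		 * idle-to-idle transition for which pick_task_idle()
		 * now calls __scx_update_idle() with do_notify == false,
		 * refreshing the masks without an ops.update_idle() call.
		 */
		scx_bpf_kick_cpu(cpu, SCX_KICK_IDLE);
		return cpu;
	}

	return prev_cpu;
}

Before this fix, a CPU reserved this way could be left marked busy in
the builtin idle masks even after it had gone back to idle.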
@@ -4744,10 +4791,9 @@ static void scx_ops_bypass(bool bypass)
 	 */
 	for_each_possible_cpu(cpu) {
 		struct rq *rq = cpu_rq(cpu);
-		struct rq_flags rf;
 		struct task_struct *p, *n;

-		rq_lock(rq, &rf);
+		raw_spin_rq_lock(rq);

 		if (bypass) {
 			WARN_ON_ONCE(rq->scx.flags & SCX_RQ_BYPASSING);
@@ -4763,7 +4809,7 @@
 		 * sees scx_rq_bypassing() before moving tasks to SCX.
 		 */
 		if (!scx_enabled()) {
-			rq_unlock(rq, &rf);
+			raw_spin_rq_unlock(rq);
 			continue;
 		}
@@ -4783,10 +4829,11 @@
 			sched_enq_and_set_task(&ctx);
 		}

-		rq_unlock(rq, &rf);
-
 		/* resched to restore ticks and idle state */
-		resched_cpu(cpu);
+		if (cpu_online(cpu) || cpu == smp_processor_id())
+			resched_curr(rq);
+
+		raw_spin_rq_unlock(rq);
 	}

 	atomic_dec(&scx_ops_breather_depth);
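
For reference, rq_lock() is more than the raw lock. As defined in
kernel/sched/sched.h (abridged here), it additionally pins the rq, and
the pinning machinery saves and sanity-checks clock-update state that
the bypass path has no use for and could trip spuriously:

static inline void rq_lock(struct rq *rq, struct rq_flags *rf)
	__acquires(rq->lock)
{
	raw_spin_rq_lock(rq);
	rq_pin_lock(rq, rf);	/* lockdep pin + clock-update sanity state */
}

Taking raw_spin_rq_lock() directly provides the same mutual exclusion
without arming those checks.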

diff --git a/kernel/sched/ext.h b/kernel/sched/ext.h

@@ -57,15 +57,15 @@ static inline void init_sched_ext_class(void) {}
 #endif /* CONFIG_SCHED_CLASS_EXT */

 #if defined(CONFIG_SCHED_CLASS_EXT) && defined(CONFIG_SMP)
-void __scx_update_idle(struct rq *rq, bool idle);
+void __scx_update_idle(struct rq *rq, bool idle, bool do_notify);

-static inline void scx_update_idle(struct rq *rq, bool idle)
+static inline void scx_update_idle(struct rq *rq, bool idle, bool do_notify)
 {
 	if (scx_enabled())
-		__scx_update_idle(rq, idle);
+		__scx_update_idle(rq, idle, do_notify);
 }
 #else
-static inline void scx_update_idle(struct rq *rq, bool idle) {}
+static inline void scx_update_idle(struct rq *rq, bool idle, bool do_notify) {}
 #endif

 #ifdef CONFIG_CGROUP_SCHED

diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c

@@ -452,19 +452,20 @@ static void wakeup_preempt_idle(struct rq *rq, struct task_struct *p, int flags)
 static void put_prev_task_idle(struct rq *rq, struct task_struct *prev, struct task_struct *next)
 {
 	dl_server_update_idle_time(rq, prev);
-	scx_update_idle(rq, false);
+	scx_update_idle(rq, false, true);
 }

 static void set_next_task_idle(struct rq *rq, struct task_struct *next, bool first)
 {
 	update_idle_core(rq);
-	scx_update_idle(rq, true);
+	scx_update_idle(rq, true, true);

 	schedstat_inc(rq->sched_goidle);
 	next->se.exec_start = rq_clock_task(rq);
 }

 struct task_struct *pick_task_idle(struct rq *rq)
 {
+	scx_update_idle(rq, true, false);
 	return rq->idle;
 }