mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-15 11:37:47 +00:00
a2a3374c47
With the consolidation of put_prev_task/set_next_task(), see commit 436f3eed5c69 ("sched: Combine the last put_prev_task() and the first set_next_task()"), we are now skipping the transition between these two functions when the previous and the next tasks are the same.

As a result, the scx idle state of a CPU is updated only when transitioning to or from the idle thread. While this is generally correct, it can lead to uneven and inefficient core utilization in certain scenarios [1].

A typical scenario involves proactive wake-ups: scx_bpf_pick_idle_cpu() selects and marks an idle CPU as busy, followed by a wake-up via scx_bpf_kick_cpu(), without dispatching any tasks. In this case, the CPU continues running the idle thread, returns to idle, but remains marked as busy, preventing it from being selected again as an idle CPU (until a task eventually runs on it and releases the CPU).

For example, running a workload that uses 20% of each CPU, combined with an scx scheduler using proactive wake-ups, results in the following core utilization:

  CPU 0: 25.7%
  CPU 1: 29.3%
  CPU 2: 26.5%
  CPU 3: 25.5%
  CPU 4:  0.0%
  CPU 5: 25.5%
  CPU 6:  0.0%
  CPU 7: 10.5%

To address this, refresh the idle state also in pick_task_idle(), during idle-to-idle transitions, but only trigger ops.update_idle() on actual state changes to prevent unnecessary updates to the scx scheduler and maintain balanced state transitions.

With this change in place, the core utilization in the previous example becomes the following:

  CPU 0: 18.8%
  CPU 1: 19.4%
  CPU 2: 18.0%
  CPU 3: 18.7%
  CPU 4: 19.3%
  CPU 5: 18.9%
  CPU 6: 18.7%
  CPU 7: 19.3%

[1] https://github.com/sched-ext/scx/pull/1139

Fixes: 7c65ae81ea86 ("sched_ext: Don't call put_prev_task_scx() before picking the next task")
Signed-off-by: Andrea Righi <arighi@nvidia.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
92 lines
3.4 KiB
C
92 lines
3.4 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
/*
|
|
* BPF extensible scheduler class: Documentation/scheduler/sched-ext.rst
|
|
*
|
|
* Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
|
|
* Copyright (c) 2022 Tejun Heo <tj@kernel.org>
|
|
* Copyright (c) 2022 David Vernet <dvernet@meta.com>
|
|
*/
|
|
#ifdef CONFIG_SCHED_CLASS_EXT
|
|
|
|
/*
 * Out-of-line sched_ext hook operating on runqueue @rq; presumably called
 * from the scheduler tick -- confirm against the definition. When
 * CONFIG_SCHED_CLASS_EXT is off an empty inline stub takes its place.
 */
void scx_tick(struct rq *rq);
|
|
/*
 * Out-of-line helper taking a sched_ext entity @scx; presumably initializes
 * it -- confirm against the definition. NOTE(review): no stub is provided in
 * the !CONFIG_SCHED_CLASS_EXT branch of this section, so callers presumably
 * guard the call themselves.
 */
void init_scx_entity(struct sched_ext_entity *scx);
|
|
/*
 * Out-of-line fork-path hook for task @p; presumably runs before scx_fork()
 * -- confirm ordering against the caller. Empty inline stub when
 * CONFIG_SCHED_CLASS_EXT is off.
 */
void scx_pre_fork(struct task_struct *p);
|
|
/*
 * Out-of-line fork-path hook for task @p that can report a result through its
 * int return value (presumably 0 on success, negative errno on failure --
 * confirm). The !CONFIG_SCHED_CLASS_EXT stub always returns 0.
 */
int scx_fork(struct task_struct *p);
|
|
/*
 * Out-of-line fork-path hook for task @p; presumably runs after a successful
 * scx_fork() -- confirm against the caller. Empty inline stub when
 * CONFIG_SCHED_CLASS_EXT is off.
 */
void scx_post_fork(struct task_struct *p);
|
|
/*
 * Out-of-line fork-path hook for task @p; presumably undoes scx_fork() when
 * the fork is aborted -- confirm against the caller. Empty inline stub when
 * CONFIG_SCHED_CLASS_EXT is off.
 */
void scx_cancel_fork(struct task_struct *p);
|
|
/*
 * Out-of-line predicate on runqueue @rq; presumably tells the caller whether
 * the tick may be stopped -- confirm against the definition. The
 * !CONFIG_SCHED_CLASS_EXT stub always returns true.
 */
bool scx_can_stop_tick(struct rq *rq);
|
|
/*
 * Out-of-line hook for runqueue @rq, paired by name with scx_rq_deactivate();
 * presumably called when the rq comes online -- confirm. Empty inline stub
 * when CONFIG_SCHED_CLASS_EXT is off.
 */
void scx_rq_activate(struct rq *rq);
|
|
/*
 * Out-of-line hook for runqueue @rq, paired by name with scx_rq_activate();
 * presumably called when the rq goes offline -- confirm. Empty inline stub
 * when CONFIG_SCHED_CLASS_EXT is off.
 */
void scx_rq_deactivate(struct rq *rq);
|
|
/*
 * Out-of-line check taking task @p and scheduling @policy; presumably returns
 * 0 to allow the change and a negative errno to reject it -- confirm. The
 * !CONFIG_SCHED_CLASS_EXT stub always returns 0.
 */
int scx_check_setscheduler(struct task_struct *p, int policy);
|
|
/*
 * Out-of-line predicate on a scheduling @policy; presumably reports whether a
 * task with that policy belongs on sched_ext -- confirm. NOTE(review): no
 * stub is provided in the !CONFIG_SCHED_CLASS_EXT branch of this section.
 */
bool task_should_scx(int policy);
|
|
/*
 * Out-of-line initializer for the sched_ext class (presumably one-time boot
 * setup -- confirm). Empty inline stub when CONFIG_SCHED_CLASS_EXT is off.
 */
void init_sched_ext_class(void);
|
|
|
|
/*
 * Read the cpuperf_target value stored in the scx state of @cpu's runqueue.
 *
 * Returns 0 without touching the runqueue when scx_enabled() is false.
 */
static inline u32 scx_cpuperf_target(s32 cpu)
{
	if (!scx_enabled())
		return 0;

	return cpu_rq(cpu)->scx.cpuperf_target;
}
|
|
|
|
static inline bool task_on_scx(const struct task_struct *p)
|
|
{
|
|
return scx_enabled() && p->sched_class == &ext_sched_class;
|
|
}
|
|
|
|
#ifdef CONFIG_SCHED_CORE
|
|
/*
 * Out-of-line comparison of tasks @a and @b, declared only when both
 * CONFIG_SCHED_CLASS_EXT and CONFIG_SCHED_CORE are set. Presumably returns
 * true when @a has lower priority than @b for core scheduling, with @in_fi
 * flagging the forced-idle case -- confirm against the definition.
 */
bool scx_prio_less(const struct task_struct *a, const struct task_struct *b,
|
|
bool in_fi);
|
|
#endif
|
|
|
|
#else /* CONFIG_SCHED_CLASS_EXT */
|
|
|
|
/* !CONFIG_SCHED_CLASS_EXT: nothing to do for @rq. */
static inline void scx_tick(struct rq *rq)
{
}
|
|
/* !CONFIG_SCHED_CLASS_EXT: nothing to do for @p. */
static inline void scx_pre_fork(struct task_struct *p)
{
}
|
|
/* !CONFIG_SCHED_CLASS_EXT: always reports success (0); @p is ignored. */
static inline int scx_fork(struct task_struct *p)
{
	return 0;
}
|
|
/* !CONFIG_SCHED_CLASS_EXT: nothing to do for @p. */
static inline void scx_post_fork(struct task_struct *p)
{
}
|
|
/* !CONFIG_SCHED_CLASS_EXT: nothing to undo for @p. */
static inline void scx_cancel_fork(struct task_struct *p)
{
}
|
|
/* !CONFIG_SCHED_CLASS_EXT: the target is always 0, whatever @cpu is. */
static inline u32 scx_cpuperf_target(s32 cpu)
{
	return 0;
}
|
|
/* !CONFIG_SCHED_CLASS_EXT: always answers true; @rq is ignored. */
static inline bool scx_can_stop_tick(struct rq *rq)
{
	return true;
}
|
|
/* !CONFIG_SCHED_CLASS_EXT: nothing to do for @rq. */
static inline void scx_rq_activate(struct rq *rq)
{
}
|
|
/* !CONFIG_SCHED_CLASS_EXT: nothing to do for @rq. */
static inline void scx_rq_deactivate(struct rq *rq)
{
}
|
|
/*
 * !CONFIG_SCHED_CLASS_EXT: always returns 0, regardless of @p and @policy.
 */
static inline int scx_check_setscheduler(struct task_struct *p, int policy)
{
	return 0;
}
|
|
/* !CONFIG_SCHED_CLASS_EXT: always false; @p is never dereferenced. */
static inline bool task_on_scx(const struct task_struct *p)
{
	return false;
}
|
|
/* !CONFIG_SCHED_CLASS_EXT: there is no class to initialize. */
static inline void init_sched_ext_class(void)
{
}
|
|
|
|
#endif /* CONFIG_SCHED_CLASS_EXT */
|
|
|
|
#if defined(CONFIG_SCHED_CLASS_EXT) && defined(CONFIG_SMP)
|
|
/*
 * Out-of-line worker behind scx_update_idle(), which forwards @rq, @idle and
 * @do_notify to it only when scx_enabled() is true. Declared only when both
 * CONFIG_SCHED_CLASS_EXT and CONFIG_SMP are set. Presumably @do_notify
 * selects whether the BPF scheduler's update_idle callback is invoked --
 * confirm against the definition.
 */
void __scx_update_idle(struct rq *rq, bool idle, bool do_notify);
|
|
|
|
/*
 * Inline gate in front of __scx_update_idle().
 *
 * Does nothing unless scx_enabled() is true; otherwise @rq, @idle and
 * @do_notify are handed to __scx_update_idle() unchanged.
 */
static inline void scx_update_idle(struct rq *rq, bool idle, bool do_notify)
{
	if (!scx_enabled())
		return;

	__scx_update_idle(rq, idle, do_notify);
}
|
|
#else
|
|
/*
 * Fallback used unless both CONFIG_SCHED_CLASS_EXT and CONFIG_SMP are set:
 * all three arguments are ignored and nothing happens.
 */
static inline void scx_update_idle(struct rq *rq, bool idle, bool do_notify)
{
}
|
|
#endif
|
|
|
|
#ifdef CONFIG_CGROUP_SCHED
|
|
#ifdef CONFIG_EXT_GROUP_SCHED
|
|
/*
 * Out-of-line hook for task group @tg, declared when CONFIG_CGROUP_SCHED and
 * CONFIG_EXT_GROUP_SCHED are both set. Presumably returns 0 on success and a
 * negative errno otherwise -- confirm. The stub used without
 * CONFIG_EXT_GROUP_SCHED always returns 0.
 */
int scx_tg_online(struct task_group *tg);
|
|
/*
 * Out-of-line hook for task group @tg, paired by name with scx_tg_online().
 * Declared when CONFIG_CGROUP_SCHED and CONFIG_EXT_GROUP_SCHED are both set;
 * an empty stub is used without CONFIG_EXT_GROUP_SCHED.
 */
void scx_tg_offline(struct task_group *tg);
|
|
/*
 * Out-of-line cgroup-attach check over task set @tset. Presumably returns 0
 * to allow the attach and a negative errno to veto it -- confirm. The stub
 * used without CONFIG_EXT_GROUP_SCHED always returns 0.
 */
int scx_cgroup_can_attach(struct cgroup_taskset *tset);
|
|
/*
 * Out-of-line hook for task @p; presumably called when @p moves between
 * cgroups -- confirm against the caller. Empty stub without
 * CONFIG_EXT_GROUP_SCHED.
 */
void scx_move_task(struct task_struct *p);
|
|
/*
 * Out-of-line hook with no arguments; presumably closes an attach sequence
 * opened by scx_cgroup_can_attach() -- confirm. Empty stub without
 * CONFIG_EXT_GROUP_SCHED.
 */
void scx_cgroup_finish_attach(void);
|
|
/*
 * Out-of-line hook over task set @tset; presumably rolls back what
 * scx_cgroup_can_attach() prepared when the attach is aborted -- confirm.
 * Empty stub without CONFIG_EXT_GROUP_SCHED.
 */
void scx_cgroup_cancel_attach(struct cgroup_taskset *tset);
|
|
/*
 * Out-of-line hook taking task group @tg and a weight @cgrp_weight;
 * presumably propagates a cgroup weight change to sched_ext -- confirm units
 * and range against the caller. Empty stub without CONFIG_EXT_GROUP_SCHED.
 */
void scx_group_set_weight(struct task_group *tg, unsigned long cgrp_weight);
|
|
/*
 * Out-of-line hook taking task group @tg and an @idle flag; presumably
 * propagates the group's idle setting to sched_ext -- confirm. Empty stub
 * without CONFIG_EXT_GROUP_SCHED.
 */
void scx_group_set_idle(struct task_group *tg, bool idle);
|
|
#else /* CONFIG_EXT_GROUP_SCHED */
|
|
/* !CONFIG_EXT_GROUP_SCHED: always returns 0; @tg is ignored. */
static inline int scx_tg_online(struct task_group *tg)
{
	return 0;
}
|
|
/* !CONFIG_EXT_GROUP_SCHED: nothing to do for @tg. */
static inline void scx_tg_offline(struct task_group *tg)
{
}
|
|
/* !CONFIG_EXT_GROUP_SCHED: always returns 0; @tset is ignored. */
static inline int scx_cgroup_can_attach(struct cgroup_taskset *tset)
{
	return 0;
}
|
|
/* !CONFIG_EXT_GROUP_SCHED: nothing to do for @p. */
static inline void scx_move_task(struct task_struct *p)
{
}
|
|
/* !CONFIG_EXT_GROUP_SCHED: nothing to finish. */
static inline void scx_cgroup_finish_attach(void)
{
}
|
|
/* !CONFIG_EXT_GROUP_SCHED: nothing to roll back for @tset. */
static inline void scx_cgroup_cancel_attach(struct cgroup_taskset *tset)
{
}
|
|
/* !CONFIG_EXT_GROUP_SCHED: @tg and @cgrp_weight are ignored. */
static inline void scx_group_set_weight(struct task_group *tg,
					unsigned long cgrp_weight)
{
}
|
|
/* !CONFIG_EXT_GROUP_SCHED: @tg and @idle are ignored. */
static inline void scx_group_set_idle(struct task_group *tg, bool idle)
{
}
|
|
#endif /* CONFIG_EXT_GROUP_SCHED */
|
|
#endif /* CONFIG_CGROUP_SCHED */
|