mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-04 04:04:19 +00:00
sched_ext: Fixes for v6.12-rc2
- ops.enqueue() didn't have a way to tell whether select_task_rq_scx() and thus ops.select() were skipped. Some schedulers were incorrectly using SCX_ENQ_WAKEUP. Add SCX_ENQ_CPU_SELECTED and fix scx_qmap using it. - Remove a spurious WARN_ON_ONCE() in scx_cgroup_exit(). - Fix error information clobbering during load. - Add missing __weak markers to BPF helper declarations. - Doc update. -----BEGIN PGP SIGNATURE----- iIQEABYKACwWIQTfIjM1kS57o3GsC/uxYfJx3gVYGQUCZwWKkA4cdGpAa2VybmVs Lm9yZwAKCRCxYfJx3gVYGelnAQDTA8GSIahTEHKM0c3yXE6K1/M56zo8Spp5OOA7 kXHR3AD/Y0RcXgaCvMI13aozmQWq756gyB6/qczN0+X3jx6wZwI= =6xbe -----END PGP SIGNATURE----- Merge tag 'sched_ext-for-6.12-rc2-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext Pull sched_ext fixes from Tejun Heo: - ops.enqueue() didn't have a way to tell whether select_task_rq_scx() and thus ops.select() were skipped. Some schedulers were incorrectly using SCX_ENQ_WAKEUP. Add SCX_ENQ_CPU_SELECTED and fix scx_qmap using it. - Remove a spurious WARN_ON_ONCE() in scx_cgroup_exit() - Fix error information clobbering during load - Add missing __weak markers to BPF helper declarations - Doc update * tag 'sched_ext-for-6.12-rc2-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext: sched_ext: Documentation: Update instructions for running example schedulers sched_ext, scx_qmap: Add and use SCX_ENQ_CPU_SELECTED sched/core: Add ENQUEUE_RQ_SELECTED to indicate whether ->select_task_rq() was called sched/core: Make select_task_rq() take the pointer to wake_flags instead of value sched_ext: scx_cgroup_exit() may be called without successful scx_cgroup_init() sched_ext: Improve error reporting during loading sched_ext: Add __weak markers to BPF helper function declarations
This commit is contained in:
commit
75b607fab3
@ -66,7 +66,7 @@ BPF scheduler and reverts all tasks back to CFS.
|
||||
.. code-block:: none
|
||||
|
||||
# make -j16 -C tools/sched_ext
|
||||
# tools/sched_ext/scx_simple
|
||||
# tools/sched_ext/build/bin/scx_simple
|
||||
local=0 global=3
|
||||
local=5 global=24
|
||||
local=9 global=44
|
||||
|
@ -3518,14 +3518,16 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
|
||||
* The caller (fork, wakeup) owns p->pi_lock, ->cpus_ptr is stable.
|
||||
*/
|
||||
static inline
|
||||
int select_task_rq(struct task_struct *p, int cpu, int wake_flags)
|
||||
int select_task_rq(struct task_struct *p, int cpu, int *wake_flags)
|
||||
{
|
||||
lockdep_assert_held(&p->pi_lock);
|
||||
|
||||
if (p->nr_cpus_allowed > 1 && !is_migration_disabled(p))
|
||||
cpu = p->sched_class->select_task_rq(p, cpu, wake_flags);
|
||||
else
|
||||
if (p->nr_cpus_allowed > 1 && !is_migration_disabled(p)) {
|
||||
cpu = p->sched_class->select_task_rq(p, cpu, *wake_flags);
|
||||
*wake_flags |= WF_RQ_SELECTED;
|
||||
} else {
|
||||
cpu = cpumask_any(p->cpus_ptr);
|
||||
}
|
||||
|
||||
/*
|
||||
* In order not to call set_task_cpu() on a blocking task we need
|
||||
@ -3659,6 +3661,8 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
|
||||
rq->nr_uninterruptible--;
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
if (wake_flags & WF_RQ_SELECTED)
|
||||
en_flags |= ENQUEUE_RQ_SELECTED;
|
||||
if (wake_flags & WF_MIGRATED)
|
||||
en_flags |= ENQUEUE_MIGRATED;
|
||||
else
|
||||
@ -4120,6 +4124,8 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
|
||||
guard(preempt)();
|
||||
int cpu, success = 0;
|
||||
|
||||
wake_flags |= WF_TTWU;
|
||||
|
||||
if (p == current) {
|
||||
/*
|
||||
* We're waking current, this means 'p->on_rq' and 'task_cpu(p)
|
||||
@ -4252,7 +4258,7 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
|
||||
*/
|
||||
smp_cond_load_acquire(&p->on_cpu, !VAL);
|
||||
|
||||
cpu = select_task_rq(p, p->wake_cpu, wake_flags | WF_TTWU);
|
||||
cpu = select_task_rq(p, p->wake_cpu, &wake_flags);
|
||||
if (task_cpu(p) != cpu) {
|
||||
if (p->in_iowait) {
|
||||
delayacct_blkio_end(p);
|
||||
@ -4793,6 +4799,7 @@ void wake_up_new_task(struct task_struct *p)
|
||||
{
|
||||
struct rq_flags rf;
|
||||
struct rq *rq;
|
||||
int wake_flags = WF_FORK;
|
||||
|
||||
raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
|
||||
WRITE_ONCE(p->__state, TASK_RUNNING);
|
||||
@ -4807,7 +4814,7 @@ void wake_up_new_task(struct task_struct *p)
|
||||
*/
|
||||
p->recent_used_cpu = task_cpu(p);
|
||||
rseq_migrate(p);
|
||||
__set_task_cpu(p, select_task_rq(p, task_cpu(p), WF_FORK));
|
||||
__set_task_cpu(p, select_task_rq(p, task_cpu(p), &wake_flags));
|
||||
#endif
|
||||
rq = __task_rq_lock(p, &rf);
|
||||
update_rq_clock(rq);
|
||||
@ -4815,7 +4822,7 @@ void wake_up_new_task(struct task_struct *p)
|
||||
|
||||
activate_task(rq, p, ENQUEUE_NOCLOCK | ENQUEUE_INITIAL);
|
||||
trace_sched_wakeup_new(p);
|
||||
wakeup_preempt(rq, p, WF_FORK);
|
||||
wakeup_preempt(rq, p, wake_flags);
|
||||
#ifdef CONFIG_SMP
|
||||
if (p->sched_class->task_woken) {
|
||||
/*
|
||||
|
@ -625,6 +625,10 @@ struct sched_ext_ops {
|
||||
/**
|
||||
* exit - Clean up after the BPF scheduler
|
||||
* @info: Exit info
|
||||
*
|
||||
* ops.exit() is also called on ops.init() failure, which is a bit
|
||||
* unusual. This is to allow rich reporting through @info on how
|
||||
* ops.init() failed.
|
||||
*/
|
||||
void (*exit)(struct scx_exit_info *info);
|
||||
|
||||
@ -692,6 +696,7 @@ enum scx_enq_flags {
|
||||
/* expose select ENQUEUE_* flags as enums */
|
||||
SCX_ENQ_WAKEUP = ENQUEUE_WAKEUP,
|
||||
SCX_ENQ_HEAD = ENQUEUE_HEAD,
|
||||
SCX_ENQ_CPU_SELECTED = ENQUEUE_RQ_SELECTED,
|
||||
|
||||
/* high 32bits are SCX specific */
|
||||
|
||||
@ -4048,7 +4053,6 @@ static void scx_cgroup_exit(void)
|
||||
|
||||
percpu_rwsem_assert_held(&scx_cgroup_rwsem);
|
||||
|
||||
WARN_ON_ONCE(!scx_cgroup_enabled);
|
||||
scx_cgroup_enabled = false;
|
||||
|
||||
/*
|
||||
@ -4117,6 +4121,7 @@ static int scx_cgroup_init(void)
|
||||
css->cgroup, &args);
|
||||
if (ret) {
|
||||
css_put(css);
|
||||
scx_ops_error("ops.cgroup_init() failed (%d)", ret);
|
||||
return ret;
|
||||
}
|
||||
tg->scx_flags |= SCX_TG_INITED;
|
||||
@ -5041,6 +5046,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
|
||||
if (ret) {
|
||||
ret = ops_sanitize_err("init", ret);
|
||||
cpus_read_unlock();
|
||||
scx_ops_error("ops.init() failed (%d)", ret);
|
||||
goto err_disable;
|
||||
}
|
||||
}
|
||||
@ -5150,8 +5156,8 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
|
||||
spin_lock_irq(&scx_tasks_lock);
|
||||
scx_task_iter_exit(&sti);
|
||||
spin_unlock_irq(&scx_tasks_lock);
|
||||
pr_err("sched_ext: ops.init_task() failed (%d) for %s[%d] while loading\n",
|
||||
ret, p->comm, p->pid);
|
||||
scx_ops_error("ops.init_task() failed (%d) for %s[%d]",
|
||||
ret, p->comm, p->pid);
|
||||
goto err_disable_unlock_all;
|
||||
}
|
||||
|
||||
@ -5199,14 +5205,8 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
|
||||
|
||||
scx_ops_bypass(false);
|
||||
|
||||
/*
|
||||
* Returning an error code here would lose the recorded error
|
||||
* information. Exit indicating success so that the error is notified
|
||||
* through ops.exit() with all the details.
|
||||
*/
|
||||
if (!scx_ops_tryset_enable_state(SCX_OPS_ENABLED, SCX_OPS_ENABLING)) {
|
||||
WARN_ON_ONCE(atomic_read(&scx_exit_kind) == SCX_EXIT_NONE);
|
||||
ret = 0;
|
||||
goto err_disable;
|
||||
}
|
||||
|
||||
@ -5241,10 +5241,18 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
|
||||
scx_ops_bypass(false);
|
||||
err_disable:
|
||||
mutex_unlock(&scx_ops_enable_mutex);
|
||||
/* must be fully disabled before returning */
|
||||
scx_ops_disable(SCX_EXIT_ERROR);
|
||||
/*
|
||||
* Returning an error code here would not pass all the error information
|
||||
* to userspace. Record errno using scx_ops_error() for cases
|
||||
* scx_ops_error() wasn't already invoked and exit indicating success so
|
||||
* that the error is notified through ops.exit() with all the details.
|
||||
*
|
||||
* Flush scx_ops_disable_work to ensure that error is reported before
|
||||
* init completion.
|
||||
*/
|
||||
scx_ops_error("scx_ops_enable() failed (%d)", ret);
|
||||
kthread_flush_work(&scx_ops_disable_work);
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
@ -2292,6 +2292,7 @@ static inline int task_on_rq_migrating(struct task_struct *p)
|
||||
#define WF_SYNC 0x10 /* Waker goes to sleep after wakeup */
|
||||
#define WF_MIGRATED 0x20 /* Internal use, task got migrated */
|
||||
#define WF_CURRENT_CPU 0x40 /* Prefer to move the wakee to the current CPU. */
|
||||
#define WF_RQ_SELECTED 0x80 /* ->select_task_rq() was called */
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static_assert(WF_EXEC == SD_BALANCE_EXEC);
|
||||
@ -2334,6 +2335,7 @@ extern const u32 sched_prio_to_wmult[40];
|
||||
* ENQUEUE_HEAD - place at front of runqueue (tail if not specified)
|
||||
* ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline)
|
||||
* ENQUEUE_MIGRATED - the task was migrated during wakeup
|
||||
* ENQUEUE_RQ_SELECTED - ->select_task_rq() was called
|
||||
*
|
||||
*/
|
||||
|
||||
@ -2360,6 +2362,7 @@ extern const u32 sched_prio_to_wmult[40];
|
||||
#define ENQUEUE_INITIAL 0x80
|
||||
#define ENQUEUE_MIGRATING 0x100
|
||||
#define ENQUEUE_DELAYED 0x200
|
||||
#define ENQUEUE_RQ_SELECTED 0x400
|
||||
|
||||
#define RETRY_TASK ((void *)-1UL)
|
||||
|
||||
|
@ -41,8 +41,8 @@ void scx_bpf_dispatch_vtime(struct task_struct *p, u64 dsq_id, u64 slice, u64 vt
|
||||
u32 scx_bpf_dispatch_nr_slots(void) __ksym;
|
||||
void scx_bpf_dispatch_cancel(void) __ksym;
|
||||
bool scx_bpf_consume(u64 dsq_id) __ksym;
|
||||
void scx_bpf_dispatch_from_dsq_set_slice(struct bpf_iter_scx_dsq *it__iter, u64 slice) __ksym;
|
||||
void scx_bpf_dispatch_from_dsq_set_vtime(struct bpf_iter_scx_dsq *it__iter, u64 vtime) __ksym;
|
||||
void scx_bpf_dispatch_from_dsq_set_slice(struct bpf_iter_scx_dsq *it__iter, u64 slice) __ksym __weak;
|
||||
void scx_bpf_dispatch_from_dsq_set_vtime(struct bpf_iter_scx_dsq *it__iter, u64 vtime) __ksym __weak;
|
||||
bool scx_bpf_dispatch_from_dsq(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
|
||||
bool scx_bpf_dispatch_vtime_from_dsq(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
|
||||
u32 scx_bpf_reenqueue_local(void) __ksym;
|
||||
@ -71,7 +71,7 @@ s32 scx_bpf_pick_any_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym;
|
||||
bool scx_bpf_task_running(const struct task_struct *p) __ksym;
|
||||
s32 scx_bpf_task_cpu(const struct task_struct *p) __ksym;
|
||||
struct rq *scx_bpf_cpu_rq(s32 cpu) __ksym;
|
||||
struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym;
|
||||
struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym __weak;
|
||||
|
||||
/*
|
||||
* Use the following as @it__iter when calling
|
||||
|
@ -230,8 +230,8 @@ void BPF_STRUCT_OPS(qmap_enqueue, struct task_struct *p, u64 enq_flags)
|
||||
return;
|
||||
}
|
||||
|
||||
/* if !WAKEUP, select_cpu() wasn't called, try direct dispatch */
|
||||
if (!(enq_flags & SCX_ENQ_WAKEUP) &&
|
||||
/* if select_cpu() wasn't called, try direct dispatch */
|
||||
if (!(enq_flags & SCX_ENQ_CPU_SELECTED) &&
|
||||
(cpu = pick_direct_dispatch_cpu(p, scx_bpf_task_cpu(p))) >= 0) {
|
||||
__sync_fetch_and_add(&nr_ddsp_from_enq, 1);
|
||||
scx_bpf_dispatch(p, SCX_DSQ_LOCAL_ON | cpu, slice_ns, enq_flags);
|
||||
|
Loading…
Reference in New Issue
Block a user