mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-01 18:53:30 +00:00
sched_ext: Implement sched_ext_ops.cpu_online/offline()
Add ops.cpu_online/offline() which are invoked when CPUs come online and offline respectively. As the enqueue path already automatically bypasses tasks to the local dsq on a deactivated CPU, BPF schedulers are guaranteed to see tasks only on CPUs which are between online() and offline(). If the BPF scheduler doesn't implement ops.cpu_online/offline(), the scheduler is automatically exited with SCX_ECODE_RESTART | SCX_ECODE_RSN_HOTPLUG. Userspace can implement CPU hotpplug support trivially by simply reinitializing and reloading the scheduler. scx_qmap is updated to print out online CPUs on hotplug events. Other schedulers are updated to restart based on ecode. v3: - The previous implementation added @reason to sched_class.rq_on/offline() to distinguish between CPU hotplug events and topology updates. This was buggy and fragile as the methods are skipped if the current state equals the target state. Instead, add scx_rq_[de]activate() which are directly called from sched_cpu_de/activate(). This also allows ops.cpu_on/offline() to sleep which can be useful. - ops.dispatch() could be called on a CPU that the BPF scheduler was told to be offline. The dispatch patch is updated to bypass in such cases. v2: - To accommodate lock ordering change between scx_cgroup_rwsem and cpus_read_lock(), CPU hotplug operations are put into its own SCX_OPI block and enabled eariler during scx_ope_enable() so that cpus_read_lock() can be dropped before acquiring scx_cgroup_rwsem. - Auto exit with ECODE added. Signed-off-by: Tejun Heo <tj@kernel.org> Reviewed-by: David Vernet <dvernet@meta.com> Acked-by: Josh Don <joshdon@google.com> Acked-by: Hao Luo <haoluo@google.com> Acked-by: Barret Rhoden <brho@google.com>
This commit is contained in:
parent
245254f708
commit
60c27fb59f
@ -7984,6 +7984,8 @@ int sched_cpu_activate(unsigned int cpu)
|
|||||||
cpuset_cpu_active();
|
cpuset_cpu_active();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
scx_rq_activate(rq);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Put the rq online, if not already. This happens:
|
* Put the rq online, if not already. This happens:
|
||||||
*
|
*
|
||||||
@ -8044,6 +8046,8 @@ int sched_cpu_deactivate(unsigned int cpu)
|
|||||||
}
|
}
|
||||||
rq_unlock_irqrestore(rq, &rf);
|
rq_unlock_irqrestore(rq, &rf);
|
||||||
|
|
||||||
|
scx_rq_deactivate(rq);
|
||||||
|
|
||||||
#ifdef CONFIG_SCHED_SMT
|
#ifdef CONFIG_SCHED_SMT
|
||||||
/*
|
/*
|
||||||
* When going down, decrement the number of cores with SMT present.
|
* When going down, decrement the number of cores with SMT present.
|
||||||
|
@ -30,6 +30,29 @@ enum scx_exit_kind {
|
|||||||
SCX_EXIT_ERROR_STALL, /* watchdog detected stalled runnable tasks */
|
SCX_EXIT_ERROR_STALL, /* watchdog detected stalled runnable tasks */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* An exit code can be specified when exiting with scx_bpf_exit() or
|
||||||
|
* scx_ops_exit(), corresponding to exit_kind UNREG_BPF and UNREG_KERN
|
||||||
|
* respectively. The codes are 64bit of the format:
|
||||||
|
*
|
||||||
|
* Bits: [63 .. 48 47 .. 32 31 .. 0]
|
||||||
|
* [ SYS ACT ] [ SYS RSN ] [ USR ]
|
||||||
|
*
|
||||||
|
* SYS ACT: System-defined exit actions
|
||||||
|
* SYS RSN: System-defined exit reasons
|
||||||
|
* USR : User-defined exit codes and reasons
|
||||||
|
*
|
||||||
|
* Using the above, users may communicate intention and context by ORing system
|
||||||
|
* actions and/or system reasons with a user-defined exit code.
|
||||||
|
*/
|
||||||
|
enum scx_exit_code {
|
||||||
|
/* Reasons */
|
||||||
|
SCX_ECODE_RSN_HOTPLUG = 1LLU << 32,
|
||||||
|
|
||||||
|
/* Actions */
|
||||||
|
SCX_ECODE_ACT_RESTART = 1LLU << 48,
|
||||||
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* scx_exit_info is passed to ops.exit() to describe why the BPF scheduler is
|
* scx_exit_info is passed to ops.exit() to describe why the BPF scheduler is
|
||||||
* being disabled.
|
* being disabled.
|
||||||
@ -457,7 +480,29 @@ struct sched_ext_ops {
|
|||||||
void (*dump_task)(struct scx_dump_ctx *ctx, struct task_struct *p);
|
void (*dump_task)(struct scx_dump_ctx *ctx, struct task_struct *p);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* All online ops must come before ops.init().
|
* All online ops must come before ops.cpu_online().
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* cpu_online - A CPU became online
|
||||||
|
* @cpu: CPU which just came up
|
||||||
|
*
|
||||||
|
* @cpu just came online. @cpu will not call ops.enqueue() or
|
||||||
|
* ops.dispatch(), nor run tasks associated with other CPUs beforehand.
|
||||||
|
*/
|
||||||
|
void (*cpu_online)(s32 cpu);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* cpu_offline - A CPU is going offline
|
||||||
|
* @cpu: CPU which is going offline
|
||||||
|
*
|
||||||
|
* @cpu is going offline. @cpu will not call ops.enqueue() or
|
||||||
|
* ops.dispatch(), nor run tasks associated with other CPUs afterwards.
|
||||||
|
*/
|
||||||
|
void (*cpu_offline)(s32 cpu);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* All CPU hotplug ops must come before ops.init().
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -496,6 +541,15 @@ struct sched_ext_ops {
|
|||||||
*/
|
*/
|
||||||
u32 exit_dump_len;
|
u32 exit_dump_len;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* hotplug_seq - A sequence number that may be set by the scheduler to
|
||||||
|
* detect when a hotplug event has occurred during the loading process.
|
||||||
|
* If 0, no detection occurs. Otherwise, the scheduler will fail to
|
||||||
|
* load if the sequence number does not match @scx_hotplug_seq on the
|
||||||
|
* enable path.
|
||||||
|
*/
|
||||||
|
u64 hotplug_seq;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* name - BPF scheduler's name
|
* name - BPF scheduler's name
|
||||||
*
|
*
|
||||||
@ -509,7 +563,9 @@ struct sched_ext_ops {
|
|||||||
enum scx_opi {
|
enum scx_opi {
|
||||||
SCX_OPI_BEGIN = 0,
|
SCX_OPI_BEGIN = 0,
|
||||||
SCX_OPI_NORMAL_BEGIN = 0,
|
SCX_OPI_NORMAL_BEGIN = 0,
|
||||||
SCX_OPI_NORMAL_END = SCX_OP_IDX(init),
|
SCX_OPI_NORMAL_END = SCX_OP_IDX(cpu_online),
|
||||||
|
SCX_OPI_CPU_HOTPLUG_BEGIN = SCX_OP_IDX(cpu_online),
|
||||||
|
SCX_OPI_CPU_HOTPLUG_END = SCX_OP_IDX(init),
|
||||||
SCX_OPI_END = SCX_OP_IDX(init),
|
SCX_OPI_END = SCX_OP_IDX(init),
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -694,6 +750,7 @@ static atomic_t scx_exit_kind = ATOMIC_INIT(SCX_EXIT_DONE);
|
|||||||
static struct scx_exit_info *scx_exit_info;
|
static struct scx_exit_info *scx_exit_info;
|
||||||
|
|
||||||
static atomic_long_t scx_nr_rejected = ATOMIC_LONG_INIT(0);
|
static atomic_long_t scx_nr_rejected = ATOMIC_LONG_INIT(0);
|
||||||
|
static atomic_long_t scx_hotplug_seq = ATOMIC_LONG_INIT(0);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The maximum amount of time in jiffies that a task may be runnable without
|
* The maximum amount of time in jiffies that a task may be runnable without
|
||||||
@ -1419,11 +1476,7 @@ static void direct_dispatch(struct task_struct *p, u64 enq_flags)
|
|||||||
|
|
||||||
static bool scx_rq_online(struct rq *rq)
|
static bool scx_rq_online(struct rq *rq)
|
||||||
{
|
{
|
||||||
#ifdef CONFIG_SMP
|
return likely(rq->scx.flags & SCX_RQ_ONLINE);
|
||||||
return likely(rq->online);
|
|
||||||
#else
|
|
||||||
return true;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
|
static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
|
||||||
@ -1438,6 +1491,11 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
|
|||||||
if (sticky_cpu == cpu_of(rq))
|
if (sticky_cpu == cpu_of(rq))
|
||||||
goto local_norefill;
|
goto local_norefill;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If !scx_rq_online(), we already told the BPF scheduler that the CPU
|
||||||
|
* is offline and are just running the hotplug path. Don't bother the
|
||||||
|
* BPF scheduler.
|
||||||
|
*/
|
||||||
if (!scx_rq_online(rq))
|
if (!scx_rq_online(rq))
|
||||||
goto local;
|
goto local;
|
||||||
|
|
||||||
@ -2673,6 +2731,42 @@ void __scx_update_idle(struct rq *rq, bool idle)
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void handle_hotplug(struct rq *rq, bool online)
|
||||||
|
{
|
||||||
|
int cpu = cpu_of(rq);
|
||||||
|
|
||||||
|
atomic_long_inc(&scx_hotplug_seq);
|
||||||
|
|
||||||
|
if (online && SCX_HAS_OP(cpu_online))
|
||||||
|
SCX_CALL_OP(SCX_KF_SLEEPABLE, cpu_online, cpu);
|
||||||
|
else if (!online && SCX_HAS_OP(cpu_offline))
|
||||||
|
SCX_CALL_OP(SCX_KF_SLEEPABLE, cpu_offline, cpu);
|
||||||
|
else
|
||||||
|
scx_ops_exit(SCX_ECODE_ACT_RESTART | SCX_ECODE_RSN_HOTPLUG,
|
||||||
|
"cpu %d going %s, exiting scheduler", cpu,
|
||||||
|
online ? "online" : "offline");
|
||||||
|
}
|
||||||
|
|
||||||
|
void scx_rq_activate(struct rq *rq)
|
||||||
|
{
|
||||||
|
handle_hotplug(rq, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
void scx_rq_deactivate(struct rq *rq)
|
||||||
|
{
|
||||||
|
handle_hotplug(rq, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void rq_online_scx(struct rq *rq)
|
||||||
|
{
|
||||||
|
rq->scx.flags |= SCX_RQ_ONLINE;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void rq_offline_scx(struct rq *rq)
|
||||||
|
{
|
||||||
|
rq->scx.flags &= ~SCX_RQ_ONLINE;
|
||||||
|
}
|
||||||
|
|
||||||
#else /* CONFIG_SMP */
|
#else /* CONFIG_SMP */
|
||||||
|
|
||||||
static bool test_and_clear_cpu_idle(int cpu) { return false; }
|
static bool test_and_clear_cpu_idle(int cpu) { return false; }
|
||||||
@ -3104,6 +3198,9 @@ DEFINE_SCHED_CLASS(ext) = {
|
|||||||
.balance = balance_scx,
|
.balance = balance_scx,
|
||||||
.select_task_rq = select_task_rq_scx,
|
.select_task_rq = select_task_rq_scx,
|
||||||
.set_cpus_allowed = set_cpus_allowed_scx,
|
.set_cpus_allowed = set_cpus_allowed_scx,
|
||||||
|
|
||||||
|
.rq_online = rq_online_scx,
|
||||||
|
.rq_offline = rq_offline_scx,
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
.task_tick = task_tick_scx,
|
.task_tick = task_tick_scx,
|
||||||
@ -3235,10 +3332,18 @@ static ssize_t scx_attr_nr_rejected_show(struct kobject *kobj,
|
|||||||
}
|
}
|
||||||
SCX_ATTR(nr_rejected);
|
SCX_ATTR(nr_rejected);
|
||||||
|
|
||||||
|
static ssize_t scx_attr_hotplug_seq_show(struct kobject *kobj,
|
||||||
|
struct kobj_attribute *ka, char *buf)
|
||||||
|
{
|
||||||
|
return sysfs_emit(buf, "%ld\n", atomic_long_read(&scx_hotplug_seq));
|
||||||
|
}
|
||||||
|
SCX_ATTR(hotplug_seq);
|
||||||
|
|
||||||
static struct attribute *scx_global_attrs[] = {
|
static struct attribute *scx_global_attrs[] = {
|
||||||
&scx_attr_state.attr,
|
&scx_attr_state.attr,
|
||||||
&scx_attr_switch_all.attr,
|
&scx_attr_switch_all.attr,
|
||||||
&scx_attr_nr_rejected.attr,
|
&scx_attr_nr_rejected.attr,
|
||||||
|
&scx_attr_hotplug_seq.attr,
|
||||||
NULL,
|
NULL,
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -3941,6 +4046,25 @@ static struct kthread_worker *scx_create_rt_helper(const char *name)
|
|||||||
return helper;
|
return helper;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void check_hotplug_seq(const struct sched_ext_ops *ops)
|
||||||
|
{
|
||||||
|
unsigned long long global_hotplug_seq;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If a hotplug event has occurred between when a scheduler was
|
||||||
|
* initialized, and when we were able to attach, exit and notify user
|
||||||
|
* space about it.
|
||||||
|
*/
|
||||||
|
if (ops->hotplug_seq) {
|
||||||
|
global_hotplug_seq = atomic_long_read(&scx_hotplug_seq);
|
||||||
|
if (ops->hotplug_seq != global_hotplug_seq) {
|
||||||
|
scx_ops_exit(SCX_ECODE_ACT_RESTART | SCX_ECODE_RSN_HOTPLUG,
|
||||||
|
"expected hotplug seq %llu did not match actual %llu",
|
||||||
|
ops->hotplug_seq, global_hotplug_seq);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static int validate_ops(const struct sched_ext_ops *ops)
|
static int validate_ops(const struct sched_ext_ops *ops)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
@ -4023,6 +4147,10 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (i = SCX_OPI_CPU_HOTPLUG_BEGIN; i < SCX_OPI_CPU_HOTPLUG_END; i++)
|
||||||
|
if (((void (**)(void))ops)[i])
|
||||||
|
static_branch_enable_cpuslocked(&scx_has_op[i]);
|
||||||
|
|
||||||
cpus_read_unlock();
|
cpus_read_unlock();
|
||||||
|
|
||||||
ret = validate_ops(ops);
|
ret = validate_ops(ops);
|
||||||
@ -4064,6 +4192,8 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
|
|||||||
percpu_down_write(&scx_fork_rwsem);
|
percpu_down_write(&scx_fork_rwsem);
|
||||||
cpus_read_lock();
|
cpus_read_lock();
|
||||||
|
|
||||||
|
check_hotplug_seq(ops);
|
||||||
|
|
||||||
for (i = SCX_OPI_NORMAL_BEGIN; i < SCX_OPI_NORMAL_END; i++)
|
for (i = SCX_OPI_NORMAL_BEGIN; i < SCX_OPI_NORMAL_END; i++)
|
||||||
if (((void (**)(void))ops)[i])
|
if (((void (**)(void))ops)[i])
|
||||||
static_branch_enable_cpuslocked(&scx_has_op[i]);
|
static_branch_enable_cpuslocked(&scx_has_op[i]);
|
||||||
@ -4374,6 +4504,9 @@ static int bpf_scx_init_member(const struct btf_type *t,
|
|||||||
ops->exit_dump_len =
|
ops->exit_dump_len =
|
||||||
*(u32 *)(udata + moff) ?: SCX_EXIT_DUMP_DFL_LEN;
|
*(u32 *)(udata + moff) ?: SCX_EXIT_DUMP_DFL_LEN;
|
||||||
return 1;
|
return 1;
|
||||||
|
case offsetof(struct sched_ext_ops, hotplug_seq):
|
||||||
|
ops->hotplug_seq = *(u64 *)(udata + moff);
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@ -4387,6 +4520,8 @@ static int bpf_scx_check_member(const struct btf_type *t,
|
|||||||
|
|
||||||
switch (moff) {
|
switch (moff) {
|
||||||
case offsetof(struct sched_ext_ops, init_task):
|
case offsetof(struct sched_ext_ops, init_task):
|
||||||
|
case offsetof(struct sched_ext_ops, cpu_online):
|
||||||
|
case offsetof(struct sched_ext_ops, cpu_offline):
|
||||||
case offsetof(struct sched_ext_ops, init):
|
case offsetof(struct sched_ext_ops, init):
|
||||||
case offsetof(struct sched_ext_ops, exit):
|
case offsetof(struct sched_ext_ops, exit):
|
||||||
break;
|
break;
|
||||||
@ -4457,6 +4592,8 @@ static s32 init_task_stub(struct task_struct *p, struct scx_init_task_args *args
|
|||||||
static void exit_task_stub(struct task_struct *p, struct scx_exit_task_args *args) {}
|
static void exit_task_stub(struct task_struct *p, struct scx_exit_task_args *args) {}
|
||||||
static void enable_stub(struct task_struct *p) {}
|
static void enable_stub(struct task_struct *p) {}
|
||||||
static void disable_stub(struct task_struct *p) {}
|
static void disable_stub(struct task_struct *p) {}
|
||||||
|
static void cpu_online_stub(s32 cpu) {}
|
||||||
|
static void cpu_offline_stub(s32 cpu) {}
|
||||||
static s32 init_stub(void) { return -EINVAL; }
|
static s32 init_stub(void) { return -EINVAL; }
|
||||||
static void exit_stub(struct scx_exit_info *info) {}
|
static void exit_stub(struct scx_exit_info *info) {}
|
||||||
|
|
||||||
@ -4479,6 +4616,8 @@ static struct sched_ext_ops __bpf_ops_sched_ext_ops = {
|
|||||||
.exit_task = exit_task_stub,
|
.exit_task = exit_task_stub,
|
||||||
.enable = enable_stub,
|
.enable = enable_stub,
|
||||||
.disable = disable_stub,
|
.disable = disable_stub,
|
||||||
|
.cpu_online = cpu_online_stub,
|
||||||
|
.cpu_offline = cpu_offline_stub,
|
||||||
.init = init_stub,
|
.init = init_stub,
|
||||||
.exit = exit_stub,
|
.exit = exit_stub,
|
||||||
};
|
};
|
||||||
@ -4719,6 +4858,9 @@ void __init init_sched_ext_class(void)
|
|||||||
BUG_ON(!zalloc_cpumask_var(&rq->scx.cpus_to_preempt, GFP_KERNEL));
|
BUG_ON(!zalloc_cpumask_var(&rq->scx.cpus_to_preempt, GFP_KERNEL));
|
||||||
BUG_ON(!zalloc_cpumask_var(&rq->scx.cpus_to_wait, GFP_KERNEL));
|
BUG_ON(!zalloc_cpumask_var(&rq->scx.cpus_to_wait, GFP_KERNEL));
|
||||||
init_irq_work(&rq->scx.kick_cpus_irq_work, kick_cpus_irq_workfn);
|
init_irq_work(&rq->scx.kick_cpus_irq_work, kick_cpus_irq_workfn);
|
||||||
|
|
||||||
|
if (cpu_online(cpu))
|
||||||
|
cpu_rq(cpu)->scx.flags |= SCX_RQ_ONLINE;
|
||||||
}
|
}
|
||||||
|
|
||||||
register_sysrq_key('S', &sysrq_sched_ext_reset_op);
|
register_sysrq_key('S', &sysrq_sched_ext_reset_op);
|
||||||
|
@ -40,6 +40,8 @@ int scx_fork(struct task_struct *p);
|
|||||||
void scx_post_fork(struct task_struct *p);
|
void scx_post_fork(struct task_struct *p);
|
||||||
void scx_cancel_fork(struct task_struct *p);
|
void scx_cancel_fork(struct task_struct *p);
|
||||||
bool scx_can_stop_tick(struct rq *rq);
|
bool scx_can_stop_tick(struct rq *rq);
|
||||||
|
void scx_rq_activate(struct rq *rq);
|
||||||
|
void scx_rq_deactivate(struct rq *rq);
|
||||||
int scx_check_setscheduler(struct task_struct *p, int policy);
|
int scx_check_setscheduler(struct task_struct *p, int policy);
|
||||||
bool task_should_scx(struct task_struct *p);
|
bool task_should_scx(struct task_struct *p);
|
||||||
void init_sched_ext_class(void);
|
void init_sched_ext_class(void);
|
||||||
@ -81,6 +83,8 @@ static inline int scx_fork(struct task_struct *p) { return 0; }
|
|||||||
static inline void scx_post_fork(struct task_struct *p) {}
|
static inline void scx_post_fork(struct task_struct *p) {}
|
||||||
static inline void scx_cancel_fork(struct task_struct *p) {}
|
static inline void scx_cancel_fork(struct task_struct *p) {}
|
||||||
static inline bool scx_can_stop_tick(struct rq *rq) { return true; }
|
static inline bool scx_can_stop_tick(struct rq *rq) { return true; }
|
||||||
|
static inline void scx_rq_activate(struct rq *rq) {}
|
||||||
|
static inline void scx_rq_deactivate(struct rq *rq) {}
|
||||||
static inline int scx_check_setscheduler(struct task_struct *p, int policy) { return 0; }
|
static inline int scx_check_setscheduler(struct task_struct *p, int policy) { return 0; }
|
||||||
static inline bool task_on_scx(const struct task_struct *p) { return false; }
|
static inline bool task_on_scx(const struct task_struct *p) { return false; }
|
||||||
static inline void init_sched_ext_class(void) {}
|
static inline void init_sched_ext_class(void) {}
|
||||||
|
@ -726,6 +726,12 @@ struct cfs_rq {
|
|||||||
#ifdef CONFIG_SCHED_CLASS_EXT
|
#ifdef CONFIG_SCHED_CLASS_EXT
|
||||||
/* scx_rq->flags, protected by the rq lock */
|
/* scx_rq->flags, protected by the rq lock */
|
||||||
enum scx_rq_flags {
|
enum scx_rq_flags {
|
||||||
|
/*
|
||||||
|
* A hotplugged CPU starts scheduling before rq_online_scx(). Track
|
||||||
|
* ops.cpu_on/offline() state so that ops.enqueue/dispatch() are called
|
||||||
|
* only while the BPF scheduler considers the CPU to be online.
|
||||||
|
*/
|
||||||
|
SCX_RQ_ONLINE = 1 << 0,
|
||||||
SCX_RQ_BALANCING = 1 << 1,
|
SCX_RQ_BALANCING = 1 << 1,
|
||||||
SCX_RQ_CAN_STOP_TICK = 1 << 2,
|
SCX_RQ_CAN_STOP_TICK = 1 << 2,
|
||||||
};
|
};
|
||||||
|
@ -8,6 +8,9 @@
|
|||||||
#define __SCX_COMPAT_H
|
#define __SCX_COMPAT_H
|
||||||
|
|
||||||
#include <bpf/btf.h>
|
#include <bpf/btf.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
struct btf *__COMPAT_vmlinux_btf __attribute__((weak));
|
struct btf *__COMPAT_vmlinux_btf __attribute__((weak));
|
||||||
|
|
||||||
@ -106,6 +109,28 @@ static inline bool __COMPAT_struct_has_field(const char *type, const char *field
|
|||||||
#define SCX_OPS_SWITCH_PARTIAL \
|
#define SCX_OPS_SWITCH_PARTIAL \
|
||||||
__COMPAT_ENUM_OR_ZERO("scx_ops_flags", "SCX_OPS_SWITCH_PARTIAL")
|
__COMPAT_ENUM_OR_ZERO("scx_ops_flags", "SCX_OPS_SWITCH_PARTIAL")
|
||||||
|
|
||||||
|
static inline long scx_hotplug_seq(void)
|
||||||
|
{
|
||||||
|
int fd;
|
||||||
|
char buf[32];
|
||||||
|
ssize_t len;
|
||||||
|
long val;
|
||||||
|
|
||||||
|
fd = open("/sys/kernel/sched_ext/hotplug_seq", O_RDONLY);
|
||||||
|
if (fd < 0)
|
||||||
|
return -ENOENT;
|
||||||
|
|
||||||
|
len = read(fd, buf, sizeof(buf) - 1);
|
||||||
|
SCX_BUG_ON(len <= 0, "read failed (%ld)", len);
|
||||||
|
buf[len] = 0;
|
||||||
|
close(fd);
|
||||||
|
|
||||||
|
val = strtoul(buf, NULL, 10);
|
||||||
|
SCX_BUG_ON(val < 0, "invalid num hotplug events: %lu", val);
|
||||||
|
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* struct sched_ext_ops can change over time. If compat.bpf.h::SCX_OPS_DEFINE()
|
* struct sched_ext_ops can change over time. If compat.bpf.h::SCX_OPS_DEFINE()
|
||||||
* is used to define ops and compat.h::SCX_OPS_LOAD/ATTACH() are used to load
|
* is used to define ops and compat.h::SCX_OPS_LOAD/ATTACH() are used to load
|
||||||
@ -123,6 +148,7 @@ static inline bool __COMPAT_struct_has_field(const char *type, const char *field
|
|||||||
\
|
\
|
||||||
__skel = __scx_name##__open(); \
|
__skel = __scx_name##__open(); \
|
||||||
SCX_BUG_ON(!__skel, "Could not open " #__scx_name); \
|
SCX_BUG_ON(!__skel, "Could not open " #__scx_name); \
|
||||||
|
__skel->struct_ops.__ops_name->hotplug_seq = scx_hotplug_seq(); \
|
||||||
__skel; \
|
__skel; \
|
||||||
})
|
})
|
||||||
|
|
||||||
|
@ -77,7 +77,35 @@ struct user_exit_info {
|
|||||||
if (__uei->msg[0] != '\0') \
|
if (__uei->msg[0] != '\0') \
|
||||||
fprintf(stderr, " (%s)", __uei->msg); \
|
fprintf(stderr, " (%s)", __uei->msg); \
|
||||||
fputs("\n", stderr); \
|
fputs("\n", stderr); \
|
||||||
|
__uei->exit_code; \
|
||||||
})
|
})
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We can't import vmlinux.h while compiling user C code. Let's duplicate
|
||||||
|
* scx_exit_code definition.
|
||||||
|
*/
|
||||||
|
enum scx_exit_code {
|
||||||
|
/* Reasons */
|
||||||
|
SCX_ECODE_RSN_HOTPLUG = 1LLU << 32,
|
||||||
|
|
||||||
|
/* Actions */
|
||||||
|
SCX_ECODE_ACT_RESTART = 1LLU << 48,
|
||||||
|
};
|
||||||
|
|
||||||
|
enum uei_ecode_mask {
|
||||||
|
UEI_ECODE_USER_MASK = ((1LLU << 32) - 1),
|
||||||
|
UEI_ECODE_SYS_RSN_MASK = ((1LLU << 16) - 1) << 32,
|
||||||
|
UEI_ECODE_SYS_ACT_MASK = ((1LLU << 16) - 1) << 48,
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* These macro interpret the ecode returned from UEI_REPORT().
|
||||||
|
*/
|
||||||
|
#define UEI_ECODE_USER(__ecode) ((__ecode) & UEI_ECODE_USER_MASK)
|
||||||
|
#define UEI_ECODE_SYS_RSN(__ecode) ((__ecode) & UEI_ECODE_SYS_RSN_MASK)
|
||||||
|
#define UEI_ECODE_SYS_ACT(__ecode) ((__ecode) & UEI_ECODE_SYS_ACT_MASK)
|
||||||
|
|
||||||
|
#define UEI_ECODE_RESTART(__ecode) (UEI_ECODE_SYS_ACT((__ecode)) == SCX_ECODE_ACT_RESTART)
|
||||||
|
|
||||||
#endif /* __bpf__ */
|
#endif /* __bpf__ */
|
||||||
#endif /* __USER_EXIT_INFO_H */
|
#endif /* __USER_EXIT_INFO_H */
|
||||||
|
@ -46,14 +46,14 @@ int main(int argc, char **argv)
|
|||||||
{
|
{
|
||||||
struct scx_central *skel;
|
struct scx_central *skel;
|
||||||
struct bpf_link *link;
|
struct bpf_link *link;
|
||||||
__u64 seq = 0;
|
__u64 seq = 0, ecode;
|
||||||
__s32 opt;
|
__s32 opt;
|
||||||
cpu_set_t *cpuset;
|
cpu_set_t *cpuset;
|
||||||
|
|
||||||
libbpf_set_print(libbpf_print_fn);
|
libbpf_set_print(libbpf_print_fn);
|
||||||
signal(SIGINT, sigint_handler);
|
signal(SIGINT, sigint_handler);
|
||||||
signal(SIGTERM, sigint_handler);
|
signal(SIGTERM, sigint_handler);
|
||||||
|
restart:
|
||||||
skel = SCX_OPS_OPEN(central_ops, scx_central);
|
skel = SCX_OPS_OPEN(central_ops, scx_central);
|
||||||
|
|
||||||
skel->rodata->central_cpu = 0;
|
skel->rodata->central_cpu = 0;
|
||||||
@ -126,7 +126,10 @@ int main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
|
|
||||||
bpf_link__destroy(link);
|
bpf_link__destroy(link);
|
||||||
UEI_REPORT(skel, uei);
|
ecode = UEI_REPORT(skel, uei);
|
||||||
scx_central__destroy(skel);
|
scx_central__destroy(skel);
|
||||||
|
|
||||||
|
if (UEI_ECODE_RESTART(ecode))
|
||||||
|
goto restart;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -358,8 +358,63 @@ void BPF_STRUCT_OPS(qmap_dump_task, struct scx_dump_ctx *dctx, struct task_struc
|
|||||||
taskc->force_local);
|
taskc->force_local);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Print out the online and possible CPU map using bpf_printk() as a
|
||||||
|
* demonstration of using the cpumask kfuncs and ops.cpu_on/offline().
|
||||||
|
*/
|
||||||
|
static void print_cpus(void)
|
||||||
|
{
|
||||||
|
const struct cpumask *possible, *online;
|
||||||
|
s32 cpu;
|
||||||
|
char buf[128] = "", *p;
|
||||||
|
int idx;
|
||||||
|
|
||||||
|
possible = scx_bpf_get_possible_cpumask();
|
||||||
|
online = scx_bpf_get_online_cpumask();
|
||||||
|
|
||||||
|
idx = 0;
|
||||||
|
bpf_for(cpu, 0, scx_bpf_nr_cpu_ids()) {
|
||||||
|
if (!(p = MEMBER_VPTR(buf, [idx++])))
|
||||||
|
break;
|
||||||
|
if (bpf_cpumask_test_cpu(cpu, online))
|
||||||
|
*p++ = 'O';
|
||||||
|
else if (bpf_cpumask_test_cpu(cpu, possible))
|
||||||
|
*p++ = 'X';
|
||||||
|
else
|
||||||
|
*p++ = ' ';
|
||||||
|
|
||||||
|
if ((cpu & 7) == 7) {
|
||||||
|
if (!(p = MEMBER_VPTR(buf, [idx++])))
|
||||||
|
break;
|
||||||
|
*p++ = '|';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
buf[sizeof(buf) - 1] = '\0';
|
||||||
|
|
||||||
|
scx_bpf_put_cpumask(online);
|
||||||
|
scx_bpf_put_cpumask(possible);
|
||||||
|
|
||||||
|
bpf_printk("CPUS: |%s", buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
void BPF_STRUCT_OPS(qmap_cpu_online, s32 cpu)
|
||||||
|
{
|
||||||
|
bpf_printk("CPU %d coming online", cpu);
|
||||||
|
/* @cpu is already online at this point */
|
||||||
|
print_cpus();
|
||||||
|
}
|
||||||
|
|
||||||
|
void BPF_STRUCT_OPS(qmap_cpu_offline, s32 cpu)
|
||||||
|
{
|
||||||
|
bpf_printk("CPU %d going offline", cpu);
|
||||||
|
/* @cpu is still online at this point */
|
||||||
|
print_cpus();
|
||||||
|
}
|
||||||
|
|
||||||
s32 BPF_STRUCT_OPS_SLEEPABLE(qmap_init)
|
s32 BPF_STRUCT_OPS_SLEEPABLE(qmap_init)
|
||||||
{
|
{
|
||||||
|
print_cpus();
|
||||||
|
|
||||||
return scx_bpf_create_dsq(SHARED_DSQ, -1);
|
return scx_bpf_create_dsq(SHARED_DSQ, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -378,6 +433,8 @@ SCX_OPS_DEFINE(qmap_ops,
|
|||||||
.dump = (void *)qmap_dump,
|
.dump = (void *)qmap_dump,
|
||||||
.dump_cpu = (void *)qmap_dump_cpu,
|
.dump_cpu = (void *)qmap_dump_cpu,
|
||||||
.dump_task = (void *)qmap_dump_task,
|
.dump_task = (void *)qmap_dump_task,
|
||||||
|
.cpu_online = (void *)qmap_cpu_online,
|
||||||
|
.cpu_offline = (void *)qmap_cpu_offline,
|
||||||
.init = (void *)qmap_init,
|
.init = (void *)qmap_init,
|
||||||
.exit = (void *)qmap_exit,
|
.exit = (void *)qmap_exit,
|
||||||
.timeout_ms = 5000U,
|
.timeout_ms = 5000U,
|
||||||
|
@ -122,5 +122,9 @@ int main(int argc, char **argv)
|
|||||||
bpf_link__destroy(link);
|
bpf_link__destroy(link);
|
||||||
UEI_REPORT(skel, uei);
|
UEI_REPORT(skel, uei);
|
||||||
scx_qmap__destroy(skel);
|
scx_qmap__destroy(skel);
|
||||||
|
/*
|
||||||
|
* scx_qmap implements ops.cpu_on/offline() and doesn't need to restart
|
||||||
|
* on CPU hotplug events.
|
||||||
|
*/
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -62,11 +62,12 @@ int main(int argc, char **argv)
|
|||||||
struct scx_simple *skel;
|
struct scx_simple *skel;
|
||||||
struct bpf_link *link;
|
struct bpf_link *link;
|
||||||
__u32 opt;
|
__u32 opt;
|
||||||
|
__u64 ecode;
|
||||||
|
|
||||||
libbpf_set_print(libbpf_print_fn);
|
libbpf_set_print(libbpf_print_fn);
|
||||||
signal(SIGINT, sigint_handler);
|
signal(SIGINT, sigint_handler);
|
||||||
signal(SIGTERM, sigint_handler);
|
signal(SIGTERM, sigint_handler);
|
||||||
|
restart:
|
||||||
skel = SCX_OPS_OPEN(simple_ops, scx_simple);
|
skel = SCX_OPS_OPEN(simple_ops, scx_simple);
|
||||||
|
|
||||||
while ((opt = getopt(argc, argv, "vh")) != -1) {
|
while ((opt = getopt(argc, argv, "vh")) != -1) {
|
||||||
@ -93,7 +94,10 @@ int main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
|
|
||||||
bpf_link__destroy(link);
|
bpf_link__destroy(link);
|
||||||
UEI_REPORT(skel, uei);
|
ecode = UEI_REPORT(skel, uei);
|
||||||
scx_simple__destroy(skel);
|
scx_simple__destroy(skel);
|
||||||
|
|
||||||
|
if (UEI_ECODE_RESTART(ecode))
|
||||||
|
goto restart;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user