Mirror of https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git, synced 2025-01-15 02:05:33 +00:00

Commit 965fa966c8: Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext.git
@ -242,9 +242,9 @@ The following briefly shows how a waking task is scheduled and executed.
task was inserted directly from ``ops.select_cpu()``). ``ops.enqueue()``
can make one of the following decisions:

* Immediately insert the task into either the global or local DSQ by
calling ``scx_bpf_dsq_insert()`` with ``SCX_DSQ_GLOBAL`` or
``SCX_DSQ_LOCAL``, respectively.
* Immediately insert the task into either the global or a local DSQ by
calling ``scx_bpf_dsq_insert()`` with one of the following options:
``SCX_DSQ_GLOBAL``, ``SCX_DSQ_LOCAL``, or ``SCX_DSQ_LOCAL_ON | cpu``.

* Immediately insert the task into a custom DSQ by calling
``scx_bpf_dsq_insert()`` with a DSQ ID which is smaller than 2^63.
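As a rough sketch (not part of this patch), an ``ops.enqueue()`` callback that direct-dispatches onto a CPU's local DSQ using the newly documented ``SCX_DSQ_LOCAL_ON | cpu`` form could look like the following; the callback name and the choice of target CPU are purely illustrative:

/* Illustrative only: direct-dispatch @p onto its current CPU's local DSQ. */
void BPF_STRUCT_OPS(example_enqueue, struct task_struct *p, u64 enq_flags)
{
	/* target the local DSQ of the CPU @p last ran on */
	s32 cpu = scx_bpf_task_cpu(p);

	/* SCX_DSQ_LOCAL_ON | cpu names that CPU's local DSQ explicitly */
	scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL_ON | cpu, SCX_SLICE_DFL, enq_flags);
}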
@ -791,6 +791,7 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)

void update_rq_clock(struct rq *rq)
{
s64 delta;
u64 clock;

lockdep_assert_rq_held(rq);

@ -802,11 +803,14 @@ void update_rq_clock(struct rq *rq)
SCHED_WARN_ON(rq->clock_update_flags & RQCF_UPDATED);
rq->clock_update_flags |= RQCF_UPDATED;
#endif
clock = sched_clock_cpu(cpu_of(rq));
scx_rq_clock_update(rq, clock);

delta = sched_clock_cpu(cpu_of(rq)) - rq->clock;
delta = clock - rq->clock;
if (delta < 0)
return;
rq->clock += delta;

update_rq_clock_task(rq, delta);
}
@ -206,7 +206,7 @@ struct scx_dump_ctx {
*/
struct sched_ext_ops {
/**
* select_cpu - Pick the target CPU for a task which is being woken up
* @select_cpu: Pick the target CPU for a task which is being woken up
* @p: task being woken up
* @prev_cpu: the cpu @p was on before sleeping
* @wake_flags: SCX_WAKE_*
@ -233,7 +233,7 @@ struct sched_ext_ops {
s32 (*select_cpu)(struct task_struct *p, s32 prev_cpu, u64 wake_flags);

/**
* enqueue - Enqueue a task on the BPF scheduler
* @enqueue: Enqueue a task on the BPF scheduler
* @p: task being enqueued
* @enq_flags: %SCX_ENQ_*
*
@ -248,7 +248,7 @@ struct sched_ext_ops {
void (*enqueue)(struct task_struct *p, u64 enq_flags);

/**
* dequeue - Remove a task from the BPF scheduler
* @dequeue: Remove a task from the BPF scheduler
* @p: task being dequeued
* @deq_flags: %SCX_DEQ_*
*
@ -264,7 +264,7 @@ struct sched_ext_ops {
void (*dequeue)(struct task_struct *p, u64 deq_flags);

/**
* dispatch - Dispatch tasks from the BPF scheduler and/or user DSQs
* @dispatch: Dispatch tasks from the BPF scheduler and/or user DSQs
* @cpu: CPU to dispatch tasks for
* @prev: previous task being switched out
*
@ -287,7 +287,7 @@ struct sched_ext_ops {
void (*dispatch)(s32 cpu, struct task_struct *prev);

/**
* tick - Periodic tick
* @tick: Periodic tick
* @p: task running currently
*
* This operation is called every 1/HZ seconds on CPUs which are
@ -297,7 +297,7 @@ struct sched_ext_ops {
void (*tick)(struct task_struct *p);

/**
* runnable - A task is becoming runnable on its associated CPU
* @runnable: A task is becoming runnable on its associated CPU
* @p: task becoming runnable
* @enq_flags: %SCX_ENQ_*
*
@ -324,7 +324,7 @@ struct sched_ext_ops {
void (*runnable)(struct task_struct *p, u64 enq_flags);

/**
* running - A task is starting to run on its associated CPU
* @running: A task is starting to run on its associated CPU
* @p: task starting to run
*
* See ->runnable() for explanation on the task state notifiers.
@ -332,7 +332,7 @@ struct sched_ext_ops {
void (*running)(struct task_struct *p);

/**
* stopping - A task is stopping execution
* @stopping: A task is stopping execution
* @p: task stopping to run
* @runnable: is task @p still runnable?
*
@ -343,7 +343,7 @@ struct sched_ext_ops {
void (*stopping)(struct task_struct *p, bool runnable);

/**
* quiescent - A task is becoming not runnable on its associated CPU
* @quiescent: A task is becoming not runnable on its associated CPU
* @p: task becoming not runnable
* @deq_flags: %SCX_DEQ_*
*
@ -363,7 +363,7 @@ struct sched_ext_ops {
void (*quiescent)(struct task_struct *p, u64 deq_flags);

/**
* yield - Yield CPU
* @yield: Yield CPU
* @from: yielding task
* @to: optional yield target task
*
@ -378,7 +378,7 @@ struct sched_ext_ops {
bool (*yield)(struct task_struct *from, struct task_struct *to);

/**
* core_sched_before - Task ordering for core-sched
* @core_sched_before: Task ordering for core-sched
* @a: task A
* @b: task B
*
@ -396,7 +396,7 @@ struct sched_ext_ops {
bool (*core_sched_before)(struct task_struct *a, struct task_struct *b);

/**
* set_weight - Set task weight
* @set_weight: Set task weight
* @p: task to set weight for
* @weight: new weight [1..10000]
*
@ -405,7 +405,7 @@ struct sched_ext_ops {
void (*set_weight)(struct task_struct *p, u32 weight);

/**
* set_cpumask - Set CPU affinity
* @set_cpumask: Set CPU affinity
* @p: task to set CPU affinity for
* @cpumask: cpumask of cpus that @p can run on
*
@ -415,7 +415,7 @@ struct sched_ext_ops {
const struct cpumask *cpumask);

/**
* update_idle - Update the idle state of a CPU
* @update_idle: Update the idle state of a CPU
* @cpu: CPU to udpate the idle state for
* @idle: whether entering or exiting the idle state
*
@ -436,7 +436,7 @@ struct sched_ext_ops {
void (*update_idle)(s32 cpu, bool idle);

/**
* cpu_acquire - A CPU is becoming available to the BPF scheduler
* @cpu_acquire: A CPU is becoming available to the BPF scheduler
* @cpu: The CPU being acquired by the BPF scheduler.
* @args: Acquire arguments, see the struct definition.
*
@ -446,7 +446,7 @@ struct sched_ext_ops {
void (*cpu_acquire)(s32 cpu, struct scx_cpu_acquire_args *args);

/**
* cpu_release - A CPU is taken away from the BPF scheduler
* @cpu_release: A CPU is taken away from the BPF scheduler
* @cpu: The CPU being released by the BPF scheduler.
* @args: Release arguments, see the struct definition.
*
@ -458,7 +458,7 @@ struct sched_ext_ops {
void (*cpu_release)(s32 cpu, struct scx_cpu_release_args *args);

/**
* init_task - Initialize a task to run in a BPF scheduler
* @init_task: Initialize a task to run in a BPF scheduler
* @p: task to initialize for BPF scheduling
* @args: init arguments, see the struct definition
*
@ -473,8 +473,9 @@ struct sched_ext_ops {
s32 (*init_task)(struct task_struct *p, struct scx_init_task_args *args);

/**
* exit_task - Exit a previously-running task from the system
* @exit_task: Exit a previously-running task from the system
* @p: task to exit
* @args: exit arguments, see the struct definition
*
* @p is exiting or the BPF scheduler is being unloaded. Perform any
* necessary cleanup for @p.
@ -482,7 +483,7 @@ struct sched_ext_ops {
void (*exit_task)(struct task_struct *p, struct scx_exit_task_args *args);

/**
* enable - Enable BPF scheduling for a task
* @enable: Enable BPF scheduling for a task
* @p: task to enable BPF scheduling for
*
* Enable @p for BPF scheduling. enable() is called on @p any time it
@ -491,7 +492,7 @@ struct sched_ext_ops {
void (*enable)(struct task_struct *p);

/**
* disable - Disable BPF scheduling for a task
* @disable: Disable BPF scheduling for a task
* @p: task to disable BPF scheduling for
*
* @p is exiting, leaving SCX or the BPF scheduler is being unloaded.
@ -501,7 +502,7 @@ struct sched_ext_ops {
void (*disable)(struct task_struct *p);

/**
* dump - Dump BPF scheduler state on error
* @dump: Dump BPF scheduler state on error
* @ctx: debug dump context
*
* Use scx_bpf_dump() to generate BPF scheduler specific debug dump.
@ -509,7 +510,7 @@ struct sched_ext_ops {
void (*dump)(struct scx_dump_ctx *ctx);

/**
* dump_cpu - Dump BPF scheduler state for a CPU on error
* @dump_cpu: Dump BPF scheduler state for a CPU on error
* @ctx: debug dump context
* @cpu: CPU to generate debug dump for
* @idle: @cpu is currently idle without any runnable tasks
@ -521,7 +522,7 @@ struct sched_ext_ops {
void (*dump_cpu)(struct scx_dump_ctx *ctx, s32 cpu, bool idle);

/**
* dump_task - Dump BPF scheduler state for a runnable task on error
* @dump_task: Dump BPF scheduler state for a runnable task on error
* @ctx: debug dump context
* @p: runnable task to generate debug dump for
*
@ -532,7 +533,7 @@ struct sched_ext_ops {

#ifdef CONFIG_EXT_GROUP_SCHED
/**
* cgroup_init - Initialize a cgroup
* @cgroup_init: Initialize a cgroup
* @cgrp: cgroup being initialized
* @args: init arguments, see the struct definition
*
@ -547,7 +548,7 @@ struct sched_ext_ops {
struct scx_cgroup_init_args *args);

/**
* cgroup_exit - Exit a cgroup
* @cgroup_exit: Exit a cgroup
* @cgrp: cgroup being exited
*
* Either the BPF scheduler is being unloaded or @cgrp destroyed, exit
@ -556,7 +557,7 @@ struct sched_ext_ops {
void (*cgroup_exit)(struct cgroup *cgrp);

/**
* cgroup_prep_move - Prepare a task to be moved to a different cgroup
* @cgroup_prep_move: Prepare a task to be moved to a different cgroup
* @p: task being moved
* @from: cgroup @p is being moved from
* @to: cgroup @p is being moved to
@ -571,7 +572,7 @@ struct sched_ext_ops {
struct cgroup *from, struct cgroup *to);

/**
* cgroup_move - Commit cgroup move
* @cgroup_move: Commit cgroup move
* @p: task being moved
* @from: cgroup @p is being moved from
* @to: cgroup @p is being moved to
@ -582,7 +583,7 @@ struct sched_ext_ops {
struct cgroup *from, struct cgroup *to);

/**
* cgroup_cancel_move - Cancel cgroup move
* @cgroup_cancel_move: Cancel cgroup move
* @p: task whose cgroup move is being canceled
* @from: cgroup @p was being moved from
* @to: cgroup @p was being moved to
@ -594,7 +595,7 @@ struct sched_ext_ops {
struct cgroup *from, struct cgroup *to);

/**
* cgroup_set_weight - A cgroup's weight is being changed
* @cgroup_set_weight: A cgroup's weight is being changed
* @cgrp: cgroup whose weight is being updated
* @weight: new weight [1..10000]
*
@ -608,7 +609,7 @@ struct sched_ext_ops {
*/

/**
* cpu_online - A CPU became online
* @cpu_online: A CPU became online
* @cpu: CPU which just came up
*
* @cpu just came online. @cpu will not call ops.enqueue() or
@ -617,7 +618,7 @@ struct sched_ext_ops {
void (*cpu_online)(s32 cpu);

/**
* cpu_offline - A CPU is going offline
* @cpu_offline: A CPU is going offline
* @cpu: CPU which is going offline
*
* @cpu is going offline. @cpu will not call ops.enqueue() or
@ -630,12 +631,12 @@ struct sched_ext_ops {
*/

/**
* init - Initialize the BPF scheduler
* @init: Initialize the BPF scheduler
*/
s32 (*init)(void);

/**
* exit - Clean up after the BPF scheduler
* @exit: Clean up after the BPF scheduler
* @info: Exit info
*
* ops.exit() is also called on ops.init() failure, which is a bit
@ -645,17 +646,17 @@ struct sched_ext_ops {
void (*exit)(struct scx_exit_info *info);

/**
* dispatch_max_batch - Max nr of tasks that dispatch() can dispatch
* @dispatch_max_batch: Max nr of tasks that dispatch() can dispatch
*/
u32 dispatch_max_batch;

/**
* flags - %SCX_OPS_* flags
* @flags: %SCX_OPS_* flags
*/
u64 flags;

/**
* timeout_ms - The maximum amount of time, in milliseconds, that a
* @timeout_ms: The maximum amount of time, in milliseconds, that a
* runnable task should be able to wait before being scheduled. The
* maximum timeout may not exceed the default timeout of 30 seconds.
*
@ -664,13 +665,13 @@ struct sched_ext_ops {
u32 timeout_ms;

/**
* exit_dump_len - scx_exit_info.dump buffer length. If 0, the default
* @exit_dump_len: scx_exit_info.dump buffer length. If 0, the default
* value of 32768 is used.
*/
u32 exit_dump_len;

/**
* hotplug_seq - A sequence number that may be set by the scheduler to
* @hotplug_seq: A sequence number that may be set by the scheduler to
* detect when a hotplug event has occurred during the loading process.
* If 0, no detection occurs. Otherwise, the scheduler will fail to
* load if the sequence number does not match @scx_hotplug_seq on the
@ -679,7 +680,7 @@ struct sched_ext_ops {
u64 hotplug_seq;

/**
* name - BPF scheduler's name
* @name: BPF scheduler's name
*
* Must be a non-zero valid BPF object name including only isalnum(),
* '_' and '.' chars. Shows up in kernel.sched_ext_ops sysctl while the
@ -960,7 +961,7 @@ static DEFINE_PER_CPU(struct task_struct *, direct_dispatch_task);
static struct scx_dispatch_q **global_dsqs;

static const struct rhashtable_params dsq_hash_params = {
.key_len = 8,
.key_len = sizeof_field(struct scx_dispatch_q, id),
.key_offset = offsetof(struct scx_dispatch_q, id),
.head_offset = offsetof(struct scx_dispatch_q, hash_node),
};
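The dsq_hash_params hunk above swaps the hard-coded 8-byte key length for sizeof_field(), which keeps the hashtable parameters tied to the keyed member. A hedged, self-contained illustration of the same idiom, using a hypothetical struct and table rather than anything from the patch:

#include <linux/rhashtable.h>

/* Hypothetical object keyed by a u64 id, mirroring the dsq_hash_params idiom. */
struct example_obj {
	u64 id;
	struct rhash_head hash_node;
};

/* sizeof_field()/offsetof() track the struct layout automatically. */
static const struct rhashtable_params example_hash_params = {
	.key_len	= sizeof_field(struct example_obj, id),
	.key_offset	= offsetof(struct example_obj, id),
	.head_offset	= offsetof(struct example_obj, hash_node),
};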
@ -1408,7 +1409,6 @@ static struct task_struct *scx_task_iter_next(struct scx_task_iter *iter)
/**
* scx_task_iter_next_locked - Next non-idle task with its rq locked
* @iter: iterator to walk
* @include_dead: Whether we should include dead tasks in the iteration
*
* Visit the non-idle task with its rq lock held. Allows callers to specify
* whether they would like to filter out dead tasks. See scx_task_iter_start()
@ -3136,6 +3136,7 @@ static struct task_struct *pick_task_scx(struct rq *rq)
* scx_prio_less - Task ordering for core-sched
* @a: task A
* @b: task B
* @in_fi: in forced idle state
*
* Core-sched is implemented as an additional scheduling layer on top of the
* usual sched_class'es and needs to find out the expected task ordering. For
@ -3184,6 +3185,10 @@ static bool test_and_clear_cpu_idle(int cpu)
* scx_pick_idle_cpu() can get caught in an infinite loop as
* @cpu is never cleared from idle_masks.smt. Ensure that @cpu
* is eventually cleared.
*
* NOTE: Use cpumask_intersects() and cpumask_test_cpu() to
* reduce memory writes, which may help alleviate cache
* coherence pressure.
*/
if (cpumask_intersects(smt, idle_masks.smt))
cpumask_andnot(idle_masks.smt, idle_masks.smt, smt);
@ -3219,6 +3224,74 @@ found:
goto retry;
}
/*
* Return the amount of CPUs in the same LLC domain of @cpu (or zero if the LLC
* domain is not defined).
*/
static unsigned int llc_weight(s32 cpu)
{
struct sched_domain *sd;

sd = rcu_dereference(per_cpu(sd_llc, cpu));
if (!sd)
return 0;

return sd->span_weight;
}

/*
* Return the cpumask representing the LLC domain of @cpu (or NULL if the LLC
* domain is not defined).
*/
static struct cpumask *llc_span(s32 cpu)
{
struct sched_domain *sd;

sd = rcu_dereference(per_cpu(sd_llc, cpu));
if (!sd)
return 0;

return sched_domain_span(sd);
}

/*
* Return the amount of CPUs in the same NUMA domain of @cpu (or zero if the
* NUMA domain is not defined).
*/
static unsigned int numa_weight(s32 cpu)
{
struct sched_domain *sd;
struct sched_group *sg;

sd = rcu_dereference(per_cpu(sd_numa, cpu));
if (!sd)
return 0;
sg = sd->groups;
if (!sg)
return 0;

return sg->group_weight;
}

/*
* Return the cpumask representing the NUMA domain of @cpu (or NULL if the NUMA
* domain is not defined).
*/
static struct cpumask *numa_span(s32 cpu)
{
struct sched_domain *sd;
struct sched_group *sg;

sd = rcu_dereference(per_cpu(sd_numa, cpu));
if (!sd)
return NULL;
sg = sd->groups;
if (!sg)
return NULL;

return sched_group_span(sg);
}

/*
* Return true if the LLC domains do not perfectly overlap with the NUMA
* domains, false otherwise.
@ -3250,19 +3323,10 @@ static bool llc_numa_mismatch(void)
* overlapping, which is incorrect (as NUMA 1 has two distinct LLC
* domains).
*/
for_each_online_cpu(cpu) {
const struct cpumask *numa_cpus;
struct sched_domain *sd;

sd = rcu_dereference(per_cpu(sd_llc, cpu));
if (!sd)
for_each_online_cpu(cpu)
if (llc_weight(cpu) != numa_weight(cpu))
return true;

numa_cpus = cpumask_of_node(cpu_to_node(cpu));
if (sd->span_weight != cpumask_weight(numa_cpus))
return true;
}

return false;
}
@ -3280,8 +3344,7 @@ static bool llc_numa_mismatch(void)
static void update_selcpu_topology(void)
{
bool enable_llc = false, enable_numa = false;
struct sched_domain *sd;
const struct cpumask *cpus;
unsigned int nr_cpus;
s32 cpu = cpumask_first(cpu_online_mask);

/*
@ -3295,10 +3358,12 @@ static void update_selcpu_topology(void)
* CPUs.
*/
rcu_read_lock();
sd = rcu_dereference(per_cpu(sd_llc, cpu));
if (sd) {
if (sd->span_weight < num_online_cpus())
nr_cpus = llc_weight(cpu);
if (nr_cpus > 0) {
if (nr_cpus < num_online_cpus())
enable_llc = true;
pr_debug("sched_ext: LLC=%*pb weight=%u\n",
cpumask_pr_args(llc_span(cpu)), llc_weight(cpu));
}

/*
@ -3310,15 +3375,19 @@ static void update_selcpu_topology(void)
* enabling both NUMA and LLC optimizations is unnecessary, as checking
* for an idle CPU in the same domain twice is redundant.
*/
cpus = cpumask_of_node(cpu_to_node(cpu));
if ((cpumask_weight(cpus) < num_online_cpus()) && llc_numa_mismatch())
enable_numa = true;
nr_cpus = numa_weight(cpu);
if (nr_cpus > 0) {
if (nr_cpus < num_online_cpus() && llc_numa_mismatch())
enable_numa = true;
pr_debug("sched_ext: NUMA=%*pb weight=%u\n",
cpumask_pr_args(numa_span(cpu)), numa_weight(cpu));
}
rcu_read_unlock();

pr_debug("sched_ext: LLC idle selection %s\n",
enable_llc ? "enabled" : "disabled");
str_enabled_disabled(enable_llc));
pr_debug("sched_ext: NUMA idle selection %s\n",
enable_numa ? "enabled" : "disabled");
str_enabled_disabled(enable_numa));

if (enable_llc)
static_branch_enable_cpuslocked(&scx_selcpu_topo_llc);
@ -3348,6 +3417,8 @@ static void update_selcpu_topology(void)
* 4. Pick a CPU within the same NUMA node, if enabled:
* - choose a CPU from the same NUMA node to reduce memory access latency.
*
* 5. Pick any idle CPU usable by the task.
*
* Step 3 and 4 are performed only if the system has, respectively, multiple
* LLC domains / multiple NUMA nodes (see scx_selcpu_topo_llc and
* scx_selcpu_topo_numa).
@ -3364,7 +3435,6 @@ static s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu,

*found = false;

/*
* This is necessary to protect llc_cpus.
*/
@ -3383,15 +3453,10 @@ static s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu,
*/
if (p->nr_cpus_allowed >= num_possible_cpus()) {
if (static_branch_maybe(CONFIG_NUMA, &scx_selcpu_topo_numa))
numa_cpus = cpumask_of_node(cpu_to_node(prev_cpu));
numa_cpus = numa_span(prev_cpu);

if (static_branch_maybe(CONFIG_SCHED_MC, &scx_selcpu_topo_llc)) {
struct sched_domain *sd;

sd = rcu_dereference(per_cpu(sd_llc, prev_cpu));
if (sd)
llc_cpus = sched_domain_span(sd);
}
if (static_branch_maybe(CONFIG_SCHED_MC, &scx_selcpu_topo_llc))
llc_cpus = llc_span(prev_cpu);
}

/*
@ -3592,10 +3657,7 @@ static void reset_idle_masks(void)

static void update_builtin_idle(int cpu, bool idle)
{
if (idle)
cpumask_set_cpu(cpu, idle_masks.cpu);
else
cpumask_clear_cpu(cpu, idle_masks.cpu);
assign_cpu(cpu, idle_masks.cpu, idle);

#ifdef CONFIG_SCHED_SMT
if (sched_smt_active()) {
@ -3606,10 +3668,8 @@ static void update_builtin_idle(int cpu, bool idle)
* idle_masks.smt handling is racy but that's fine as
* it's only for optimization and self-correcting.
*/
for_each_cpu(cpu, smt) {
if (!cpumask_test_cpu(cpu, idle_masks.cpu))
return;
}
if (!cpumask_subset(smt, idle_masks.cpu))
return;
cpumask_or(idle_masks.smt, idle_masks.smt, smt);
} else {
cpumask_andnot(idle_masks.smt, idle_masks.smt, smt);
@ -4688,6 +4748,7 @@ bool task_should_scx(int policy)

/**
* scx_softlockup - sched_ext softlockup handler
* @dur_s: number of seconds of CPU stuck due to soft lockup
*
* On some multi-socket setups (e.g. 2x Intel 8480c), the BPF scheduler can
* live-lock the system by making many CPUs target the same DSQ to the point
@ -4731,6 +4792,7 @@ static void scx_clear_softlockup(void)

/**
* scx_ops_bypass - [Un]bypass scx_ops and guarantee forward progress
* @bypass: true for bypass, false for unbypass
*
* Bypassing guarantees that all runnable tasks make forward progress without
* trusting the BPF scheduler. We can't grab any mutexes or rwsems as they might
@ -4899,7 +4961,7 @@ static void scx_ops_disable_workfn(struct kthread_work *work)
struct task_struct *p;
struct rhashtable_iter rht_iter;
struct scx_dispatch_q *dsq;
int i, kind;
int i, kind, cpu;

kind = atomic_read(&scx_exit_kind);
while (true) {
@ -4982,6 +5044,15 @@ static void scx_ops_disable_workfn(struct kthread_work *work)
scx_task_iter_stop(&sti);
percpu_up_write(&scx_fork_rwsem);

/*
* Invalidate all the rq clocks to prevent getting outdated
* rq clocks from a previous scx scheduler.
*/
for_each_possible_cpu(cpu) {
struct rq *rq = cpu_rq(cpu);
scx_rq_clock_invalidate(rq);
}

/* no task is on scx, turn off all the switches and flush in-progress calls */
static_branch_disable(&__scx_ops_enabled);
for (i = SCX_OPI_BEGIN; i < SCX_OPI_END; i++)
@ -5206,9 +5277,9 @@ static void scx_dump_task(struct seq_buf *s, struct scx_dump_ctx *dctx,
scx_get_task_state(p), p->scx.flags & ~SCX_TASK_STATE_MASK,
p->scx.dsq_flags, ops_state & SCX_OPSS_STATE_MASK,
ops_state >> SCX_OPSS_QSEQ_SHIFT);
dump_line(s, " sticky/holding_cpu=%d/%d dsq_id=%s dsq_vtime=%llu",
dump_line(s, " sticky/holding_cpu=%d/%d dsq_id=%s dsq_vtime=%llu slice=%llu",
p->scx.sticky_cpu, p->scx.holding_cpu, dsq_id_buf,
p->scx.dsq_vtime);
p->scx.dsq_vtime, p->scx.slice);
dump_line(s, " cpus=%*pb", cpumask_pr_args(p->cpus_ptr));

if (SCX_HAS_OP(dump_task)) {
@ -6283,6 +6354,15 @@ void __init init_sched_ext_class(void)

__bpf_kfunc_start_defs();

static bool check_builtin_idle_enabled(void)
{
if (static_branch_likely(&scx_builtin_idle_enabled))
return true;

scx_ops_error("built-in idle tracking is disabled");
return false;
}

/**
* scx_bpf_select_cpu_dfl - The default implementation of ops.select_cpu()
* @p: task_struct to select a CPU for
@ -6300,10 +6380,8 @@ __bpf_kfunc_start_defs();
__bpf_kfunc s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu,
u64 wake_flags, bool *is_idle)
{
if (!static_branch_likely(&scx_builtin_idle_enabled)) {
scx_ops_error("built-in idle tracking is disabled");
if (!check_builtin_idle_enabled())
goto prev_cpu;
}

if (!scx_kf_allowed(SCX_KF_SELECT_CPU))
goto prev_cpu;
@ -6387,9 +6465,7 @@ __bpf_kfunc_start_defs();
* ops.select_cpu(), and ops.dispatch().
*
* When called from ops.select_cpu() or ops.enqueue(), it's for direct dispatch
* and @p must match the task being enqueued. Also, %SCX_DSQ_LOCAL_ON can't be
* used to target the local DSQ of a CPU other than the enqueueing one. Use
* ops.select_cpu() to be on the target CPU in the first place.
* and @p must match the task being enqueued.
*
* When called from ops.select_cpu(), @enq_flags and @dsp_id are stored, and @p
* will be directly inserted into the corresponding dispatch queue after
@ -7228,7 +7304,7 @@ __bpf_kfunc void scx_bpf_error_bstr(char *fmt, unsigned long long *data,
}

/**
* scx_bpf_dump - Generate extra debug dump specific to the BPF scheduler
* scx_bpf_dump_bstr - Generate extra debug dump specific to the BPF scheduler
* @fmt: format string
* @data: format string parameters packaged using ___bpf_fill() macro
* @data__sz: @data len, must end in '__sz' for the verifier
@ -7320,7 +7396,6 @@ __bpf_kfunc u32 scx_bpf_cpuperf_cur(s32 cpu)
* scx_bpf_cpuperf_set - Set the relative performance target of a CPU
* @cpu: CPU of interest
* @perf: target performance level [0, %SCX_CPUPERF_ONE]
* @flags: %SCX_CPUPERF_* flags
*
* Set the target performance level of @cpu to @perf. @perf is in linear
* relative scale between 0 and %SCX_CPUPERF_ONE. This determines how the
@ -7397,10 +7472,8 @@ __bpf_kfunc void scx_bpf_put_cpumask(const struct cpumask *cpumask)
*/
__bpf_kfunc const struct cpumask *scx_bpf_get_idle_cpumask(void)
{
if (!static_branch_likely(&scx_builtin_idle_enabled)) {
scx_ops_error("built-in idle tracking is disabled");
if (!check_builtin_idle_enabled())
return cpu_none_mask;
}

#ifdef CONFIG_SMP
return idle_masks.cpu;
@ -7418,10 +7491,8 @@ __bpf_kfunc const struct cpumask *scx_bpf_get_idle_cpumask(void)
*/
__bpf_kfunc const struct cpumask *scx_bpf_get_idle_smtmask(void)
{
if (!static_branch_likely(&scx_builtin_idle_enabled)) {
scx_ops_error("built-in idle tracking is disabled");
if (!check_builtin_idle_enabled())
return cpu_none_mask;
}

#ifdef CONFIG_SMP
if (sched_smt_active())
@ -7436,6 +7507,7 @@ __bpf_kfunc const struct cpumask *scx_bpf_get_idle_smtmask(void)
/**
* scx_bpf_put_idle_cpumask - Release a previously acquired referenced kptr to
* either the percpu, or SMT idle-tracking cpumask.
* @idle_mask: &cpumask to use
*/
__bpf_kfunc void scx_bpf_put_idle_cpumask(const struct cpumask *idle_mask)
{
@ -7459,10 +7531,8 @@ __bpf_kfunc void scx_bpf_put_idle_cpumask(const struct cpumask *idle_mask)
*/
__bpf_kfunc bool scx_bpf_test_and_clear_cpu_idle(s32 cpu)
{
if (!static_branch_likely(&scx_builtin_idle_enabled)) {
scx_ops_error("built-in idle tracking is disabled");
if (!check_builtin_idle_enabled())
return false;
}

if (ops_cpu_valid(cpu, NULL))
return test_and_clear_cpu_idle(cpu);
@ -7492,10 +7562,8 @@ __bpf_kfunc bool scx_bpf_test_and_clear_cpu_idle(s32 cpu)
__bpf_kfunc s32 scx_bpf_pick_idle_cpu(const struct cpumask *cpus_allowed,
u64 flags)
{
if (!static_branch_likely(&scx_builtin_idle_enabled)) {
scx_ops_error("built-in idle tracking is disabled");
if (!check_builtin_idle_enabled())
return -EBUSY;
}

return scx_pick_idle_cpu(cpus_allowed, flags);
}
@ -7590,6 +7658,68 @@ out:
}
#endif
/**
* scx_bpf_now - Returns a high-performance monotonically non-decreasing
* clock for the current CPU. The clock returned is in nanoseconds.
*
* It provides the following properties:
*
* 1) High performance: Many BPF schedulers call bpf_ktime_get_ns() frequently
* to account for execution time and track tasks' runtime properties.
* Unfortunately, in some hardware platforms, bpf_ktime_get_ns() -- which
* eventually reads a hardware timestamp counter -- is neither performant nor
* scalable. scx_bpf_now() aims to provide a high-performance clock by
* using the rq clock in the scheduler core whenever possible.
*
* 2) High enough resolution for the BPF scheduler use cases: In most BPF
* scheduler use cases, the required clock resolution is lower than the most
* accurate hardware clock (e.g., rdtsc in x86). scx_bpf_now() basically
* uses the rq clock in the scheduler core whenever it is valid. It considers
* that the rq clock is valid from the time the rq clock is updated
* (update_rq_clock) until the rq is unlocked (rq_unpin_lock).
*
* 3) Monotonically non-decreasing clock for the same CPU: scx_bpf_now()
* guarantees the clock never goes backward when comparing them in the same
* CPU. On the other hand, when comparing clocks in different CPUs, there
* is no such guarantee -- the clock can go backward. It provides a
* monotonically *non-decreasing* clock so that it would provide the same
* clock values in two different scx_bpf_now() calls in the same CPU
* during the same period of when the rq clock is valid.
*/
__bpf_kfunc u64 scx_bpf_now(void)
{
struct rq *rq;
u64 clock;

preempt_disable();

rq = this_rq();
if (smp_load_acquire(&rq->scx.flags) & SCX_RQ_CLK_VALID) {
/*
* If the rq clock is valid, use the cached rq clock.
*
* Note that scx_bpf_now() is re-entrant between a process
* context and an interrupt context (e.g., timer interrupt).
* However, we don't need to consider the race between them
* because such race is not observable from a caller.
*/
clock = READ_ONCE(rq->scx.clock);
} else {
/*
* Otherwise, return a fresh rq clock.
*
* The rq clock is updated outside of the rq lock.
* In this case, keep the updated rq clock invalid so the next
* kfunc call outside the rq lock gets a fresh rq clock.
*/
clock = sched_clock_cpu(cpu_of(rq));
}

preempt_enable();

return clock;
}
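From the BPF side, scx_bpf_now() is intended as a drop-in replacement for bpf_ktime_get_ns() when timestamping scheduling events. Below is a rough, hedged sketch of per-task runtime accounting using it; the struct-ops names and the task_ctx layout are made up for illustration and assume the tools/sched_ext common.bpf.h environment:

/* Hypothetical per-task context used only for this sketch. */
struct task_ctx {
	u64 running_at;
	u64 total_runtime;
};

struct {
	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, struct task_ctx);
} task_ctxs SEC(".maps");

void BPF_STRUCT_OPS(example_running, struct task_struct *p)
{
	struct task_ctx *tctx;

	tctx = bpf_task_storage_get(&task_ctxs, p, 0, BPF_LOCAL_STORAGE_GET_F_CREATE);
	if (tctx)
		tctx->running_at = scx_bpf_now();	/* cached rq clock when valid */
}

void BPF_STRUCT_OPS(example_stopping, struct task_struct *p, bool runnable)
{
	struct task_ctx *tctx;

	tctx = bpf_task_storage_get(&task_ctxs, p, 0, 0);
	/* running and stopping for one run happen on the same CPU, which is
	 * exactly the per-CPU monotonicity scx_bpf_now() guarantees */
	if (tctx)
		tctx->total_runtime += scx_bpf_now() - tctx->running_at;
}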
__bpf_kfunc_end_defs();

BTF_KFUNCS_START(scx_kfunc_ids_any)
@ -7621,6 +7751,7 @@ BTF_ID_FLAGS(func, scx_bpf_cpu_rq)
#ifdef CONFIG_CGROUP_SCHED
BTF_ID_FLAGS(func, scx_bpf_task_cgroup, KF_RCU | KF_ACQUIRE)
#endif
BTF_ID_FLAGS(func, scx_bpf_now)
BTF_KFUNCS_END(scx_kfunc_ids_any)

static const struct btf_kfunc_id_set scx_kfunc_set_any = {
@ -759,6 +759,7 @@ enum scx_rq_flags {
SCX_RQ_BAL_PENDING = 1 << 2, /* balance hasn't run yet */
SCX_RQ_BAL_KEEP = 1 << 3, /* balance decided to keep current */
SCX_RQ_BYPASSING = 1 << 4,
SCX_RQ_CLK_VALID = 1 << 5, /* RQ clock is fresh and valid */

SCX_RQ_IN_WAKEUP = 1 << 16,
SCX_RQ_IN_BALANCE = 1 << 17,
@ -771,9 +772,10 @@ struct scx_rq {
unsigned long ops_qseq;
u64 extra_enq_flags; /* see move_task_to_local_dsq() */
u32 nr_running;
u32 flags;
u32 cpuperf_target; /* [0, SCHED_CAPACITY_SCALE] */
bool cpu_released;
u32 flags;
u64 clock; /* current per-rq clock -- see scx_bpf_now() */
cpumask_var_t cpus_to_kick;
cpumask_var_t cpus_to_kick_if_idle;
cpumask_var_t cpus_to_preempt;
@ -1722,6 +1724,38 @@ struct rq_flags {

extern struct balance_callback balance_push_callback;

#ifdef CONFIG_SCHED_CLASS_EXT
extern const struct sched_class ext_sched_class;

DECLARE_STATIC_KEY_FALSE(__scx_ops_enabled); /* SCX BPF scheduler loaded */
DECLARE_STATIC_KEY_FALSE(__scx_switched_all); /* all fair class tasks on SCX */

#define scx_enabled() static_branch_unlikely(&__scx_ops_enabled)
#define scx_switched_all() static_branch_unlikely(&__scx_switched_all)

static inline void scx_rq_clock_update(struct rq *rq, u64 clock)
{
if (!scx_enabled())
return;
WRITE_ONCE(rq->scx.clock, clock);
smp_store_release(&rq->scx.flags, rq->scx.flags | SCX_RQ_CLK_VALID);
}

static inline void scx_rq_clock_invalidate(struct rq *rq)
{
if (!scx_enabled())
return;
WRITE_ONCE(rq->scx.flags, rq->scx.flags & ~SCX_RQ_CLK_VALID);
}

#else /* !CONFIG_SCHED_CLASS_EXT */
#define scx_enabled() false
#define scx_switched_all() false

static inline void scx_rq_clock_update(struct rq *rq, u64 clock) {}
static inline void scx_rq_clock_invalidate(struct rq *rq) {}
#endif /* !CONFIG_SCHED_CLASS_EXT */

/*
* Lockdep annotation that avoids accidental unlocks; it's like a
* sticky/continuous lockdep_assert_held().
@ -1751,7 +1785,7 @@ static inline void rq_unpin_lock(struct rq *rq, struct rq_flags *rf)
if (rq->clock_update_flags > RQCF_ACT_SKIP)
rf->clock_update_flags = RQCF_UPDATED;
#endif

scx_rq_clock_invalidate(rq);
lockdep_unpin_lock(__rq_lockp(rq), rf->cookie);
}

@ -2510,19 +2544,6 @@ extern const struct sched_class rt_sched_class;
extern const struct sched_class fair_sched_class;
extern const struct sched_class idle_sched_class;

#ifdef CONFIG_SCHED_CLASS_EXT
extern const struct sched_class ext_sched_class;

DECLARE_STATIC_KEY_FALSE(__scx_ops_enabled); /* SCX BPF scheduler loaded */
DECLARE_STATIC_KEY_FALSE(__scx_switched_all); /* all fair class tasks on SCX */

#define scx_enabled() static_branch_unlikely(&__scx_ops_enabled)
#define scx_switched_all() static_branch_unlikely(&__scx_switched_all)
#else /* !CONFIG_SCHED_CLASS_EXT */
#define scx_enabled() false
#define scx_switched_all() false
#endif /* !CONFIG_SCHED_CLASS_EXT */

/*
* Iterate only active classes. SCX can take over all fair tasks or be
* completely disabled. If the former, skip fair. If the latter, skip SCX.
@ -9,7 +9,7 @@

#ifdef LSP
#define __bpf__
#include "../vmlinux/vmlinux.h"
#include "../vmlinux.h"
#else
#include "vmlinux.h"
#endif
@ -24,6 +24,10 @@
#define PF_EXITING 0x00000004
#define CLOCK_MONOTONIC 1

extern int LINUX_KERNEL_VERSION __kconfig;
extern const char CONFIG_CC_VERSION_TEXT[64] __kconfig __weak;
extern const char CONFIG_LOCALVERSION[64] __kconfig __weak;

/*
* Earlier versions of clang/pahole lost upper 32bits in 64bit enums which can
* lead to really confusing misbehaviors. Let's trigger a build failure.
@ -72,6 +76,7 @@ bool scx_bpf_task_running(const struct task_struct *p) __ksym;
s32 scx_bpf_task_cpu(const struct task_struct *p) __ksym;
struct rq *scx_bpf_cpu_rq(s32 cpu) __ksym;
struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym __weak;
u64 scx_bpf_now(void) __ksym __weak;

/*
* Use the following as @it__iter when calling scx_bpf_dsq_move[_vtime]() from
@ -98,7 +103,7 @@ void ___scx_bpf_bstr_format_checker(const char *fmt, ...) {}
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
___bpf_fill(___param, args); \
_Pragma("GCC diagnostic pop") \
_Pragma("GCC diagnostic pop")

/*
* scx_bpf_exit() wraps the scx_bpf_exit_bstr() kfunc with variadic arguments
@ -136,6 +141,20 @@ void ___scx_bpf_bstr_format_checker(const char *fmt, ...) {}
___scx_bpf_bstr_format_checker(fmt, ##args); \
})

/*
* scx_bpf_dump_header() is a wrapper around scx_bpf_dump that adds a header
* of system information for debugging.
*/
#define scx_bpf_dump_header() \
({ \
scx_bpf_dump("kernel: %d.%d.%d %s\ncc: %s\n", \
LINUX_KERNEL_VERSION >> 16, \
LINUX_KERNEL_VERSION >> 8 & 0xFF, \
LINUX_KERNEL_VERSION & 0xFF, \
CONFIG_LOCALVERSION, \
CONFIG_CC_VERSION_TEXT); \
})
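As a usage sketch (not part of the patch), ops.dump() can emit the header before any scheduler-specific state; the callback name and the counters referenced here are hypothetical:

/* Illustrative only. */
u64 nr_queued, nr_dispatched;

void BPF_STRUCT_OPS(example_dump, struct scx_dump_ctx *dctx)
{
	/* kernel version / compiler banner first, then scheduler state */
	scx_bpf_dump_header();
	scx_bpf_dump("nr_queued=%llu nr_dispatched=%llu\n",
		     nr_queued, nr_dispatched);
}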
#define BPF_STRUCT_OPS(name, args...) \
SEC("struct_ops/"#name) \
BPF_PROG(name, ##args)
@ -317,6 +336,66 @@ u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1,
const struct cpumask *src2) __ksym;
u32 bpf_cpumask_weight(const struct cpumask *cpumask) __ksym;

int bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, u32 nr_words) __ksym;
int *bpf_iter_bits_next(struct bpf_iter_bits *it) __ksym;
void bpf_iter_bits_destroy(struct bpf_iter_bits *it) __ksym;

#define def_iter_struct(name) \
struct bpf_iter_##name { \
struct bpf_iter_bits it; \
const struct cpumask *bitmap; \
};

#define def_iter_new(name) \
static inline int bpf_iter_##name##_new( \
struct bpf_iter_##name *it, const u64 *unsafe_ptr__ign, u32 nr_words) \
{ \
it->bitmap = scx_bpf_get_##name##_cpumask(); \
return bpf_iter_bits_new(&it->it, (const u64 *)it->bitmap, \
sizeof(struct cpumask) / 8); \
}

#define def_iter_next(name) \
static inline int *bpf_iter_##name##_next(struct bpf_iter_##name *it) { \
return bpf_iter_bits_next(&it->it); \
}

#define def_iter_destroy(name) \
static inline void bpf_iter_##name##_destroy(struct bpf_iter_##name *it) { \
scx_bpf_put_cpumask(it->bitmap); \
bpf_iter_bits_destroy(&it->it); \
}
#define def_for_each_cpu(cpu, name) for_each_##name##_cpu(cpu)

/// Provides iterator for possible and online cpus.
///
/// # Example
///
/// ```
/// static inline void example_use() {
///     int *cpu;
///
///     for_each_possible_cpu(cpu){
///         bpf_printk("CPU %d is possible", *cpu);
///     }
///
///     for_each_online_cpu(cpu){
///         bpf_printk("CPU %d is online", *cpu);
///     }
/// }
/// ```
def_iter_struct(possible);
def_iter_new(possible);
def_iter_next(possible);
def_iter_destroy(possible);
#define for_each_possible_cpu(cpu) bpf_for_each(possible, cpu, NULL, 0)

def_iter_struct(online);
def_iter_new(online);
def_iter_next(online);
def_iter_destroy(online);
#define for_each_online_cpu(cpu) bpf_for_each(online, cpu, NULL, 0)

/*
* Access a cpumask in read-only mode (typically to check bits).
*/
@ -329,6 +408,100 @@ static __always_inline const struct cpumask *cast_mask(struct bpf_cpumask *mask)
void bpf_rcu_read_lock(void) __ksym;
void bpf_rcu_read_unlock(void) __ksym;

/*
* Time helpers, most of which are from jiffies.h.
*/

/**
* time_delta - Calculate the delta between new and old time stamp
* @after: first comparable as u64
* @before: second comparable as u64
*
* Return: the time difference, which is >= 0
*/
static inline s64 time_delta(u64 after, u64 before)
{
return (s64)(after - before) > 0 ? : 0;
}

/**
* time_after - returns true if the time a is after time b.
* @a: first comparable as u64
* @b: second comparable as u64
*
* Do this with "<0" and ">=0" to only test the sign of the result. A
* good compiler would generate better code (and a really good compiler
* wouldn't care). Gcc is currently neither.
*
* Return: %true is time a is after time b, otherwise %false.
*/
static inline bool time_after(u64 a, u64 b)
{
return (s64)(b - a) < 0;
}

/**
* time_before - returns true if the time a is before time b.
* @a: first comparable as u64
* @b: second comparable as u64
*
* Return: %true is time a is before time b, otherwise %false.
*/
static inline bool time_before(u64 a, u64 b)
{
return time_after(b, a);
}

/**
* time_after_eq - returns true if the time a is after or the same as time b.
* @a: first comparable as u64
* @b: second comparable as u64
*
* Return: %true is time a is after or the same as time b, otherwise %false.
*/
static inline bool time_after_eq(u64 a, u64 b)
{
return (s64)(a - b) >= 0;
}

/**
* time_before_eq - returns true if the time a is before or the same as time b.
* @a: first comparable as u64
* @b: second comparable as u64
*
* Return: %true is time a is before or the same as time b, otherwise %false.
*/
static inline bool time_before_eq(u64 a, u64 b)
{
return time_after_eq(b, a);
}

/**
* time_in_range - Calculate whether a is in the range of [b, c].
* @a: time to test
* @b: beginning of the range
* @c: end of the range
*
* Return: %true is time a is in the range [b, c], otherwise %false.
*/
static inline bool time_in_range(u64 a, u64 b, u64 c)
{
return time_after_eq(a, b) && time_before_eq(a, c);
}

/**
* time_in_range_open - Calculate whether a is in the range of [b, c).
* @a: time to test
* @b: beginning of the range
* @c: end of the range
*
* Return: %true is time a is in the range [b, c), otherwise %false.
*/
static inline bool time_in_range_open(u64 a, u64 b, u64 c)
{
return time_after_eq(a, b) && time_before(a, c);
}
|
||||
* Other helpers
|
||||
@ -423,5 +596,6 @@ static inline u32 log2_u64(u64 v)
|
||||
}
|
||||
|
||||
#include "compat.bpf.h"
|
||||
#include "enums.bpf.h"
|
||||
|
||||
#endif /* __SCX_COMMON_BPF_H */
|
||||
|
@ -71,5 +71,11 @@ typedef int64_t s64;

#include "user_exit_info.h"
#include "compat.h"
#include "enums.h"

/* not available when building kernel tools/sched_ext */
#if __has_include(<lib/sdt_task.h>)
#include <lib/sdt_task.h>
#endif

#endif /* __SCHED_EXT_COMMON_H */
@ -125,6 +125,11 @@ bool scx_bpf_dispatch_vtime_from_dsq___compat(struct bpf_iter_scx_dsq *it__iter,
false; \
})

#define scx_bpf_now() \
(bpf_ksym_exists(scx_bpf_now) ? \
scx_bpf_now() : \
bpf_ktime_get_ns())

/*
* Define sched_ext_ops. This may be expanded to define multiple variants for
* backward compatibility. See compat.h::SCX_OPS_LOAD/ATTACH().
@ -149,6 +149,7 @@ static inline long scx_hotplug_seq(void)
__skel = __scx_name##__open(); \
SCX_BUG_ON(!__skel, "Could not open " #__scx_name); \
__skel->struct_ops.__ops_name->hotplug_seq = scx_hotplug_seq(); \
SCX_ENUM_INIT(__skel); \
__skel; \
})
tools/sched_ext/include/scx/enums.autogen.bpf.h (new file, 105 lines)
@ -0,0 +1,105 @@
/*
* WARNING: This file is autogenerated from scripts/gen_enums.py. If you would
* like to access an enum that is currently missing, add it to the script
* and run it from the root directory to update this file.
*/

const volatile u64 __SCX_OPS_NAME_LEN __weak;
#define SCX_OPS_NAME_LEN __SCX_OPS_NAME_LEN

const volatile u64 __SCX_SLICE_DFL __weak;
#define SCX_SLICE_DFL __SCX_SLICE_DFL

const volatile u64 __SCX_SLICE_INF __weak;
#define SCX_SLICE_INF __SCX_SLICE_INF

const volatile u64 __SCX_DSQ_FLAG_BUILTIN __weak;
#define SCX_DSQ_FLAG_BUILTIN __SCX_DSQ_FLAG_BUILTIN

const volatile u64 __SCX_DSQ_FLAG_LOCAL_ON __weak;
#define SCX_DSQ_FLAG_LOCAL_ON __SCX_DSQ_FLAG_LOCAL_ON

const volatile u64 __SCX_DSQ_INVALID __weak;
#define SCX_DSQ_INVALID __SCX_DSQ_INVALID

const volatile u64 __SCX_DSQ_GLOBAL __weak;
#define SCX_DSQ_GLOBAL __SCX_DSQ_GLOBAL

const volatile u64 __SCX_DSQ_LOCAL __weak;
#define SCX_DSQ_LOCAL __SCX_DSQ_LOCAL

const volatile u64 __SCX_DSQ_LOCAL_ON __weak;
#define SCX_DSQ_LOCAL_ON __SCX_DSQ_LOCAL_ON

const volatile u64 __SCX_DSQ_LOCAL_CPU_MASK __weak;
#define SCX_DSQ_LOCAL_CPU_MASK __SCX_DSQ_LOCAL_CPU_MASK

const volatile u64 __SCX_TASK_QUEUED __weak;
#define SCX_TASK_QUEUED __SCX_TASK_QUEUED

const volatile u64 __SCX_TASK_RESET_RUNNABLE_AT __weak;
#define SCX_TASK_RESET_RUNNABLE_AT __SCX_TASK_RESET_RUNNABLE_AT

const volatile u64 __SCX_TASK_DEQD_FOR_SLEEP __weak;
#define SCX_TASK_DEQD_FOR_SLEEP __SCX_TASK_DEQD_FOR_SLEEP

const volatile u64 __SCX_TASK_STATE_SHIFT __weak;
#define SCX_TASK_STATE_SHIFT __SCX_TASK_STATE_SHIFT

const volatile u64 __SCX_TASK_STATE_BITS __weak;
#define SCX_TASK_STATE_BITS __SCX_TASK_STATE_BITS

const volatile u64 __SCX_TASK_STATE_MASK __weak;
#define SCX_TASK_STATE_MASK __SCX_TASK_STATE_MASK

const volatile u64 __SCX_TASK_CURSOR __weak;
#define SCX_TASK_CURSOR __SCX_TASK_CURSOR

const volatile u64 __SCX_TASK_NONE __weak;
#define SCX_TASK_NONE __SCX_TASK_NONE

const volatile u64 __SCX_TASK_INIT __weak;
#define SCX_TASK_INIT __SCX_TASK_INIT

const volatile u64 __SCX_TASK_READY __weak;
#define SCX_TASK_READY __SCX_TASK_READY

const volatile u64 __SCX_TASK_ENABLED __weak;
#define SCX_TASK_ENABLED __SCX_TASK_ENABLED

const volatile u64 __SCX_TASK_NR_STATES __weak;
#define SCX_TASK_NR_STATES __SCX_TASK_NR_STATES

const volatile u64 __SCX_TASK_DSQ_ON_PRIQ __weak;
#define SCX_TASK_DSQ_ON_PRIQ __SCX_TASK_DSQ_ON_PRIQ

const volatile u64 __SCX_KICK_IDLE __weak;
#define SCX_KICK_IDLE __SCX_KICK_IDLE

const volatile u64 __SCX_KICK_PREEMPT __weak;
#define SCX_KICK_PREEMPT __SCX_KICK_PREEMPT

const volatile u64 __SCX_KICK_WAIT __weak;
#define SCX_KICK_WAIT __SCX_KICK_WAIT

const volatile u64 __SCX_ENQ_WAKEUP __weak;
#define SCX_ENQ_WAKEUP __SCX_ENQ_WAKEUP

const volatile u64 __SCX_ENQ_HEAD __weak;
#define SCX_ENQ_HEAD __SCX_ENQ_HEAD

const volatile u64 __SCX_ENQ_PREEMPT __weak;
#define SCX_ENQ_PREEMPT __SCX_ENQ_PREEMPT

const volatile u64 __SCX_ENQ_REENQ __weak;
#define SCX_ENQ_REENQ __SCX_ENQ_REENQ

const volatile u64 __SCX_ENQ_LAST __weak;
#define SCX_ENQ_LAST __SCX_ENQ_LAST

const volatile u64 __SCX_ENQ_CLEAR_OPSS __weak;
#define SCX_ENQ_CLEAR_OPSS __SCX_ENQ_CLEAR_OPSS

const volatile u64 __SCX_ENQ_DSQ_PRIQ __weak;
#define SCX_ENQ_DSQ_PRIQ __SCX_ENQ_DSQ_PRIQ
tools/sched_ext/include/scx/enums.autogen.h (new file, 41 lines)
@ -0,0 +1,41 @@
/*
* WARNING: This file is autogenerated from scripts/gen_enums.py. If you would
* like to access an enum that is currently missing, add it to the script
* and run it from the root directory to update this file.
*/

#define SCX_ENUM_INIT(skel) do { \
SCX_ENUM_SET(skel, scx_public_consts, SCX_OPS_NAME_LEN); \
SCX_ENUM_SET(skel, scx_public_consts, SCX_SLICE_DFL); \
SCX_ENUM_SET(skel, scx_public_consts, SCX_SLICE_INF); \
SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_FLAG_BUILTIN); \
SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_FLAG_LOCAL_ON); \
SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_INVALID); \
SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_GLOBAL); \
SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_LOCAL); \
SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_LOCAL_ON); \
SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_LOCAL_CPU_MASK); \
SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_QUEUED); \
SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_RESET_RUNNABLE_AT); \
SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_DEQD_FOR_SLEEP); \
SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_STATE_SHIFT); \
SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_STATE_BITS); \
SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_STATE_MASK); \
SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_CURSOR); \
SCX_ENUM_SET(skel, scx_task_state, SCX_TASK_NONE); \
SCX_ENUM_SET(skel, scx_task_state, SCX_TASK_INIT); \
SCX_ENUM_SET(skel, scx_task_state, SCX_TASK_READY); \
SCX_ENUM_SET(skel, scx_task_state, SCX_TASK_ENABLED); \
SCX_ENUM_SET(skel, scx_task_state, SCX_TASK_NR_STATES); \
SCX_ENUM_SET(skel, scx_ent_dsq_flags, SCX_TASK_DSQ_ON_PRIQ); \
SCX_ENUM_SET(skel, scx_kick_flags, SCX_KICK_IDLE); \
SCX_ENUM_SET(skel, scx_kick_flags, SCX_KICK_PREEMPT); \
SCX_ENUM_SET(skel, scx_kick_flags, SCX_KICK_WAIT); \
SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_WAKEUP); \
SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_HEAD); \
SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_PREEMPT); \
SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_REENQ); \
SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_LAST); \
SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_CLEAR_OPSS); \
SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_DSQ_PRIQ); \
} while (0)
tools/sched_ext/include/scx/enums.bpf.h (new file, 12 lines)
@ -0,0 +1,12 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Convenience macros for getting/setting struct scx_enums instances.
*
* Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
*/
#ifndef __SCX_ENUMS_BPF_H
#define __SCX_ENUMS_BPF_H

#include "enums.autogen.bpf.h"

#endif /* __SCX_ENUMS_BPF_H */
tools/sched_ext/include/scx/enums.h (new file, 27 lines)
@ -0,0 +1,27 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Define struct scx_enums that stores the load-time values of enums
* used by the BPF program.
*
* Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
*/

#ifndef __SCX_ENUMS_H
#define __SCX_ENUMS_H

static inline void __ENUM_set(u64 *val, char *type, char *name)
{
bool res;

res = __COMPAT_read_enum(type, name, val);
SCX_BUG_ON(!res, "enum not found(%s)", name);
}

#define SCX_ENUM_SET(skel, type, name) do { \
__ENUM_set(&skel->rodata->__##name, #type, #name); \
} while (0)

#include "enums.autogen.h"

#endif /* __SCX_ENUMS_H */
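The pieces above fit together as follows: the loader reads the kernel's enum values via BTF at open time and publishes them into the BPF program's __SCX_* rodata variables, so BPF code keeps using the familiar macro names while getting load-time values. A rough user-space sketch under assumed names (the example_ops/scx_example skeleton is hypothetical; SCX_OPS_OPEN() already runs SCX_ENUM_INIT() as shown earlier):

/* Illustrative only. */
struct scx_example *skel = SCX_OPS_OPEN(example_ops, scx_example);

/* Compile-time defaults are gone, so fetch the kernel's SCX_SLICE_DFL the
 * same way the scx_central and scx_flatcg changes below do. */
skel->rodata->slice_ns =
	__COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL");

/* ...then SCX_OPS_LOAD() / SCX_OPS_ATTACH() as usual. */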
@ -10,6 +10,11 @@
#ifndef __USER_EXIT_INFO_H
#define __USER_EXIT_INFO_H

#ifdef LSP
#define __bpf__
#include "../vmlinux.h"
#endif

enum uei_sizes {
UEI_REASON_LEN = 128,
UEI_MSG_LEN = 1024,
@ -25,9 +30,7 @@ struct user_exit_info {

#ifdef __bpf__

#ifdef LSP
#include "../vmlinux/vmlinux.h"
#else
#ifndef LSP
#include "vmlinux.h"
#endif
#include <bpf/bpf_core_read.h>
@ -57,7 +57,7 @@ enum {
|
||||
|
||||
const volatile s32 central_cpu;
|
||||
const volatile u32 nr_cpu_ids = 1; /* !0 for veristat, set during init */
|
||||
const volatile u64 slice_ns = SCX_SLICE_DFL;
|
||||
const volatile u64 slice_ns;
|
||||
|
||||
bool timer_pinned = true;
|
||||
u64 nr_total, nr_locals, nr_queued, nr_lost_pids;
|
||||
@ -87,11 +87,6 @@ struct {
|
||||
__type(value, struct central_timer);
|
||||
} central_timer SEC(".maps");
|
||||
|
||||
static bool vtime_before(u64 a, u64 b)
|
||||
{
|
||||
return (s64)(a - b) < 0;
|
||||
}
|
||||
|
||||
s32 BPF_STRUCT_OPS(central_select_cpu, struct task_struct *p,
|
||||
s32 prev_cpu, u64 wake_flags)
|
||||
{
|
||||
@ -245,7 +240,7 @@ void BPF_STRUCT_OPS(central_running, struct task_struct *p)
|
||||
s32 cpu = scx_bpf_task_cpu(p);
|
||||
u64 *started_at = ARRAY_ELEM_PTR(cpu_started_at, cpu, nr_cpu_ids);
|
||||
if (started_at)
|
||||
*started_at = bpf_ktime_get_ns() ?: 1; /* 0 indicates idle */
|
||||
*started_at = scx_bpf_now() ?: 1; /* 0 indicates idle */
|
||||
}
|
||||
|
||||
void BPF_STRUCT_OPS(central_stopping, struct task_struct *p, bool runnable)
|
||||
@ -258,7 +253,7 @@ void BPF_STRUCT_OPS(central_stopping, struct task_struct *p, bool runnable)
|
||||
|
||||
static int central_timerfn(void *map, int *key, struct bpf_timer *timer)
|
||||
{
|
||||
u64 now = bpf_ktime_get_ns();
|
||||
u64 now = scx_bpf_now();
|
||||
u64 nr_to_kick = nr_queued;
|
||||
s32 i, curr_cpu;
|
||||
|
||||
@ -279,7 +274,7 @@ static int central_timerfn(void *map, int *key, struct bpf_timer *timer)
|
||||
/* kick iff the current one exhausted its slice */
|
||||
started_at = ARRAY_ELEM_PTR(cpu_started_at, cpu, nr_cpu_ids);
|
||||
if (started_at && *started_at &&
|
||||
vtime_before(now, *started_at + slice_ns))
|
||||
time_before(now, *started_at + slice_ns))
|
||||
continue;
|
||||
|
||||
/* and there's something pending */
|
||||
|
@ -58,6 +58,7 @@ restart:

        skel->rodata->central_cpu = 0;
        skel->rodata->nr_cpu_ids = libbpf_num_possible_cpus();
        skel->rodata->slice_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL");

        while ((opt = getopt(argc, argv, "s:c:pvh")) != -1) {
                switch (opt) {
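
slice_ns is now left uninitialized on the BPF side and filled in at load time. __COMPAT_ENUM_OR_ZERO() is not shown in this diff, so the following is only a plausible shape for it, assuming it is built on the same __COMPAT_read_enum() helper used by __ENUM_set() in enums.h above:

/* Hypothetical definition, for illustration only: yield the kernel's value
 * for the named enum, or 0 when it cannot be read, letting the loader fall
 * back gracefully on older kernels. */
#define __COMPAT_ENUM_OR_ZERO(type, name)                               \
({                                                                      \
        u64 __val = 0;                                                  \
        __COMPAT_read_enum(type, name, &__val);                         \
        __val;                                                          \
})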
@ -57,7 +57,7 @@ enum {
char _license[] SEC("license") = "GPL";

const volatile u32 nr_cpus = 32;        /* !0 for veristat, set during init */
const volatile u64 cgrp_slice_ns = SCX_SLICE_DFL;
const volatile u64 cgrp_slice_ns;
const volatile bool fifo_sched;

u64 cvtime_now;
@ -137,11 +137,6 @@ static u64 div_round_up(u64 dividend, u64 divisor)
        return (dividend + divisor - 1) / divisor;
}

static bool vtime_before(u64 a, u64 b)
{
        return (s64)(a - b) < 0;
}

static bool cgv_node_less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
{
        struct cgv_node *cgc_a, *cgc_b;
@ -271,7 +266,7 @@ static void cgrp_cap_budget(struct cgv_node *cgv_node, struct fcg_cgrp_ctx *cgc)
         */
        max_budget = (cgrp_slice_ns * nr_cpus * cgc->hweight) /
                (2 * FCG_HWEIGHT_ONE);
        if (vtime_before(cvtime, cvtime_now - max_budget))
        if (time_before(cvtime, cvtime_now - max_budget))
                cvtime = cvtime_now - max_budget;

        cgv_node->cvtime = cvtime;
@ -401,7 +396,7 @@ void BPF_STRUCT_OPS(fcg_enqueue, struct task_struct *p, u64 enq_flags)
         * Limit the amount of budget that an idling task can accumulate
         * to one slice.
         */
        if (vtime_before(tvtime, cgc->tvtime_now - SCX_SLICE_DFL))
        if (time_before(tvtime, cgc->tvtime_now - SCX_SLICE_DFL))
                tvtime = cgc->tvtime_now - SCX_SLICE_DFL;

        scx_bpf_dsq_insert_vtime(p, cgrp->kn->id, SCX_SLICE_DFL,
@ -535,7 +530,7 @@ void BPF_STRUCT_OPS(fcg_running, struct task_struct *p)
                 * from multiple CPUs and thus racy. Any error should be
                 * contained and temporary. Let's just live with it.
                 */
                if (vtime_before(cgc->tvtime_now, p->scx.dsq_vtime))
                if (time_before(cgc->tvtime_now, p->scx.dsq_vtime))
                        cgc->tvtime_now = p->scx.dsq_vtime;
        }
        bpf_cgroup_release(cgrp);
@ -645,7 +640,7 @@ static bool try_pick_next_cgroup(u64 *cgidp)
        cgv_node = container_of(rb_node, struct cgv_node, rb_node);
        cgid = cgv_node->cgid;

        if (vtime_before(cvtime_now, cgv_node->cvtime))
        if (time_before(cvtime_now, cgv_node->cvtime))
                cvtime_now = cgv_node->cvtime;

        /*
@ -734,7 +729,7 @@ void BPF_STRUCT_OPS(fcg_dispatch, s32 cpu, struct task_struct *prev)
        struct fcg_cpu_ctx *cpuc;
        struct fcg_cgrp_ctx *cgc;
        struct cgroup *cgrp;
        u64 now = bpf_ktime_get_ns();
        u64 now = scx_bpf_now();
        bool picked_next = false;

        cpuc = find_cpu_ctx();
@ -744,7 +739,7 @@ void BPF_STRUCT_OPS(fcg_dispatch, s32 cpu, struct task_struct *prev)
        if (!cpuc->cur_cgid)
                goto pick_next_cgroup;

        if (vtime_before(now, cpuc->cur_at + cgrp_slice_ns)) {
        if (time_before(now, cpuc->cur_at + cgrp_slice_ns)) {
                if (scx_bpf_dsq_move_to_local(cpuc->cur_cgid)) {
                        stat_inc(FCG_STAT_CNS_KEEP);
                        return;
@ -920,14 +915,14 @@ void BPF_STRUCT_OPS(fcg_cgroup_move, struct task_struct *p,
                    struct cgroup *from, struct cgroup *to)
{
        struct fcg_cgrp_ctx *from_cgc, *to_cgc;
        s64 vtime_delta;
        s64 delta;

        /* find_cgrp_ctx() triggers scx_ops_error() on lookup failures */
        if (!(from_cgc = find_cgrp_ctx(from)) || !(to_cgc = find_cgrp_ctx(to)))
                return;

        vtime_delta = p->scx.dsq_vtime - from_cgc->tvtime_now;
        p->scx.dsq_vtime = to_cgc->tvtime_now + vtime_delta;
        delta = time_delta(p->scx.dsq_vtime, from_cgc->tvtime_now);
        p->scx.dsq_vtime = to_cgc->tvtime_now + delta;
}

s32 BPF_STRUCT_OPS_SLEEPABLE(fcg_init)
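
fcg_cgroup_move() now goes through time_delta() instead of the open-coded signed subtraction. The helper itself is not part of this diff, so the sketch below is only an assumed shape; the apparent intent is that a task whose vtime lags its old cgroup no longer carries a negative offset into the new one:

/* Assumed sketch of time_delta(): the difference between two u64 timestamps,
 * clamped to zero when @after is behind @before.  Illustration only, not the
 * actual common-header definition. */
static inline s64 time_delta(u64 after, u64 before)
{
        s64 delta = (s64)(after - before);

        return delta > 0 ? delta : 0;
}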
@ -137,6 +137,7 @@ restart:
        skel = SCX_OPS_OPEN(flatcg_ops, scx_flatcg);

        skel->rodata->nr_cpus = libbpf_num_possible_cpus();
        skel->rodata->cgrp_slice_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL");

        while ((opt = getopt(argc, argv, "s:i:dfvh")) != -1) {
                double v;
@ -33,7 +33,7 @@ enum consts {

char _license[] SEC("license") = "GPL";

const volatile u64 slice_ns = SCX_SLICE_DFL;
const volatile u64 slice_ns;
const volatile u32 stall_user_nth;
const volatile u32 stall_kernel_nth;
const volatile u32 dsp_inf_loop_after;
@ -64,6 +64,8 @@ int main(int argc, char **argv)

        skel = SCX_OPS_OPEN(qmap_ops, scx_qmap);

        skel->rodata->slice_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL");

        while ((opt = getopt(argc, argv, "s:e:t:T:l:b:PHd:D:Spvh")) != -1) {
                switch (opt) {
                case 's':
@ -52,11 +52,6 @@ static void stat_inc(u32 idx)
                (*cnt_p)++;
}

static inline bool vtime_before(u64 a, u64 b)
{
        return (s64)(a - b) < 0;
}

s32 BPF_STRUCT_OPS(simple_select_cpu, struct task_struct *p, s32 prev_cpu, u64 wake_flags)
{
        bool is_idle = false;
@ -84,7 +79,7 @@ void BPF_STRUCT_OPS(simple_enqueue, struct task_struct *p, u64 enq_flags)
         * Limit the amount of budget that an idling task can accumulate
         * to one slice.
         */
        if (vtime_before(vtime, vtime_now - SCX_SLICE_DFL))
        if (time_before(vtime, vtime_now - SCX_SLICE_DFL))
                vtime = vtime_now - SCX_SLICE_DFL;

        scx_bpf_dsq_insert_vtime(p, SHARED_DSQ, SCX_SLICE_DFL, vtime,
@ -108,7 +103,7 @@ void BPF_STRUCT_OPS(simple_running, struct task_struct *p)
         * thus racy. Any error should be contained and temporary. Let's just
         * live with it.
         */
        if (vtime_before(vtime_now, p->scx.dsq_vtime))
        if (time_before(vtime_now, p->scx.dsq_vtime))
                vtime_now = p->scx.dsq_vtime;
}
@ -22,11 +22,12 @@ const char help_fmt[] =
"\n"
"  -t TEST       Only run tests whose name includes this string\n"
"  -s            Include print output for skipped tests\n"
"  -l            List all available tests\n"
"  -q            Don't print the test descriptions during run\n"
"  -h            Display this help and exit\n";

static volatile int exit_req;
static bool quiet, print_skipped;
static bool quiet, print_skipped, list;

#define MAX_SCX_TESTS 2048

@ -133,7 +134,7 @@ int main(int argc, char **argv)

        libbpf_set_strict_mode(LIBBPF_STRICT_ALL);

        while ((opt = getopt(argc, argv, "qst:h")) != -1) {
        while ((opt = getopt(argc, argv, "qslt:h")) != -1) {
                switch (opt) {
                case 'q':
                        quiet = true;
@ -141,6 +142,9 @@ int main(int argc, char **argv)
                case 's':
                        print_skipped = true;
                        break;
                case 'l':
                        list = true;
                        break;
                case 't':
                        filter = optarg;
                        break;
@ -154,6 +158,13 @@ int main(int argc, char **argv)
                enum scx_test_status status;
                struct scx_test *test = &__scx_tests[i];

                if (list) {
                        printf("%s\n", test->name);
                        if (i == (__scx_num_tests - 1))
                                return 0;
                        continue;
                }

                if (filter && should_skip_test(test, filter)) {
                        /*
                         * Printing the skipped tests and their preambles can