mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-04 04:04:19 +00:00
sched_ext: Implement scx_bpf_dispatch[_vtime]_from_dsq()
Once a task is put into a DSQ, the allowed operations are fairly limited. Tasks in the built-in local and global DSQs are executed automatically and, ignoring dequeue, there is only one way a task in a user DSQ can be manipulated - scx_bpf_consume() moves the first task to the dispatching local DSQ. This inflexibility sometimes gets in the way and is an area where multiple feature requests have been made. Implement scx_bpf_dispatch[_vtime]_from_dsq(), which can be called during DSQ iteration and can move the task to any DSQ - local DSQs, global DSQ and user DSQs. The kfuncs can be called from ops.dispatch() and any BPF context which doesn't hold a rq lock including BPF timers and SYSCALL programs. This is an expansion of an earlier patch which only allowed moving into the dispatching local DSQ: http://lkml.kernel.org/r/Zn4Cw4FDTmvXnhaf@slm.duckdns.org v2: Remove @slice and @vtime from scx_bpf_dispatch[_vtime]_from_dsq() as they push scx_bpf_dispatch_vtime_from_dsq() over the kfunc argument count limit and often won't be needed anyway. Instead provide scx_bpf_dispatch_from_dsq_set_{slice|vtime}() kfuncs which can be called only when needed and override the specified parameter for the subsequent dispatch. Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Daniel Hodges <hodges.daniel.scott@gmail.com> Cc: David Vernet <void@manifault.com> Cc: Changwoo Min <multics69@gmail.com> Cc: Andrea Righi <andrea.righi@linux.dev> Cc: Dan Schatzberg <schatzberg.dan@gmail.com>
This commit is contained in:
parent
6462dd53a2
commit
4c30f5ce4f
@ -1158,6 +1158,11 @@ static __always_inline bool scx_kf_allowed_on_arg_tasks(u32 mask,
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
 * Return %true when %current has no SCX kfunc-mask bits set.
 * NOTE(review): presumably an empty mask means we are not inside any sched_ext
 * op invocation and therefore hold no rq lock - confirm against the
 * scx_kf_allow()/scx_kf_disallow() callers.
 */
static bool scx_kf_allowed_if_unlocked(void)
|
||||
{
|
||||
return !current->scx.kf_mask;
|
||||
}
|
||||
|
||||
/**
|
||||
* nldsq_next_task - Iterate to the next task in a non-local DSQ
|
||||
* @dsq: user dsq being iterated
|
||||
@ -1211,13 +1216,20 @@ enum scx_dsq_iter_flags {
|
||||
/* iterate in the reverse dispatch order */
|
||||
SCX_DSQ_ITER_REV = 1U << 16,
|
||||
|
||||
__SCX_DSQ_ITER_HAS_SLICE = 1U << 30,
|
||||
__SCX_DSQ_ITER_HAS_VTIME = 1U << 31,
|
||||
|
||||
__SCX_DSQ_ITER_USER_FLAGS = SCX_DSQ_ITER_REV,
|
||||
__SCX_DSQ_ITER_ALL_FLAGS = __SCX_DSQ_ITER_USER_FLAGS,
|
||||
__SCX_DSQ_ITER_ALL_FLAGS = __SCX_DSQ_ITER_USER_FLAGS |
|
||||
__SCX_DSQ_ITER_HAS_SLICE |
|
||||
__SCX_DSQ_ITER_HAS_VTIME,
|
||||
};
|
||||
|
||||
/*
 * Kernel-side view of a DSQ iterator. BPF programs hand in a
 * struct bpf_iter_scx_dsq which the kfuncs cast to this layout.
 */
struct bpf_iter_scx_dsq_kern {
|
||||
/*
 * cursor.flags carries the SCX_DSQ_ITER_* bits, including the internal
 * __SCX_DSQ_ITER_HAS_{SLICE|VTIME} markers; cursor.priv is compared against
 * a task's dsq_seq when dispatching out of the iteration.
 */
struct scx_dsq_list_node cursor;
|
||||
/* DSQ being iterated; used as the source DSQ when dispatching from it */
struct scx_dispatch_q *dsq;
|
||||
/* pending slice override, meaningful only while __SCX_DSQ_ITER_HAS_SLICE is set */
u64 slice;
|
||||
/* pending vtime override, meaningful only while __SCX_DSQ_ITER_HAS_VTIME is set */
u64 vtime;
|
||||
} __attribute__((aligned(8)));
|
||||
|
||||
struct bpf_iter_scx_dsq {
|
||||
@ -5872,7 +5884,7 @@ __bpf_kfunc_start_defs();
|
||||
* scx_bpf_dispatch - Dispatch a task into the FIFO queue of a DSQ
|
||||
* @p: task_struct to dispatch
|
||||
* @dsq_id: DSQ to dispatch to
|
||||
* @slice: duration @p can run for in nsecs
|
||||
* @slice: duration @p can run for in nsecs, 0 to keep the current value
|
||||
* @enq_flags: SCX_ENQ_*
|
||||
*
|
||||
* Dispatch @p into the FIFO queue of the DSQ identified by @dsq_id. It is safe
|
||||
@ -5922,7 +5934,7 @@ __bpf_kfunc void scx_bpf_dispatch(struct task_struct *p, u64 dsq_id, u64 slice,
|
||||
* scx_bpf_dispatch_vtime - Dispatch a task into the vtime priority queue of a DSQ
|
||||
* @p: task_struct to dispatch
|
||||
* @dsq_id: DSQ to dispatch to
|
||||
* @slice: duration @p can run for in nsecs
|
||||
* @slice: duration @p can run for in nsecs, 0 to keep the current value
|
||||
* @vtime: @p's ordering inside the vtime-sorted queue of the target DSQ
|
||||
* @enq_flags: SCX_ENQ_*
|
||||
*
|
||||
@ -5963,6 +5975,118 @@ static const struct btf_kfunc_id_set scx_kfunc_set_enqueue_dispatch = {
|
||||
.set = &scx_kfunc_ids_enqueue_dispatch,
|
||||
};
|
||||
|
||||
/*
 * scx_dispatch_from_dsq - Move @p out of the DSQ being iterated into another DSQ
 * @kit: kernel-side DSQ iterator; provides the source DSQ, the iteration
 *       cursor and any pending slice/vtime overrides
 * @p: task to move
 * @dsq_id: ID of the destination DSQ
 * @enq_flags: SCX_ENQ_* flags handed to the move/enqueue helpers
 *
 * Shared implementation of scx_bpf_dispatch_from_dsq() and
 * scx_bpf_dispatch_vtime_from_dsq(). The call is rejected unless either no
 * kfunc mask is set on %current or SCX_KF_DISPATCH is allowed.
 *
 * Locking: the rq returned by task_rq(@p) on entry is locked (this_rq's lock
 * is dropped first when SCX_RQ_IN_BALANCE is set and the two differ), then the
 * source DSQ lock is taken. On the way out, this_rq is re-locked in the
 * in-balance case; otherwise the held rq lock is released with irqrestore.
 *
 * Pending overrides in @kit are cleared on every path that gets past the
 * initial context check, whether or not @p ended up being moved.
 *
 * Returns %true if @p was moved, %false otherwise.
 */
static bool scx_dispatch_from_dsq(struct bpf_iter_scx_dsq_kern *kit,
|
||||
struct task_struct *p, u64 dsq_id,
|
||||
u64 enq_flags)
|
||||
{
|
||||
struct scx_dispatch_q *src_dsq = kit->dsq, *dst_dsq;
|
||||
struct rq *this_rq, *src_rq, *dst_rq, *locked_rq;
|
||||
bool dispatched = false;
|
||||
bool in_balance;
|
||||
unsigned long flags;
|
||||
|
||||
if (!scx_kf_allowed_if_unlocked() && !scx_kf_allowed(SCX_KF_DISPATCH))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* Can be called from either ops.dispatch() locking this_rq() or any
|
||||
* context where no rq lock is held. If latter, lock @p's task_rq which
|
||||
* we'll likely need anyway.
|
||||
*/
|
||||
src_rq = task_rq(p);
|
||||
|
||||
local_irq_save(flags);
|
||||
this_rq = this_rq();
|
||||
in_balance = this_rq->scx.flags & SCX_RQ_IN_BALANCE;
|
||||
|
||||
if (in_balance) {
|
||||
if (this_rq != src_rq) {
|
||||
raw_spin_rq_unlock(this_rq);
|
||||
raw_spin_rq_lock(src_rq);
|
||||
}
|
||||
} else {
|
||||
raw_spin_rq_lock(src_rq);
|
||||
}
|
||||
|
||||
/* tracks which rq lock is held so that the exit path can undo it */
locked_rq = src_rq;
|
||||
raw_spin_lock(&src_dsq->lock);
|
||||
|
||||
/*
|
||||
* Did someone else get to it? @p could have already left $src_dsq, got
|
||||
* re-enqueued, or be in the process of being consumed by someone else.
|
||||
* src_rq was sampled before any lock was taken, so also verify that it is
* still @p's rq.
*/
|
||||
if (unlikely(p->scx.dsq != src_dsq ||
|
||||
u32_before(kit->cursor.priv, p->scx.dsq_seq) ||
|
||||
p->scx.holding_cpu >= 0) ||
|
||||
WARN_ON_ONCE(src_rq != task_rq(p))) {
|
||||
raw_spin_unlock(&src_dsq->lock);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* @p is still on $src_dsq and stable, determine the destination */
|
||||
dst_dsq = find_dsq_for_dispatch(this_rq, dsq_id, p);
|
||||
|
||||
if (dst_dsq->id == SCX_DSQ_LOCAL) {
|
||||
dst_rq = container_of(dst_dsq, struct rq, scx.local_dsq);
|
||||
/* @p can't run on the requested CPU, fall back to the global DSQ */
if (!task_can_run_on_remote_rq(p, dst_rq, true)) {
|
||||
dst_dsq = &scx_dsq_global;
|
||||
dst_rq = src_rq;
|
||||
}
|
||||
} else {
|
||||
/* no need to migrate if destination is a non-local DSQ */
|
||||
dst_rq = src_rq;
|
||||
}
|
||||
|
||||
/*
|
||||
* Move @p into $dst_dsq. If $dst_dsq is the local DSQ of a different
|
||||
* CPU, @p will be migrated.
|
||||
*/
|
||||
if (dst_dsq->id == SCX_DSQ_LOCAL) {
|
||||
/* @p is going from a non-local DSQ to a local DSQ */
|
||||
if (src_rq == dst_rq) {
|
||||
task_unlink_from_dsq(p, src_dsq);
|
||||
move_local_task_to_local_dsq(p, enq_flags,
|
||||
src_dsq, dst_rq);
|
||||
raw_spin_unlock(&src_dsq->lock);
|
||||
} else {
|
||||
raw_spin_unlock(&src_dsq->lock);
|
||||
move_remote_task_to_local_dsq(p, enq_flags,
|
||||
src_rq, dst_rq);
|
||||
/*
 * NOTE(review): the helper apparently returns with $dst_rq locked
 * instead of $src_rq - confirm against its definition.
 */
locked_rq = dst_rq;
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* @p is going from a non-local DSQ to a non-local DSQ. As
|
||||
* $src_dsq is already locked, do an abbreviated dequeue.
|
||||
*/
|
||||
task_unlink_from_dsq(p, src_dsq);
|
||||
p->scx.dsq = NULL;
|
||||
raw_spin_unlock(&src_dsq->lock);
|
||||
|
||||
/* the vtime override is applied only on this non-local path */
if (kit->cursor.flags & __SCX_DSQ_ITER_HAS_VTIME)
|
||||
p->scx.dsq_vtime = kit->vtime;
|
||||
dispatch_enqueue(dst_dsq, p, enq_flags);
|
||||
}
|
||||
|
||||
/* the slice override applies regardless of the destination type */
if (kit->cursor.flags & __SCX_DSQ_ITER_HAS_SLICE)
|
||||
p->scx.slice = kit->slice;
|
||||
|
||||
dispatched = true;
|
||||
out:
|
||||
if (in_balance) {
|
||||
if (this_rq != locked_rq) {
|
||||
raw_spin_rq_unlock(locked_rq);
|
||||
raw_spin_rq_lock(this_rq);
|
||||
}
|
||||
} else {
|
||||
raw_spin_rq_unlock_irqrestore(locked_rq, flags);
|
||||
}
|
||||
|
||||
/* overrides are one-shot: drop them whether or not @p was moved */
kit->cursor.flags &= ~(__SCX_DSQ_ITER_HAS_SLICE |
|
||||
__SCX_DSQ_ITER_HAS_VTIME);
|
||||
return dispatched;
|
||||
}
|
||||
|
||||
__bpf_kfunc_start_defs();
|
||||
|
||||
/**
|
||||
@ -6042,12 +6166,112 @@ __bpf_kfunc bool scx_bpf_consume(u64 dsq_id)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* scx_bpf_dispatch_from_dsq_set_slice - Override slice when dispatching from DSQ
|
||||
* @it__iter: DSQ iterator in progress
|
||||
* @slice: duration the dispatched task can run for in nsecs
|
||||
*
|
||||
* Override the slice of the next task that will be dispatched from @it__iter
|
||||
* using scx_bpf_dispatch[_vtime]_from_dsq(). If this function is not called,
|
||||
* the previous slice duration is kept.
|
||||
*/
|
||||
__bpf_kfunc void scx_bpf_dispatch_from_dsq_set_slice(
|
||||
struct bpf_iter_scx_dsq *it__iter, u64 slice)
|
||||
{
|
||||
struct bpf_iter_scx_dsq_kern *kit = (void *)it__iter;
|
||||
|
||||
/* stash the value and flag it as pending in the iterator */
kit->slice = slice;
|
||||
kit->cursor.flags |= __SCX_DSQ_ITER_HAS_SLICE;
|
||||
}
|
||||
|
||||
/**
|
||||
* scx_bpf_dispatch_from_dsq_set_vtime - Override vtime when dispatching from DSQ
|
||||
* @it__iter: DSQ iterator in progress
|
||||
* @vtime: task's ordering inside the vtime-sorted queue of the target DSQ
|
||||
*
|
||||
* Override the vtime of the next task that will be dispatched from @it__iter
|
||||
* using scx_bpf_dispatch_vtime_from_dsq(). If this function is not called, the
|
||||
* previous vtime is kept. If scx_bpf_dispatch_from_dsq() is used to
|
||||
* dispatch the next task, the override is ignored and cleared.
|
||||
*/
|
||||
__bpf_kfunc void scx_bpf_dispatch_from_dsq_set_vtime(
|
||||
struct bpf_iter_scx_dsq *it__iter, u64 vtime)
|
||||
{
|
||||
struct bpf_iter_scx_dsq_kern *kit = (void *)it__iter;
|
||||
|
||||
/* stash the value and flag it as pending in the iterator */
kit->vtime = vtime;
|
||||
kit->cursor.flags |= __SCX_DSQ_ITER_HAS_VTIME;
|
||||
}
|
||||
|
||||
/**
|
||||
* scx_bpf_dispatch_from_dsq - Move a task from DSQ iteration to a DSQ
|
||||
* @it__iter: DSQ iterator in progress
|
||||
* @p: task to transfer
|
||||
* @dsq_id: DSQ to move @p to
|
||||
* @enq_flags: SCX_ENQ_*
|
||||
*
|
||||
* Transfer @p which is on the DSQ currently iterated by @it__iter to the DSQ
|
||||
* specified by @dsq_id. All DSQs - local DSQs, global DSQ and user DSQs - can
|
||||
* be the destination.
|
||||
*
|
||||
* For the transfer to be successful, @p must still be on the DSQ and have been
|
||||
* queued before the DSQ iteration started. This function doesn't care whether
|
||||
* @p was obtained from the DSQ iteration. @p just has to be on the DSQ and have
|
||||
* been queued before the iteration started.
|
||||
*
|
||||
* @p's slice is kept by default. Use scx_bpf_dispatch_from_dsq_set_slice() to
|
||||
* update.
|
||||
*
|
||||
* Can be called from ops.dispatch() or any BPF context which doesn't hold a rq
|
||||
* lock (e.g. BPF timers or SYSCALL programs).
|
||||
*
|
||||
* Returns %true if @p has been consumed, %false if @p had already been consumed
|
||||
* or dequeued.
|
||||
*/
|
||||
__bpf_kfunc bool scx_bpf_dispatch_from_dsq(struct bpf_iter_scx_dsq *it__iter,
|
||||
struct task_struct *p, u64 dsq_id,
|
||||
u64 enq_flags)
|
||||
{
|
||||
/* the BPF-visible iterator is accessed through its kernel-side layout */
return scx_dispatch_from_dsq((struct bpf_iter_scx_dsq_kern *)it__iter,
|
||||
p, dsq_id, enq_flags);
|
||||
}
|
||||
|
||||
/**
|
||||
* scx_bpf_dispatch_vtime_from_dsq - Move a task from DSQ iteration to a PRIQ DSQ
|
||||
* @it__iter: DSQ iterator in progress
|
||||
* @p: task to transfer
|
||||
* @dsq_id: DSQ to move @p to
|
||||
* @enq_flags: SCX_ENQ_*
|
||||
*
|
||||
* Transfer @p which is on the DSQ currently iterated by @it__iter to the
|
||||
* priority queue of the DSQ specified by @dsq_id. The destination must be a
|
||||
* user DSQ as only user DSQs support priority queue.
|
||||
*
|
||||
* @p's slice and vtime are kept by default. Use
|
||||
* scx_bpf_dispatch_from_dsq_set_slice() and
|
||||
* scx_bpf_dispatch_from_dsq_set_vtime() to update.
|
||||
*
|
||||
* All other aspects are identical to scx_bpf_dispatch_from_dsq(). See
|
||||
* scx_bpf_dispatch_vtime() for more information on @vtime.
|
||||
*/
|
||||
__bpf_kfunc bool scx_bpf_dispatch_vtime_from_dsq(struct bpf_iter_scx_dsq *it__iter,
|
||||
struct task_struct *p, u64 dsq_id,
|
||||
u64 enq_flags)
|
||||
{
|
||||
/* SCX_ENQ_DSQ_PRIQ is the only difference from scx_bpf_dispatch_from_dsq() */
return scx_dispatch_from_dsq((struct bpf_iter_scx_dsq_kern *)it__iter,
|
||||
p, dsq_id, enq_flags | SCX_ENQ_DSQ_PRIQ);
|
||||
}
|
||||
|
||||
__bpf_kfunc_end_defs();
|
||||
|
||||
/*
 * kfunc ID set referenced by scx_kfunc_set_dispatch. The two *_from_dsq()
 * movers are flagged KF_RCU and are additionally listed in
 * scx_kfunc_ids_unlocked; the set_{slice|vtime}() helpers carry no flags.
 */
BTF_KFUNCS_START(scx_kfunc_ids_dispatch)
|
||||
BTF_ID_FLAGS(func, scx_bpf_dispatch_nr_slots)
|
||||
BTF_ID_FLAGS(func, scx_bpf_dispatch_cancel)
|
||||
BTF_ID_FLAGS(func, scx_bpf_consume)
|
||||
BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq_set_slice)
|
||||
BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq_set_vtime)
|
||||
BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq, KF_RCU)
|
||||
BTF_ID_FLAGS(func, scx_bpf_dispatch_vtime_from_dsq, KF_RCU)
|
||||
BTF_KFUNCS_END(scx_kfunc_ids_dispatch)
|
||||
|
||||
static const struct btf_kfunc_id_set scx_kfunc_set_dispatch = {
|
||||
@ -6144,6 +6368,8 @@ __bpf_kfunc_end_defs();
|
||||
|
||||
/*
 * kfunc ID set referenced by scx_kfunc_set_unlocked.
 *
 * scx_bpf_dispatch[_vtime]_from_dsq() may be called from contexts which hold
 * no rq lock, and they honour overrides installed with
 * scx_bpf_dispatch_from_dsq_set_{slice|vtime}(). The two setters must
 * therefore be registered here as well; otherwise programs running in those
 * contexts can dispatch but can never override the slice or vtime.
 */
BTF_KFUNCS_START(scx_kfunc_ids_unlocked)
|
||||
BTF_ID_FLAGS(func, scx_bpf_create_dsq, KF_SLEEPABLE)
|
||||
BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq_set_slice)
|
||||
BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq_set_vtime)
|
||||
BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq, KF_RCU)
|
||||
BTF_ID_FLAGS(func, scx_bpf_dispatch_vtime_from_dsq, KF_RCU)
|
||||
BTF_KFUNCS_END(scx_kfunc_ids_unlocked)
|
||||
|
||||
static const struct btf_kfunc_id_set scx_kfunc_set_unlocked = {
|
||||
|
@ -35,6 +35,10 @@ void scx_bpf_dispatch_vtime(struct task_struct *p, u64 dsq_id, u64 slice, u64 vt
|
||||
u32 scx_bpf_dispatch_nr_slots(void) __ksym;
|
||||
void scx_bpf_dispatch_cancel(void) __ksym;
|
||||
bool scx_bpf_consume(u64 dsq_id) __ksym;
|
||||
void scx_bpf_dispatch_from_dsq_set_slice(struct bpf_iter_scx_dsq *it__iter, u64 slice) __ksym;
|
||||
void scx_bpf_dispatch_from_dsq_set_vtime(struct bpf_iter_scx_dsq *it__iter, u64 vtime) __ksym;
|
||||
bool scx_bpf_dispatch_from_dsq(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
|
||||
bool scx_bpf_dispatch_vtime_from_dsq(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
|
||||
u32 scx_bpf_reenqueue_local(void) __ksym;
|
||||
void scx_bpf_kick_cpu(s32 cpu, u64 flags) __ksym;
|
||||
s32 scx_bpf_dsq_nr_queued(u64 dsq_id) __ksym;
|
||||
@ -63,6 +67,12 @@ s32 scx_bpf_task_cpu(const struct task_struct *p) __ksym;
|
||||
struct rq *scx_bpf_cpu_rq(s32 cpu) __ksym;
|
||||
struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym;
|
||||
|
||||
/*
|
||||
* Use the following as @it__iter when calling
|
||||
* scx_bpf_dispatch[_vtime]_from_dsq() from within bpf_for_each() loops.
|
||||
*/
|
||||
#define BPF_FOR_EACH_ITER (&___it)
|
||||
|
||||
/*
 * Empty function carrying a printf format attribute: passing a format string
 * and its arguments through it lets the compiler type-check them. It has no
 * run-time effect.
 */
static inline __attribute__((format(printf, 1, 2)))
|
||||
void ___scx_bpf_bstr_format_checker(const char *fmt, ...) {}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user