mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-01 18:55:12 +00:00
245254f708
Scheduler classes are strictly ordered and when a higher priority class has tasks to run, the lower priority ones lose access to the CPU. Being able to monitor and act on these events is necessary for use cases including strict core-scheduling and latency management. This patch adds two operations ops.cpu_acquire() and .cpu_release(). The former is invoked when a CPU becomes available to the BPF scheduler and the opposite for the latter. This patch also implements scx_bpf_reenqueue_local() which can be called from .cpu_release() to trigger requeueing of all tasks in the local dsq of the CPU so that the tasks can be reassigned to other available CPUs. scx_pair is updated to use .cpu_acquire/release() along with %SCX_KICK_WAIT to make the pair scheduling guarantee strict even when a CPU is preempted by a higher priority scheduler class. scx_qmap is updated to use .cpu_acquire/release() to empty the local dsq of a preempted CPU. A similar approach can be adopted by BPF schedulers that want to have tight control over latency. v4: Use the new SCX_KICK_IDLE to wake up a CPU after re-enqueueing. v3: Drop the const qualifier from scx_cpu_release_args.task. BPF enforces access control through the verifier, so the qualifier isn't actually operative and only gets in the way when interacting with various helpers. v2: Add p->scx.kf_mask annotation to allow calling scx_bpf_reenqueue_local() from ops.cpu_release() nested inside ops.init() and other sleepable operations. Signed-off-by: David Vernet <dvernet@meta.com> Reviewed-by: Tejun Heo <tj@kernel.org> Signed-off-by: Tejun Heo <tj@kernel.org> Acked-by: Josh Don <joshdon@google.com> Acked-by: Hao Luo <haoluo@google.com> Acked-by: Barret Rhoden <brho@google.com>
127 lines
3.5 KiB
C
127 lines
3.5 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
/*
|
|
* Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
|
|
* Copyright (c) 2022 Tejun Heo <tj@kernel.org>
|
|
* Copyright (c) 2022 David Vernet <dvernet@meta.com>
|
|
*/
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <unistd.h>
|
|
#include <inttypes.h>
|
|
#include <signal.h>
|
|
#include <libgen.h>
|
|
#include <bpf/bpf.h>
|
|
#include <scx/common.h>
|
|
#include "scx_qmap.bpf.skel.h"
|
|
|
|
/*
 * Usage text printed by main() for -h and for unrecognized options. It is
 * used as a printf-style format: the single %s conversion receives the
 * program name. Every option accepted by main()'s getopt() string is listed
 * both in the synopsis and in the per-option table.
 */
const char help_fmt[] =
"A simple five-level FIFO queue sched_ext scheduler.\n"
"\n"
"See the top-level comment in .bpf.c for more details.\n"
"\n"
"Usage: %s [-s SLICE_US] [-e COUNT] [-t COUNT] [-T COUNT] [-l COUNT] [-b COUNT]\n"
" [-d PID] [-D LEN] [-S] [-p] [-v]\n"
"\n"
" -s SLICE_US Override slice duration\n"
" -e COUNT Trigger scx_bpf_error() after COUNT enqueues\n"
" -t COUNT Stall every COUNT'th user thread\n"
" -T COUNT Stall every COUNT'th kernel thread\n"
" -l COUNT Trigger dispatch infinite looping after COUNT dispatches\n"
" -b COUNT Dispatch up to COUNT tasks together\n"
" -d PID Disallow a process from switching into SCHED_EXT (-1 for self)\n"
" -D LEN Set scx_exit_info.dump buffer length\n"
" -S Suppress qmap-specific debug dump\n"
" -p Switch only tasks on SCHED_EXT policy instead of all\n"
" -v Print libbpf debug messages\n"
" -h Display this help and exit\n";
|
|
|
|
/* Set by the -v option; while false, libbpf_print_fn() drops LIBBPF_DEBUG messages. */
static bool verbose;

/*
 * Set to 1 by the SIGINT/SIGTERM handler and polled by main()'s stats loop.
 * Declared as volatile sig_atomic_t because that is the object type the C
 * standard guarantees may be safely written from an asynchronous signal
 * handler.
 */
static volatile sig_atomic_t exit_req;
|
|
|
|
static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
|
|
{
|
|
if (level == LIBBPF_DEBUG && !verbose)
|
|
return 0;
|
|
return vfprintf(stderr, format, args);
|
|
}
|
|
|
|
static void sigint_handler(int dummy)
|
|
{
|
|
exit_req = 1;
|
|
}
|
|
|
|
int main(int argc, char **argv)
|
|
{
|
|
struct scx_qmap *skel;
|
|
struct bpf_link *link;
|
|
int opt;
|
|
|
|
libbpf_set_print(libbpf_print_fn);
|
|
signal(SIGINT, sigint_handler);
|
|
signal(SIGTERM, sigint_handler);
|
|
|
|
skel = SCX_OPS_OPEN(qmap_ops, scx_qmap);
|
|
|
|
while ((opt = getopt(argc, argv, "s:e:t:T:l:b:d:D:Spvh")) != -1) {
|
|
switch (opt) {
|
|
case 's':
|
|
skel->rodata->slice_ns = strtoull(optarg, NULL, 0) * 1000;
|
|
break;
|
|
case 'e':
|
|
skel->bss->test_error_cnt = strtoul(optarg, NULL, 0);
|
|
break;
|
|
case 't':
|
|
skel->rodata->stall_user_nth = strtoul(optarg, NULL, 0);
|
|
break;
|
|
case 'T':
|
|
skel->rodata->stall_kernel_nth = strtoul(optarg, NULL, 0);
|
|
break;
|
|
case 'l':
|
|
skel->rodata->dsp_inf_loop_after = strtoul(optarg, NULL, 0);
|
|
break;
|
|
case 'b':
|
|
skel->rodata->dsp_batch = strtoul(optarg, NULL, 0);
|
|
break;
|
|
case 'd':
|
|
skel->rodata->disallow_tgid = strtol(optarg, NULL, 0);
|
|
if (skel->rodata->disallow_tgid < 0)
|
|
skel->rodata->disallow_tgid = getpid();
|
|
break;
|
|
case 'D':
|
|
skel->struct_ops.qmap_ops->exit_dump_len = strtoul(optarg, NULL, 0);
|
|
break;
|
|
case 'S':
|
|
skel->rodata->suppress_dump = true;
|
|
break;
|
|
case 'p':
|
|
skel->struct_ops.qmap_ops->flags |= SCX_OPS_SWITCH_PARTIAL;
|
|
break;
|
|
case 'v':
|
|
verbose = true;
|
|
break;
|
|
default:
|
|
fprintf(stderr, help_fmt, basename(argv[0]));
|
|
return opt != 'h';
|
|
}
|
|
}
|
|
|
|
SCX_OPS_LOAD(skel, qmap_ops, scx_qmap, uei);
|
|
link = SCX_OPS_ATTACH(skel, qmap_ops, scx_qmap);
|
|
|
|
while (!exit_req && !UEI_EXITED(skel, uei)) {
|
|
long nr_enqueued = skel->bss->nr_enqueued;
|
|
long nr_dispatched = skel->bss->nr_dispatched;
|
|
|
|
printf("stats : enq=%lu dsp=%lu delta=%ld reenq=%"PRIu64" deq=%"PRIu64"\n",
|
|
nr_enqueued, nr_dispatched, nr_enqueued - nr_dispatched,
|
|
skel->bss->nr_reenqueued, skel->bss->nr_dequeued);
|
|
fflush(stdout);
|
|
sleep(1);
|
|
}
|
|
|
|
bpf_link__destroy(link);
|
|
UEI_REPORT(skel, uei);
|
|
scx_qmap__destroy(skel);
|
|
return 0;
|
|
}
|