mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-20 04:19:41 +00:00
ac5a23b2f2
The control knobs set before loading BPF programs should be declared as 'const volatile' so that it can be optimized by the BPF core. Committer testing: root@x1:~# perf ftrace latency --use-bpf -T schedule ^C# DURATION | COUNT | GRAPH | 0 - 1 us | 0 | | 1 - 2 us | 0 | | 2 - 4 us | 0 | | 4 - 8 us | 0 | | 8 - 16 us | 1 | | 16 - 32 us | 5 | | 32 - 64 us | 2 | | 64 - 128 us | 6 | | 128 - 256 us | 7 | | 256 - 512 us | 5 | | 512 - 1024 us | 22 | # | 1 - 2 ms | 36 | ## | 2 - 4 ms | 68 | ##### | 4 - 8 ms | 22 | # | 8 - 16 ms | 91 | ####### | 16 - 32 ms | 11 | | 32 - 64 ms | 26 | ## | 64 - 128 ms | 213 | ################# | 128 - 256 ms | 19 | # | 256 - 512 ms | 14 | # | 512 - 1024 ms | 5 | | 1 - ... s | 8 | | root@x1:~# root@x1:~# perf trace -e bpf perf ftrace latency --use-bpf -T schedule 0.000 ( 0.015 ms): perf/2944525 bpf(cmd: 36, uattr: 0x7ffe80de7b40, size: 8) = -1 EOPNOTSUPP (Operation not supported) 0.025 ( 0.102 ms): perf/2944525 bpf(cmd: PROG_LOAD, uattr: 0x7ffe80de7870, size: 148) = 8 0.136 ( 0.026 ms): perf/2944525 bpf(cmd: PROG_LOAD, uattr: 0x7ffe80de7930, size: 148) = 8 0.174 ( 0.026 ms): perf/2944525 bpf(cmd: PROG_LOAD, uattr: 0x7ffe80de77e0, size: 148) = 8 0.205 ( 0.010 ms): perf/2944525 bpf(uattr: 0x7ffe80de7990, size: 80) = 8 0.227 ( 0.011 ms): perf/2944525 bpf(cmd: BTF_LOAD, uattr: 0x7ffe80de7810, size: 40) = 8 0.244 ( 0.004 ms): perf/2944525 bpf(cmd: BTF_LOAD, uattr: 0x7ffe80de7880, size: 40) = 8 0.257 ( 0.006 ms): perf/2944525 bpf(cmd: BTF_LOAD, uattr: 0x7ffe80de7660, size: 40) = 8 0.265 ( 0.058 ms): perf/2944525 bpf(cmd: PROG_LOAD, uattr: 0x7ffe80de7730, size: 148) = 9 0.330 ( 0.004 ms): perf/2944525 bpf(cmd: BTF_LOAD, uattr: 0x7ffe80de78e0, size: 40) = 8 0.337 ( 0.003 ms): perf/2944525 bpf(cmd: BTF_LOAD, uattr: 0x7ffe80de7890, size: 40) = 8 0.343 ( 0.004 ms): perf/2944525 bpf(cmd: BTF_LOAD, uattr: 0x7ffe80de7880, size: 40) = 8 0.349 ( 0.003 ms): perf/2944525 bpf(cmd: BTF_LOAD, uattr: 0x7ffe80de78b0, size: 40) = 8 0.355 ( 0.004 ms): perf/2944525 
bpf(cmd: BTF_LOAD, uattr: 0x7ffe80de7890, size: 40) = 8 0.361 ( 0.003 ms): perf/2944525 bpf(cmd: BTF_LOAD, uattr: 0x7ffe80de78b0, size: 40) = 8 0.367 ( 0.003 ms): perf/2944525 bpf(cmd: BTF_LOAD, uattr: 0x7ffe80de7880, size: 40) = 8 0.373 ( 0.014 ms): perf/2944525 bpf(cmd: BTF_LOAD, uattr: 0x7ffe80de7a00, size: 40) = 8 0.390 ( 0.358 ms): perf/2944525 bpf(uattr: 0x7ffe80de7950, size: 80) = 9 0.763 ( 0.014 ms): perf/2944525 bpf(uattr: 0x7ffe80de7950, size: 80) = 9 0.783 ( 0.011 ms): perf/2944525 bpf(uattr: 0x7ffe80de7950, size: 80) = 9 0.798 ( 0.017 ms): perf/2944525 bpf(uattr: 0x7ffe80de7950, size: 80) = 9 0.819 ( 0.003 ms): perf/2944525 bpf(uattr: 0x7ffe80de7700, size: 80) = 9 0.824 ( 0.047 ms): perf/2944525 bpf(cmd: PROG_LOAD, uattr: 0x7ffe80de76c0, size: 148) = 10 0.878 ( 0.008 ms): perf/2944525 bpf(uattr: 0x7ffe80de7950, size: 80) = 9 0.891 ( 0.014 ms): perf/2944525 bpf(cmd: MAP_UPDATE_ELEM, uattr: 0x7ffe80de79e0, size: 32) = 0 0.910 ( 0.103 ms): perf/2944525 bpf(cmd: PROG_LOAD, uattr: 0x7ffe80de7880, size: 148) = 9 1.016 ( 0.143 ms): perf/2944525 bpf(cmd: PROG_LOAD, uattr: 0x7ffe80de7880, size: 148) = 10 3.777 ( 0.068 ms): perf/2944525 bpf(cmd: PROG_LOAD, uattr: 0x7ffe80de7570, size: 148) = 12 3.848 ( 0.003 ms): perf/2944525 bpf(cmd: LINK_CREATE, uattr: 0x7ffe80de7550, size: 64) = -1 EBADF (Bad file descriptor) 3.859 ( 0.006 ms): perf/2944525 bpf(cmd: LINK_CREATE, uattr: 0x7ffe80de77c0, size: 64) = 12 6.504 ( 0.010 ms): perf/2944525 bpf(cmd: LINK_CREATE, uattr: 0x7ffe80de77c0, size: 64) = 14 ^C# DURATION | COUNT | GRAPH | 0 - 1 us | 0 | | 1 - 2 us | 0 | | 2 - 4 us | 1 | | 4 - 8 us | 3 | | 8 - 16 us | 3 | | 16 - 32 us | 11 | | 32 - 64 us | 9 | | 64 - 128 us | 17 | | 128 - 256 us | 30 | # | 256 - 512 us | 20 | | 512 - 1024 us | 42 | # | 1 - 2 ms | 151 | ###### | 2 - 4 ms | 106 | #### | 4 - 8 ms | 18 | | 8 - 16 ms | 149 | ###### | 16 - 32 ms | 30 | # | 32 - 64 ms | 17 | | 64 - 128 ms | 360 | ############### | 128 - 256 ms | 52 | ## | 256 - 512 ms | 18 | | 512 - 
1024 ms | 28 | # | 1 - ... s | 5 | | root@x1:~# Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Link: https://lore.kernel.org/r/20240902200515.2103769-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
155 lines
3.5 KiB
C
155 lines
3.5 KiB
C
#include <errno.h>
#include <stdio.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdlib.h>

#include <linux/err.h>

#include "util/ftrace.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/debug.h"
#include "util/evlist.h"
#include "util/bpf_counter.h"

#include "util/bpf_skel/func_latency.skel.h"
|
|
|
|
/*
 * Skeleton instance shared by every function in this file; it is assigned in
 * perf_ftrace__latency_prepare_bpf().
 */
static struct func_latency_bpf *skel;
|
|
|
|
int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
|
|
{
|
|
int fd, err;
|
|
int i, ncpus = 1, ntasks = 1;
|
|
struct filter_entry *func;
|
|
|
|
if (!list_is_singular(&ftrace->filters)) {
|
|
pr_err("ERROR: %s target function(s).\n",
|
|
list_empty(&ftrace->filters) ? "No" : "Too many");
|
|
return -1;
|
|
}
|
|
|
|
func = list_first_entry(&ftrace->filters, struct filter_entry, list);
|
|
|
|
skel = func_latency_bpf__open();
|
|
if (!skel) {
|
|
pr_err("Failed to open func latency skeleton\n");
|
|
return -1;
|
|
}
|
|
|
|
/* don't need to set cpu filter for system-wide mode */
|
|
if (ftrace->target.cpu_list) {
|
|
ncpus = perf_cpu_map__nr(ftrace->evlist->core.user_requested_cpus);
|
|
bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
|
|
skel->rodata->has_cpu = 1;
|
|
}
|
|
|
|
if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) {
|
|
ntasks = perf_thread_map__nr(ftrace->evlist->core.threads);
|
|
bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
|
|
skel->rodata->has_task = 1;
|
|
}
|
|
|
|
skel->rodata->use_nsec = ftrace->use_nsec;
|
|
|
|
set_max_rlimit();
|
|
|
|
err = func_latency_bpf__load(skel);
|
|
if (err) {
|
|
pr_err("Failed to load func latency skeleton\n");
|
|
goto out;
|
|
}
|
|
|
|
if (ftrace->target.cpu_list) {
|
|
u32 cpu;
|
|
u8 val = 1;
|
|
|
|
fd = bpf_map__fd(skel->maps.cpu_filter);
|
|
|
|
for (i = 0; i < ncpus; i++) {
|
|
cpu = perf_cpu_map__cpu(ftrace->evlist->core.user_requested_cpus, i).cpu;
|
|
bpf_map_update_elem(fd, &cpu, &val, BPF_ANY);
|
|
}
|
|
}
|
|
|
|
if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) {
|
|
u32 pid;
|
|
u8 val = 1;
|
|
|
|
fd = bpf_map__fd(skel->maps.task_filter);
|
|
|
|
for (i = 0; i < ntasks; i++) {
|
|
pid = perf_thread_map__pid(ftrace->evlist->core.threads, i);
|
|
bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
|
|
}
|
|
}
|
|
|
|
skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
|
|
false, func->name);
|
|
if (IS_ERR(skel->links.func_begin)) {
|
|
pr_err("Failed to attach fentry program\n");
|
|
err = PTR_ERR(skel->links.func_begin);
|
|
goto out;
|
|
}
|
|
|
|
skel->links.func_end = bpf_program__attach_kprobe(skel->progs.func_end,
|
|
true, func->name);
|
|
if (IS_ERR(skel->links.func_end)) {
|
|
pr_err("Failed to attach fexit program\n");
|
|
err = PTR_ERR(skel->links.func_end);
|
|
goto out;
|
|
}
|
|
|
|
/* XXX: we don't actually use this fd - just for poll() */
|
|
return open("/dev/null", O_RDONLY);
|
|
|
|
out:
|
|
return err;
|
|
}
|
|
|
|
/*
 * Turn latency collection on by setting the skeleton's 'enabled' flag.
 * The ftrace argument is unused.  Always returns 0.
 *
 * NOTE(review): 'enabled' is presumably checked by the BPF programs before
 * they record anything - confirm in the BPF source.
 */
int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace __maybe_unused)
{
	skel->bss->enabled = 1;

	return 0;
}
|
|
|
|
/*
 * Turn latency collection off by clearing the skeleton's 'enabled' flag.
 * The ftrace argument is unused.  Always returns 0.
 */
int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused)
{
	skel->bss->enabled = 0;

	return 0;
}
|
|
|
|
/*
 * Fold the 'latency' map into buckets[].
 *
 * For each of the NUM_BUCKET slots the map value is read into a scratch array
 * of cpu__max_cpu() entries and every entry is added to buckets[slot].  Note
 * that a successful lookup adds to the existing bucket value, while a failed
 * lookup resets that bucket to 0.
 *
 * NOTE(review): the one-u64-per-CPU value layout suggests a per-CPU map -
 * confirm in the BPF source.
 *
 * Returns 0 on success or -ENOMEM if the scratch array cannot be allocated.
 */
int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
				  int buckets[])
{
	int nr_cpus = cpu__max_cpu().cpu;
	int map_fd = bpf_map__fd(skel->maps.latency);
	u64 *percpu;
	u32 slot;
	int cpu;

	percpu = calloc(nr_cpus, sizeof(*percpu));
	if (!percpu)
		return -ENOMEM;

	for (slot = 0; slot < NUM_BUCKET; slot++) {
		if (bpf_map_lookup_elem(map_fd, &slot, percpu) != 0) {
			buckets[slot] = 0;
			continue;
		}

		for (cpu = 0; cpu < nr_cpus; cpu++)
			buckets[slot] += percpu[cpu];
	}

	free(percpu);
	return 0;
}
|
|
|
|
/*
 * Destroy the func_latency skeleton and forget the pointer to it.
 * The ftrace argument is unused.  Always returns 0.
 *
 * The global is reset so that it does not dangle after the destroy; a
 * repeated cleanup then passes NULL instead of an already freed skeleton.
 */
int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace __maybe_unused)
{
	func_latency_bpf__destroy(skel);
	skel = NULL;

	return 0;
}
|