perf record offcpu: Constify control data for BPF

The control knobs set before loading BPF programs should be declared as
'const volatile' so that it can be optimized by the BPF core.

Committer testing:

  root@x1:~# perf record --off-cpu
  ^C[ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 1.807 MB perf.data (5645 samples) ]

  root@x1:~# perf evlist
  cpu_atom/cycles/P
  cpu_core/cycles/P
  offcpu-time
  dummy:u
  root@x1:~# perf evlist -v
  cpu_atom/cycles/P: type: 0 (PERF_TYPE_HARDWARE), size: 136, config: 0xa00000000, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CPU|PERIOD|IDENTIFIER, read_format: ID|LOST, disabled: 1, inherit: 1, freq: 1, precise_ip: 3, sample_id_all: 1
  cpu_core/cycles/P: type: 0 (PERF_TYPE_HARDWARE), size: 136, config: 0x400000000, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CPU|PERIOD|IDENTIFIER, read_format: ID|LOST, disabled: 1, inherit: 1, freq: 1, precise_ip: 3, sample_id_all: 1
  offcpu-time: type: 1 (software), size: 136, config: 0xa (PERF_COUNT_SW_BPF_OUTPUT), { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|CALLCHAIN|CPU|PERIOD|IDENTIFIER, read_format: ID|LOST, disabled: 1, inherit: 1, freq: 1, sample_id_all: 1
  dummy:u: type: 1 (software), size: 136, config: 0x9 (PERF_COUNT_SW_DUMMY), { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|CPU|IDENTIFIER, read_format: ID|LOST, inherit: 1, exclude_kernel: 1, exclude_hv: 1, mmap: 1, comm: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, ksymbol: 1, bpf_event: 1
  root@x1:~# perf trace -e bpf --max-events 5 perf record --off-cpu
       0.000 ( 0.015 ms): :2949124/2949124 bpf(cmd: 36, uattr: 0x7ffefc6dbe30, size: 8)          = -1 EOPNOTSUPP (Operation not supported)
       0.031 ( 0.115 ms): :2949124/2949124 bpf(cmd: PROG_LOAD, uattr: 0x7ffefc6dbb60, size: 148) = 14
       0.159 ( 0.037 ms): :2949124/2949124 bpf(cmd: PROG_LOAD, uattr: 0x7ffefc6dbc20, size: 148) = 14
      23.868 ( 0.144 ms): perf/2949124 bpf(cmd: PROG_LOAD, uattr: 0x7ffefc6dbad0, size: 148)     = 14
      24.027 ( 0.014 ms): perf/2949124 bpf(uattr: 0x7ffefc6dbc80, size: 80)                      = 14
  root@x1:~#

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20240902200515.2103769-6-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
Namhyung Kim 2024-09-02 13:05:15 -07:00 committed by Arnaldo Carvalho de Melo
parent 4afdc00c37
commit 8b3b1bb3ea
2 changed files with 13 additions and 12 deletions

View File

@ -73,14 +73,12 @@ static void off_cpu_start(void *arg)
struct evlist *evlist = arg; struct evlist *evlist = arg;
/* update task filter for the given workload */ /* update task filter for the given workload */
if (!skel->bss->has_cpu && !skel->bss->has_task && if (skel->rodata->has_task && skel->rodata->uses_tgid &&
perf_thread_map__pid(evlist->core.threads, 0) != -1) { perf_thread_map__pid(evlist->core.threads, 0) != -1) {
int fd; int fd;
u32 pid; u32 pid;
u8 val = 1; u8 val = 1;
skel->bss->has_task = 1;
skel->bss->uses_tgid = 1;
fd = bpf_map__fd(skel->maps.task_filter); fd = bpf_map__fd(skel->maps.task_filter);
pid = perf_thread_map__pid(evlist->core.threads, 0); pid = perf_thread_map__pid(evlist->core.threads, 0);
bpf_map_update_elem(fd, &pid, &val, BPF_ANY); bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
@ -148,6 +146,7 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
if (target->cpu_list) { if (target->cpu_list) {
ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus); ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus);
bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus); bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
skel->rodata->has_cpu = 1;
} }
if (target->pid) { if (target->pid) {
@ -173,11 +172,16 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
ntasks = MAX_PROC; ntasks = MAX_PROC;
bpf_map__set_max_entries(skel->maps.task_filter, ntasks); bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
skel->rodata->has_task = 1;
skel->rodata->uses_tgid = 1;
} else if (target__has_task(target)) { } else if (target__has_task(target)) {
ntasks = perf_thread_map__nr(evlist->core.threads); ntasks = perf_thread_map__nr(evlist->core.threads);
bpf_map__set_max_entries(skel->maps.task_filter, ntasks); bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
skel->rodata->has_task = 1;
} else if (target__none(target)) { } else if (target__none(target)) {
bpf_map__set_max_entries(skel->maps.task_filter, MAX_PROC); bpf_map__set_max_entries(skel->maps.task_filter, MAX_PROC);
skel->rodata->has_task = 1;
skel->rodata->uses_tgid = 1;
} }
if (evlist__first(evlist)->cgrp) { if (evlist__first(evlist)->cgrp) {
@ -186,6 +190,7 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
if (!cgroup_is_v2("perf_event")) if (!cgroup_is_v2("perf_event"))
skel->rodata->uses_cgroup_v1 = true; skel->rodata->uses_cgroup_v1 = true;
skel->rodata->has_cgroup = 1;
} }
if (opts->record_cgroup) { if (opts->record_cgroup) {
@ -208,7 +213,6 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
u32 cpu; u32 cpu;
u8 val = 1; u8 val = 1;
skel->bss->has_cpu = 1;
fd = bpf_map__fd(skel->maps.cpu_filter); fd = bpf_map__fd(skel->maps.cpu_filter);
for (i = 0; i < ncpus; i++) { for (i = 0; i < ncpus; i++) {
@ -220,8 +224,6 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
if (target->pid) { if (target->pid) {
u8 val = 1; u8 val = 1;
skel->bss->has_task = 1;
skel->bss->uses_tgid = 1;
fd = bpf_map__fd(skel->maps.task_filter); fd = bpf_map__fd(skel->maps.task_filter);
strlist__for_each_entry(pos, pid_slist) { strlist__for_each_entry(pos, pid_slist) {
@ -240,7 +242,6 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
u32 pid; u32 pid;
u8 val = 1; u8 val = 1;
skel->bss->has_task = 1;
fd = bpf_map__fd(skel->maps.task_filter); fd = bpf_map__fd(skel->maps.task_filter);
for (i = 0; i < ntasks; i++) { for (i = 0; i < ntasks; i++) {
@ -253,7 +254,6 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
struct evsel *evsel; struct evsel *evsel;
u8 val = 1; u8 val = 1;
skel->bss->has_cgroup = 1;
fd = bpf_map__fd(skel->maps.cgroup_filter); fd = bpf_map__fd(skel->maps.cgroup_filter);
evlist__for_each_entry(evlist, evsel) { evlist__for_each_entry(evlist, evsel) {

View File

@ -85,10 +85,11 @@ struct task_struct___old {
} __attribute__((preserve_access_index)); } __attribute__((preserve_access_index));
int enabled = 0; int enabled = 0;
int has_cpu = 0;
int has_task = 0; const volatile int has_cpu = 0;
int has_cgroup = 0; const volatile int has_task = 0;
int uses_tgid = 0; const volatile int has_cgroup = 0;
const volatile int uses_tgid = 0;
const volatile bool has_prev_state = false; const volatile bool has_prev_state = false;
const volatile bool needs_cgroup = false; const volatile bool needs_cgroup = false;