perf ftrace: Add 'profile' command

The 'perf ftrace profile' command is to get function execution profiles
using function-graph tracer so that users can see the total, average,
max execution time as well as the number of invocations easily.

The following is a profile for the perf_event_open syscall.

  $ sudo perf ftrace profile -G __x64_sys_perf_event_open -- \
    perf stat -e cycles -C1 true 2> /dev/null | head
  # Total (us)   Avg (us)   Max (us)      Count   Function
        65.611     65.611     65.611          1   __x64_sys_perf_event_open
        30.527     30.527     30.527          1   anon_inode_getfile
        30.260     30.260     30.260          1   __anon_inode_getfile
        29.700     29.700     29.700          1   alloc_file_pseudo
        17.578     17.578     17.578          1   d_alloc_pseudo
        17.382     17.382     17.382          1   __d_alloc
        16.738     16.738     16.738          1   kmem_cache_alloc_lru
        15.686     15.686     15.686          1   perf_event_alloc
        14.012      7.006     11.264          2   obj_cgroup_charge
  #

Reviewed-by: Ian Rogers <irogers@google.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Changbin Du <changbin.du@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
Link: https://lore.kernel.org/lkml/20240729004127.238611-4-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
Namhyung Kim 2024-07-28 17:41:26 -07:00 committed by Arnaldo Carvalho de Melo
parent 608585f43f
commit 0f223813ed
3 changed files with 359 additions and 3 deletions

View File

@ -9,7 +9,7 @@ perf-ftrace - simple wrapper for kernel's ftrace functionality
SYNOPSIS
--------
[verse]
'perf ftrace' {trace|latency} <command>
'perf ftrace' {trace|latency|profile} <command>
DESCRIPTION
-----------
@ -23,6 +23,9 @@ kernel's ftrace infrastructure.
'perf ftrace latency' calculates execution latency of a given function
(optionally with BPF) and display it as a histogram.
'perf ftrace profile' show a execution profile for each function including
total, average, max time and the number of calls.
The following options apply to perf ftrace.
COMMON OPTIONS
@ -146,6 +149,43 @@ OPTIONS for 'perf ftrace latency'
Use nano-second instead of micro-second as a base unit of the histogram.
OPTIONS for 'perf ftrace profile'
---------------------------------
-T::
--trace-funcs=::
Set function filter on the given function (or a glob pattern).
Multiple functions can be given by using this option more than once.
The function argument also can be a glob pattern. It will be passed
to 'set_ftrace_filter' in tracefs.
-N::
--notrace-funcs=::
Do not trace functions given by the argument. Like -T option, this
can be used more than once to specify multiple functions (or glob
patterns). It will be passed to 'set_ftrace_notrace' in tracefs.
-G::
--graph-funcs=::
Set graph filter on the given function (or a glob pattern). This is
useful to trace for functions executed from the given function. This
can be used more than once to specify multiple functions. It will be
passed to 'set_graph_function' in tracefs.
-g::
--nograph-funcs=::
Set graph notrace filter on the given function (or a glob pattern).
Like -G option, this is useful for the function_graph tracer only and
disables tracing for function executed from the given function. This
can be used more than once to specify multiple functions. It will be
passed to 'set_graph_notrace' in tracefs.
-m::
--buffer-size::
Set the size of per-cpu tracing buffer, <size> is expected to
be a number with appended unit character - B/K/M/G.
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-trace[1]

View File

@ -13,6 +13,7 @@
#include <signal.h>
#include <stdlib.h>
#include <fcntl.h>
#include <inttypes.h>
#include <math.h>
#include <poll.h>
#include <ctype.h>
@ -22,15 +23,18 @@
#include "debug.h"
#include <subcmd/pager.h>
#include <subcmd/parse-options.h>
#include <api/io.h>
#include <api/fs/tracing_path.h>
#include "evlist.h"
#include "target.h"
#include "cpumap.h"
#include "hashmap.h"
#include "thread_map.h"
#include "strfilter.h"
#include "util/cap.h"
#include "util/config.h"
#include "util/ftrace.h"
#include "util/stat.h"
#include "util/units.h"
#include "util/parse-sublevel-options.h"
@ -959,6 +963,294 @@ static int __cmd_latency(struct perf_ftrace *ftrace)
return (done && !workload_exec_errno) ? 0 : -1;
}
static size_t profile_hash(long func, void *ctx __maybe_unused)
{
return str_hash((char *)func);
}
static bool profile_equal(long func1, long func2, void *ctx __maybe_unused)
{
return !strcmp((char *)func1, (char *)func2);
}
static int prepare_func_profile(struct perf_ftrace *ftrace)
{
ftrace->tracer = "function_graph";
ftrace->graph_tail = 1;
ftrace->profile_hash = hashmap__new(profile_hash, profile_equal, NULL);
if (ftrace->profile_hash == NULL)
return -ENOMEM;
return 0;
}
/* This is saved in a hashmap keyed by the function name */
struct ftrace_profile_data {
struct stats st;
};
static int add_func_duration(struct perf_ftrace *ftrace, char *func, double time_ns)
{
struct ftrace_profile_data *prof = NULL;
if (!hashmap__find(ftrace->profile_hash, func, &prof)) {
char *key = strdup(func);
if (key == NULL)
return -ENOMEM;
prof = zalloc(sizeof(*prof));
if (prof == NULL) {
free(key);
return -ENOMEM;
}
init_stats(&prof->st);
hashmap__add(ftrace->profile_hash, key, prof);
}
update_stats(&prof->st, time_ns);
return 0;
}
/*
* The ftrace function_graph text output normally looks like below:
*
* CPU DURATION FUNCTION
*
* 0) | syscall_trace_enter.isra.0() {
* 0) | __audit_syscall_entry() {
* 0) | auditd_test_task() {
* 0) 0.271 us | __rcu_read_lock();
* 0) 0.275 us | __rcu_read_unlock();
* 0) 1.254 us | } /\* auditd_test_task *\/
* 0) 0.279 us | ktime_get_coarse_real_ts64();
* 0) 2.227 us | } /\* __audit_syscall_entry *\/
* 0) 2.713 us | } /\* syscall_trace_enter.isra.0 *\/
*
* Parse the line and get the duration and function name.
*/
static int parse_func_duration(struct perf_ftrace *ftrace, char *line, size_t len)
{
char *p;
char *func;
double duration;
/* skip CPU */
p = strchr(line, ')');
if (p == NULL)
return 0;
/* get duration */
p = skip_spaces(p + 1);
/* no duration? */
if (p == NULL || *p == '|')
return 0;
/* skip markers like '*' or '!' for longer than ms */
if (!isdigit(*p))
p++;
duration = strtod(p, &p);
if (strncmp(p, " us", 3)) {
pr_debug("non-usec time found.. ignoring\n");
return 0;
}
/*
* profile stat keeps the max and min values as integer,
* convert to nsec time so that we can have accurate max.
*/
duration *= 1000;
/* skip to the pipe */
while (p < line + len && *p != '|')
p++;
if (*p++ != '|')
return -EINVAL;
/* get function name */
func = skip_spaces(p);
/* skip the closing bracket and the start of comment */
if (*func == '}')
func += 5;
/* remove semi-colon or end of comment at the end */
p = line + len - 1;
while (!isalnum(*p) && *p != ']') {
*p = '\0';
--p;
}
return add_func_duration(ftrace, func, duration);
}
static int cmp_profile_data(const void *a, const void *b)
{
const struct hashmap_entry *e1 = *(const struct hashmap_entry **)a;
const struct hashmap_entry *e2 = *(const struct hashmap_entry **)b;
struct ftrace_profile_data *p1 = e1->pvalue;
struct ftrace_profile_data *p2 = e2->pvalue;
/* compare by total time */
if ((p1->st.n * p1->st.mean) > (p2->st.n * p2->st.mean))
return -1;
else
return 1;
}
static void print_profile_result(struct perf_ftrace *ftrace)
{
struct hashmap_entry *entry, **profile;
size_t i, nr, bkt;
nr = hashmap__size(ftrace->profile_hash);
if (nr == 0)
return;
profile = calloc(nr, sizeof(*profile));
if (profile == NULL) {
pr_err("failed to allocate memory for the result\n");
return;
}
i = 0;
hashmap__for_each_entry(ftrace->profile_hash, entry, bkt)
profile[i++] = entry;
assert(i == nr);
//cmp_profile_data(profile[0], profile[1]);
qsort(profile, nr, sizeof(*profile), cmp_profile_data);
printf("# %10s %10s %10s %10s %s\n",
"Total (us)", "Avg (us)", "Max (us)", "Count", "Function");
for (i = 0; i < nr; i++) {
const char *name = profile[i]->pkey;
struct ftrace_profile_data *p = profile[i]->pvalue;
printf("%12.3f %10.3f %6"PRIu64".%03"PRIu64" %10.0f %s\n",
p->st.n * p->st.mean / 1000, p->st.mean / 1000,
p->st.max / 1000, p->st.max % 1000, p->st.n, name);
}
free(profile);
hashmap__for_each_entry(ftrace->profile_hash, entry, bkt) {
free((char *)entry->pkey);
free(entry->pvalue);
}
hashmap__free(ftrace->profile_hash);
ftrace->profile_hash = NULL;
}
static int __cmd_profile(struct perf_ftrace *ftrace)
{
char *trace_file;
int trace_fd;
char buf[4096];
struct io io;
char *line = NULL;
size_t line_len = 0;
if (prepare_func_profile(ftrace) < 0) {
pr_err("failed to prepare func profiler\n");
goto out;
}
if (reset_tracing_files(ftrace) < 0) {
pr_err("failed to reset ftrace\n");
goto out;
}
/* reset ftrace buffer */
if (write_tracing_file("trace", "0") < 0)
goto out;
if (set_tracing_options(ftrace) < 0)
return -1;
if (write_tracing_file("current_tracer", ftrace->tracer) < 0) {
pr_err("failed to set current_tracer to %s\n", ftrace->tracer);
goto out_reset;
}
setup_pager();
trace_file = get_tracing_file("trace_pipe");
if (!trace_file) {
pr_err("failed to open trace_pipe\n");
goto out_reset;
}
trace_fd = open(trace_file, O_RDONLY);
put_tracing_file(trace_file);
if (trace_fd < 0) {
pr_err("failed to open trace_pipe\n");
goto out_reset;
}
fcntl(trace_fd, F_SETFL, O_NONBLOCK);
if (write_tracing_file("tracing_on", "1") < 0) {
pr_err("can't enable tracing\n");
goto out_close_fd;
}
evlist__start_workload(ftrace->evlist);
io__init(&io, trace_fd, buf, sizeof(buf));
io.timeout_ms = -1;
while (!done && !io.eof) {
if (io__getline(&io, &line, &line_len) < 0)
break;
if (parse_func_duration(ftrace, line, line_len) < 0)
break;
}
write_tracing_file("tracing_on", "0");
if (workload_exec_errno) {
const char *emsg = str_error_r(workload_exec_errno, buf, sizeof(buf));
/* flush stdout first so below error msg appears at the end. */
fflush(stdout);
pr_err("workload failed: %s\n", emsg);
goto out_free_line;
}
/* read remaining buffer contents */
io.timeout_ms = 0;
while (!io.eof) {
if (io__getline(&io, &line, &line_len) < 0)
break;
if (parse_func_duration(ftrace, line, line_len) < 0)
break;
}
print_profile_result(ftrace);
out_free_line:
free(line);
out_close_fd:
close(trace_fd);
out_reset:
reset_tracing_files(ftrace);
out:
return (done && !workload_exec_errno) ? 0 : -1;
}
static int perf_ftrace_config(const char *var, const char *value, void *cb)
{
struct perf_ftrace *ftrace = cb;
@ -1126,6 +1418,7 @@ enum perf_ftrace_subcommand {
PERF_FTRACE_NONE,
PERF_FTRACE_TRACE,
PERF_FTRACE_LATENCY,
PERF_FTRACE_PROFILE,
};
int cmd_ftrace(int argc, const char **argv)
@ -1191,13 +1484,28 @@ int cmd_ftrace(int argc, const char **argv)
"Use nano-second histogram"),
OPT_PARENT(common_options),
};
const struct option profile_options[] = {
OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func",
"Trace given functions using function tracer",
parse_filter_func),
OPT_CALLBACK('N', "notrace-funcs", &ftrace.notrace, "func",
"Do not trace given functions", parse_filter_func),
OPT_CALLBACK('G', "graph-funcs", &ftrace.graph_funcs, "func",
"Trace given functions using function_graph tracer",
parse_filter_func),
OPT_CALLBACK('g', "nograph-funcs", &ftrace.nograph_funcs, "func",
"Set nograph filter on given functions", parse_filter_func),
OPT_CALLBACK('m', "buffer-size", &ftrace.percpu_buffer_size, "size",
"Size of per cpu buffer, needs to use a B, K, M or G suffix.", parse_buffer_size),
OPT_PARENT(common_options),
};
const struct option *options = ftrace_options;
const char * const ftrace_usage[] = {
"perf ftrace [<options>] [<command>]",
"perf ftrace [<options>] -- [<command>] [<options>]",
"perf ftrace {trace|latency} [<options>] [<command>]",
"perf ftrace {trace|latency} [<options>] -- [<command>] [<options>]",
"perf ftrace {trace|latency|profile} [<options>] [<command>]",
"perf ftrace {trace|latency|profile} [<options>] -- [<command>] [<options>]",
NULL
};
enum perf_ftrace_subcommand subcmd = PERF_FTRACE_NONE;
@ -1226,6 +1534,9 @@ int cmd_ftrace(int argc, const char **argv)
} else if (!strcmp(argv[1], "latency")) {
subcmd = PERF_FTRACE_LATENCY;
options = latency_options;
} else if (!strcmp(argv[1], "profile")) {
subcmd = PERF_FTRACE_PROFILE;
options = profile_options;
}
if (subcmd != PERF_FTRACE_NONE) {
@ -1261,6 +1572,9 @@ int cmd_ftrace(int argc, const char **argv)
}
cmd_func = __cmd_latency;
break;
case PERF_FTRACE_PROFILE:
cmd_func = __cmd_profile;
break;
case PERF_FTRACE_NONE:
default:
pr_err("Invalid subcommand\n");

View File

@ -6,6 +6,7 @@
#include "target.h"
struct evlist;
struct hashamp;
struct perf_ftrace {
struct evlist *evlist;
@ -15,6 +16,7 @@ struct perf_ftrace {
struct list_head notrace;
struct list_head graph_funcs;
struct list_head nograph_funcs;
struct hashmap *profile_hash;
unsigned long percpu_buffer_size;
bool inherit;
bool use_nsec;