mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-12-29 17:25:38 +00:00
perf ftrace: Add 'profile' command
The 'perf ftrace profile' command is to get function execution profiles using function-graph tracer so that users can see the total, average, max execution time as well as the number of invocations easily. The following is a profile for the perf_event_open syscall. $ sudo perf ftrace profile -G __x64_sys_perf_event_open -- \ perf stat -e cycles -C1 true 2> /dev/null | head # Total (us) Avg (us) Max (us) Count Function 65.611 65.611 65.611 1 __x64_sys_perf_event_open 30.527 30.527 30.527 1 anon_inode_getfile 30.260 30.260 30.260 1 __anon_inode_getfile 29.700 29.700 29.700 1 alloc_file_pseudo 17.578 17.578 17.578 1 d_alloc_pseudo 17.382 17.382 17.382 1 __d_alloc 16.738 16.738 16.738 1 kmem_cache_alloc_lru 15.686 15.686 15.686 1 perf_event_alloc 14.012 7.006 11.264 2 obj_cgroup_charge # Reviewed-by: Ian Rogers <irogers@google.com> Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Changbin Du <changbin.du@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Steven Rostedt (VMware) <rostedt@goodmis.org> Link: https://lore.kernel.org/lkml/20240729004127.238611-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
parent
608585f43f
commit
0f223813ed
@ -9,7 +9,7 @@ perf-ftrace - simple wrapper for kernel's ftrace functionality
|
||||
SYNOPSIS
|
||||
--------
|
||||
[verse]
|
||||
'perf ftrace' {trace|latency} <command>
|
||||
'perf ftrace' {trace|latency|profile} <command>
|
||||
|
||||
DESCRIPTION
|
||||
-----------
|
||||
@ -23,6 +23,9 @@ kernel's ftrace infrastructure.
|
||||
'perf ftrace latency' calculates execution latency of a given function
|
||||
(optionally with BPF) and display it as a histogram.
|
||||
|
||||
'perf ftrace profile' show a execution profile for each function including
|
||||
total, average, max time and the number of calls.
|
||||
|
||||
The following options apply to perf ftrace.
|
||||
|
||||
COMMON OPTIONS
|
||||
@ -146,6 +149,43 @@ OPTIONS for 'perf ftrace latency'
|
||||
Use nano-second instead of micro-second as a base unit of the histogram.
|
||||
|
||||
|
||||
OPTIONS for 'perf ftrace profile'
|
||||
---------------------------------
|
||||
|
||||
-T::
|
||||
--trace-funcs=::
|
||||
Set function filter on the given function (or a glob pattern).
|
||||
Multiple functions can be given by using this option more than once.
|
||||
The function argument also can be a glob pattern. It will be passed
|
||||
to 'set_ftrace_filter' in tracefs.
|
||||
|
||||
-N::
|
||||
--notrace-funcs=::
|
||||
Do not trace functions given by the argument. Like -T option, this
|
||||
can be used more than once to specify multiple functions (or glob
|
||||
patterns). It will be passed to 'set_ftrace_notrace' in tracefs.
|
||||
|
||||
-G::
|
||||
--graph-funcs=::
|
||||
Set graph filter on the given function (or a glob pattern). This is
|
||||
useful to trace for functions executed from the given function. This
|
||||
can be used more than once to specify multiple functions. It will be
|
||||
passed to 'set_graph_function' in tracefs.
|
||||
|
||||
-g::
|
||||
--nograph-funcs=::
|
||||
Set graph notrace filter on the given function (or a glob pattern).
|
||||
Like -G option, this is useful for the function_graph tracer only and
|
||||
disables tracing for function executed from the given function. This
|
||||
can be used more than once to specify multiple functions. It will be
|
||||
passed to 'set_graph_notrace' in tracefs.
|
||||
|
||||
-m::
|
||||
--buffer-size::
|
||||
Set the size of per-cpu tracing buffer, <size> is expected to
|
||||
be a number with appended unit character - B/K/M/G.
|
||||
|
||||
|
||||
SEE ALSO
|
||||
--------
|
||||
linkperf:perf-record[1], linkperf:perf-trace[1]
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include <signal.h>
|
||||
#include <stdlib.h>
|
||||
#include <fcntl.h>
|
||||
#include <inttypes.h>
|
||||
#include <math.h>
|
||||
#include <poll.h>
|
||||
#include <ctype.h>
|
||||
@ -22,15 +23,18 @@
|
||||
#include "debug.h"
|
||||
#include <subcmd/pager.h>
|
||||
#include <subcmd/parse-options.h>
|
||||
#include <api/io.h>
|
||||
#include <api/fs/tracing_path.h>
|
||||
#include "evlist.h"
|
||||
#include "target.h"
|
||||
#include "cpumap.h"
|
||||
#include "hashmap.h"
|
||||
#include "thread_map.h"
|
||||
#include "strfilter.h"
|
||||
#include "util/cap.h"
|
||||
#include "util/config.h"
|
||||
#include "util/ftrace.h"
|
||||
#include "util/stat.h"
|
||||
#include "util/units.h"
|
||||
#include "util/parse-sublevel-options.h"
|
||||
|
||||
@ -959,6 +963,294 @@ static int __cmd_latency(struct perf_ftrace *ftrace)
|
||||
return (done && !workload_exec_errno) ? 0 : -1;
|
||||
}
|
||||
|
||||
static size_t profile_hash(long func, void *ctx __maybe_unused)
|
||||
{
|
||||
return str_hash((char *)func);
|
||||
}
|
||||
|
||||
static bool profile_equal(long func1, long func2, void *ctx __maybe_unused)
|
||||
{
|
||||
return !strcmp((char *)func1, (char *)func2);
|
||||
}
|
||||
|
||||
static int prepare_func_profile(struct perf_ftrace *ftrace)
|
||||
{
|
||||
ftrace->tracer = "function_graph";
|
||||
ftrace->graph_tail = 1;
|
||||
|
||||
ftrace->profile_hash = hashmap__new(profile_hash, profile_equal, NULL);
|
||||
if (ftrace->profile_hash == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* This is saved in a hashmap keyed by the function name */
|
||||
struct ftrace_profile_data {
|
||||
struct stats st;
|
||||
};
|
||||
|
||||
static int add_func_duration(struct perf_ftrace *ftrace, char *func, double time_ns)
|
||||
{
|
||||
struct ftrace_profile_data *prof = NULL;
|
||||
|
||||
if (!hashmap__find(ftrace->profile_hash, func, &prof)) {
|
||||
char *key = strdup(func);
|
||||
|
||||
if (key == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
prof = zalloc(sizeof(*prof));
|
||||
if (prof == NULL) {
|
||||
free(key);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
init_stats(&prof->st);
|
||||
hashmap__add(ftrace->profile_hash, key, prof);
|
||||
}
|
||||
|
||||
update_stats(&prof->st, time_ns);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* The ftrace function_graph text output normally looks like below:
|
||||
*
|
||||
* CPU DURATION FUNCTION
|
||||
*
|
||||
* 0) | syscall_trace_enter.isra.0() {
|
||||
* 0) | __audit_syscall_entry() {
|
||||
* 0) | auditd_test_task() {
|
||||
* 0) 0.271 us | __rcu_read_lock();
|
||||
* 0) 0.275 us | __rcu_read_unlock();
|
||||
* 0) 1.254 us | } /\* auditd_test_task *\/
|
||||
* 0) 0.279 us | ktime_get_coarse_real_ts64();
|
||||
* 0) 2.227 us | } /\* __audit_syscall_entry *\/
|
||||
* 0) 2.713 us | } /\* syscall_trace_enter.isra.0 *\/
|
||||
*
|
||||
* Parse the line and get the duration and function name.
|
||||
*/
|
||||
static int parse_func_duration(struct perf_ftrace *ftrace, char *line, size_t len)
|
||||
{
|
||||
char *p;
|
||||
char *func;
|
||||
double duration;
|
||||
|
||||
/* skip CPU */
|
||||
p = strchr(line, ')');
|
||||
if (p == NULL)
|
||||
return 0;
|
||||
|
||||
/* get duration */
|
||||
p = skip_spaces(p + 1);
|
||||
|
||||
/* no duration? */
|
||||
if (p == NULL || *p == '|')
|
||||
return 0;
|
||||
|
||||
/* skip markers like '*' or '!' for longer than ms */
|
||||
if (!isdigit(*p))
|
||||
p++;
|
||||
|
||||
duration = strtod(p, &p);
|
||||
|
||||
if (strncmp(p, " us", 3)) {
|
||||
pr_debug("non-usec time found.. ignoring\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* profile stat keeps the max and min values as integer,
|
||||
* convert to nsec time so that we can have accurate max.
|
||||
*/
|
||||
duration *= 1000;
|
||||
|
||||
/* skip to the pipe */
|
||||
while (p < line + len && *p != '|')
|
||||
p++;
|
||||
|
||||
if (*p++ != '|')
|
||||
return -EINVAL;
|
||||
|
||||
/* get function name */
|
||||
func = skip_spaces(p);
|
||||
|
||||
/* skip the closing bracket and the start of comment */
|
||||
if (*func == '}')
|
||||
func += 5;
|
||||
|
||||
/* remove semi-colon or end of comment at the end */
|
||||
p = line + len - 1;
|
||||
while (!isalnum(*p) && *p != ']') {
|
||||
*p = '\0';
|
||||
--p;
|
||||
}
|
||||
|
||||
return add_func_duration(ftrace, func, duration);
|
||||
}
|
||||
|
||||
static int cmp_profile_data(const void *a, const void *b)
|
||||
{
|
||||
const struct hashmap_entry *e1 = *(const struct hashmap_entry **)a;
|
||||
const struct hashmap_entry *e2 = *(const struct hashmap_entry **)b;
|
||||
struct ftrace_profile_data *p1 = e1->pvalue;
|
||||
struct ftrace_profile_data *p2 = e2->pvalue;
|
||||
|
||||
/* compare by total time */
|
||||
if ((p1->st.n * p1->st.mean) > (p2->st.n * p2->st.mean))
|
||||
return -1;
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void print_profile_result(struct perf_ftrace *ftrace)
|
||||
{
|
||||
struct hashmap_entry *entry, **profile;
|
||||
size_t i, nr, bkt;
|
||||
|
||||
nr = hashmap__size(ftrace->profile_hash);
|
||||
if (nr == 0)
|
||||
return;
|
||||
|
||||
profile = calloc(nr, sizeof(*profile));
|
||||
if (profile == NULL) {
|
||||
pr_err("failed to allocate memory for the result\n");
|
||||
return;
|
||||
}
|
||||
|
||||
i = 0;
|
||||
hashmap__for_each_entry(ftrace->profile_hash, entry, bkt)
|
||||
profile[i++] = entry;
|
||||
|
||||
assert(i == nr);
|
||||
|
||||
//cmp_profile_data(profile[0], profile[1]);
|
||||
qsort(profile, nr, sizeof(*profile), cmp_profile_data);
|
||||
|
||||
printf("# %10s %10s %10s %10s %s\n",
|
||||
"Total (us)", "Avg (us)", "Max (us)", "Count", "Function");
|
||||
|
||||
for (i = 0; i < nr; i++) {
|
||||
const char *name = profile[i]->pkey;
|
||||
struct ftrace_profile_data *p = profile[i]->pvalue;
|
||||
|
||||
printf("%12.3f %10.3f %6"PRIu64".%03"PRIu64" %10.0f %s\n",
|
||||
p->st.n * p->st.mean / 1000, p->st.mean / 1000,
|
||||
p->st.max / 1000, p->st.max % 1000, p->st.n, name);
|
||||
}
|
||||
|
||||
free(profile);
|
||||
|
||||
hashmap__for_each_entry(ftrace->profile_hash, entry, bkt) {
|
||||
free((char *)entry->pkey);
|
||||
free(entry->pvalue);
|
||||
}
|
||||
|
||||
hashmap__free(ftrace->profile_hash);
|
||||
ftrace->profile_hash = NULL;
|
||||
}
|
||||
|
||||
static int __cmd_profile(struct perf_ftrace *ftrace)
|
||||
{
|
||||
char *trace_file;
|
||||
int trace_fd;
|
||||
char buf[4096];
|
||||
struct io io;
|
||||
char *line = NULL;
|
||||
size_t line_len = 0;
|
||||
|
||||
if (prepare_func_profile(ftrace) < 0) {
|
||||
pr_err("failed to prepare func profiler\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (reset_tracing_files(ftrace) < 0) {
|
||||
pr_err("failed to reset ftrace\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* reset ftrace buffer */
|
||||
if (write_tracing_file("trace", "0") < 0)
|
||||
goto out;
|
||||
|
||||
if (set_tracing_options(ftrace) < 0)
|
||||
return -1;
|
||||
|
||||
if (write_tracing_file("current_tracer", ftrace->tracer) < 0) {
|
||||
pr_err("failed to set current_tracer to %s\n", ftrace->tracer);
|
||||
goto out_reset;
|
||||
}
|
||||
|
||||
setup_pager();
|
||||
|
||||
trace_file = get_tracing_file("trace_pipe");
|
||||
if (!trace_file) {
|
||||
pr_err("failed to open trace_pipe\n");
|
||||
goto out_reset;
|
||||
}
|
||||
|
||||
trace_fd = open(trace_file, O_RDONLY);
|
||||
|
||||
put_tracing_file(trace_file);
|
||||
|
||||
if (trace_fd < 0) {
|
||||
pr_err("failed to open trace_pipe\n");
|
||||
goto out_reset;
|
||||
}
|
||||
|
||||
fcntl(trace_fd, F_SETFL, O_NONBLOCK);
|
||||
|
||||
if (write_tracing_file("tracing_on", "1") < 0) {
|
||||
pr_err("can't enable tracing\n");
|
||||
goto out_close_fd;
|
||||
}
|
||||
|
||||
evlist__start_workload(ftrace->evlist);
|
||||
|
||||
io__init(&io, trace_fd, buf, sizeof(buf));
|
||||
io.timeout_ms = -1;
|
||||
|
||||
while (!done && !io.eof) {
|
||||
if (io__getline(&io, &line, &line_len) < 0)
|
||||
break;
|
||||
|
||||
if (parse_func_duration(ftrace, line, line_len) < 0)
|
||||
break;
|
||||
}
|
||||
|
||||
write_tracing_file("tracing_on", "0");
|
||||
|
||||
if (workload_exec_errno) {
|
||||
const char *emsg = str_error_r(workload_exec_errno, buf, sizeof(buf));
|
||||
/* flush stdout first so below error msg appears at the end. */
|
||||
fflush(stdout);
|
||||
pr_err("workload failed: %s\n", emsg);
|
||||
goto out_free_line;
|
||||
}
|
||||
|
||||
/* read remaining buffer contents */
|
||||
io.timeout_ms = 0;
|
||||
while (!io.eof) {
|
||||
if (io__getline(&io, &line, &line_len) < 0)
|
||||
break;
|
||||
|
||||
if (parse_func_duration(ftrace, line, line_len) < 0)
|
||||
break;
|
||||
}
|
||||
|
||||
print_profile_result(ftrace);
|
||||
|
||||
out_free_line:
|
||||
free(line);
|
||||
out_close_fd:
|
||||
close(trace_fd);
|
||||
out_reset:
|
||||
reset_tracing_files(ftrace);
|
||||
out:
|
||||
return (done && !workload_exec_errno) ? 0 : -1;
|
||||
}
|
||||
|
||||
static int perf_ftrace_config(const char *var, const char *value, void *cb)
|
||||
{
|
||||
struct perf_ftrace *ftrace = cb;
|
||||
@ -1126,6 +1418,7 @@ enum perf_ftrace_subcommand {
|
||||
PERF_FTRACE_NONE,
|
||||
PERF_FTRACE_TRACE,
|
||||
PERF_FTRACE_LATENCY,
|
||||
PERF_FTRACE_PROFILE,
|
||||
};
|
||||
|
||||
int cmd_ftrace(int argc, const char **argv)
|
||||
@ -1191,13 +1484,28 @@ int cmd_ftrace(int argc, const char **argv)
|
||||
"Use nano-second histogram"),
|
||||
OPT_PARENT(common_options),
|
||||
};
|
||||
const struct option profile_options[] = {
|
||||
OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func",
|
||||
"Trace given functions using function tracer",
|
||||
parse_filter_func),
|
||||
OPT_CALLBACK('N', "notrace-funcs", &ftrace.notrace, "func",
|
||||
"Do not trace given functions", parse_filter_func),
|
||||
OPT_CALLBACK('G', "graph-funcs", &ftrace.graph_funcs, "func",
|
||||
"Trace given functions using function_graph tracer",
|
||||
parse_filter_func),
|
||||
OPT_CALLBACK('g', "nograph-funcs", &ftrace.nograph_funcs, "func",
|
||||
"Set nograph filter on given functions", parse_filter_func),
|
||||
OPT_CALLBACK('m', "buffer-size", &ftrace.percpu_buffer_size, "size",
|
||||
"Size of per cpu buffer, needs to use a B, K, M or G suffix.", parse_buffer_size),
|
||||
OPT_PARENT(common_options),
|
||||
};
|
||||
const struct option *options = ftrace_options;
|
||||
|
||||
const char * const ftrace_usage[] = {
|
||||
"perf ftrace [<options>] [<command>]",
|
||||
"perf ftrace [<options>] -- [<command>] [<options>]",
|
||||
"perf ftrace {trace|latency} [<options>] [<command>]",
|
||||
"perf ftrace {trace|latency} [<options>] -- [<command>] [<options>]",
|
||||
"perf ftrace {trace|latency|profile} [<options>] [<command>]",
|
||||
"perf ftrace {trace|latency|profile} [<options>] -- [<command>] [<options>]",
|
||||
NULL
|
||||
};
|
||||
enum perf_ftrace_subcommand subcmd = PERF_FTRACE_NONE;
|
||||
@ -1226,6 +1534,9 @@ int cmd_ftrace(int argc, const char **argv)
|
||||
} else if (!strcmp(argv[1], "latency")) {
|
||||
subcmd = PERF_FTRACE_LATENCY;
|
||||
options = latency_options;
|
||||
} else if (!strcmp(argv[1], "profile")) {
|
||||
subcmd = PERF_FTRACE_PROFILE;
|
||||
options = profile_options;
|
||||
}
|
||||
|
||||
if (subcmd != PERF_FTRACE_NONE) {
|
||||
@ -1261,6 +1572,9 @@ int cmd_ftrace(int argc, const char **argv)
|
||||
}
|
||||
cmd_func = __cmd_latency;
|
||||
break;
|
||||
case PERF_FTRACE_PROFILE:
|
||||
cmd_func = __cmd_profile;
|
||||
break;
|
||||
case PERF_FTRACE_NONE:
|
||||
default:
|
||||
pr_err("Invalid subcommand\n");
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include "target.h"
|
||||
|
||||
struct evlist;
|
||||
struct hashamp;
|
||||
|
||||
struct perf_ftrace {
|
||||
struct evlist *evlist;
|
||||
@ -15,6 +16,7 @@ struct perf_ftrace {
|
||||
struct list_head notrace;
|
||||
struct list_head graph_funcs;
|
||||
struct list_head nograph_funcs;
|
||||
struct hashmap *profile_hash;
|
||||
unsigned long percpu_buffer_size;
|
||||
bool inherit;
|
||||
bool use_nsec;
|
||||
|
Loading…
Reference in New Issue
Block a user