2016-09-22 15:36:38 +00:00
|
|
|
#include <linux/compiler.h>
|
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include "util.h"
|
|
|
|
#include "debug.h"
|
|
|
|
#include "builtin.h"
|
|
|
|
#include <subcmd/parse-options.h>
|
perf c2c: Add record subcommand
Adding c2c record subcommand. It setups options related to HITM
cacheline analysis and calls standard perf record command.
$ sudo perf c2c record -v -- -a
calling: record -W -d --sample-cpu -e cpu/mem-loads,ldlat=30/P -e cpu/mem-stores/P -a
...
It produces perf.data, which is to be reported by perf c2c report, that
comes in following patches.
Details are described in the man page, which is added in one of the
following patches.
Committer notes:
Testing it:
# perf c2c record -a sleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 5.050 MB perf.data (412 samples) ]
# ls -la perf.data
-rw-------. 1 root root 5301752 Oct 4 13:32 perf.data
# perf evlist
cpu/mem-loads,ldlat=30/P
cpu/mem-stores/P
# perf evlist -v
cpu/mem-loads,ldlat=30/P: type: 4, size: 112, config: 0x1cd, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ADDR|ID|CPU|PERIOD|DATA_SRC|WEIGHT, read_format: ID, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1, precise_ip: 3, mmap_data: 1, sample_id_all: 1, mmap2: 1, comm_exec: 1, { bp_addr, config1 }: 0x1f
cpu/mem-stores/P: type: 4, size: 112, config: 0x82d0, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ADDR|ID|CPU|PERIOD|DATA_SRC|WEIGHT, read_format: ID, disabled: 1, inherit: 1, freq: 1, precise_ip: 3, sample_id_all: 1
#
# perf report --stdio
<SNIP>
# Total Lost Samples: 14
# Samples: 216 of event 'cpu/mem-loads,ldlat=30/P'
# Event count (approx.): 15207
# Overhead Symbol Shared Object
# ........ ..................................... ............................
10.32% [k] update_blocked_averages [kernel.vmlinux]
3.43% [.] 0x00000000001a2122 qemu-system-x86_64 (deleted)
2.52% [k] enqueue_entity [kernel.vmlinux]
1.88% [.] g_main_context_query libglib-2.0.so.0.4800.2
1.86% [k] __schedule [kernel.vmlinux]
<SNIP>
# Samples: 196 of event 'cpu/mem-stores/P'
# Event count (approx.): 14771346
# Overhead Symbol Shared Object
# ........ ................................... ............................
13.91% [k] intel_idle [kernel.vmlinux]
3.02% [.] 0x00000000022f06ea chrome
2.94% [.] 0x00000000001a1b4c qemu-system-x86_64 (deleted)
2.94% [.] 0x000000000019d8e4 qemu-system-x86_64 (deleted)
2.38% [.] 0x00000000001a1c52 qemu-system-x86_64 (deleted)
<SNIP>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Joe Mario <jmario@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1474558645-19956-12-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-09-22 15:36:39 +00:00
|
|
|
#include "mem-events.h"
|
2016-09-22 15:36:40 +00:00
|
|
|
#include "session.h"
|
|
|
|
#include "hist.h"
|
|
|
|
#include "tool.h"
|
|
|
|
#include "data.h"
|
|
|
|
|
2016-09-22 15:36:41 +00:00
|
|
|
struct c2c_hists {
|
|
|
|
struct hists hists;
|
|
|
|
struct perf_hpp_list list;
|
|
|
|
};
|
|
|
|
|
2016-09-22 15:36:40 +00:00
|
|
|
struct perf_c2c {
|
2016-09-22 15:36:41 +00:00
|
|
|
struct perf_tool tool;
|
|
|
|
struct c2c_hists hists;
|
2016-09-22 15:36:40 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
static struct perf_c2c c2c;
|
2016-09-22 15:36:38 +00:00
|
|
|
|
|
|
|
static const char * const c2c_usage[] = {
|
2016-09-22 15:36:40 +00:00
|
|
|
"perf c2c {record|report}",
|
2016-09-22 15:36:38 +00:00
|
|
|
NULL
|
|
|
|
};
|
|
|
|
|
2016-09-22 15:36:40 +00:00
|
|
|
static const char * const __usage_report[] = {
|
|
|
|
"perf c2c report",
|
|
|
|
NULL
|
|
|
|
};
|
|
|
|
|
|
|
|
static const char * const *report_c2c_usage = __usage_report;
|
|
|
|
|
2016-09-22 15:36:41 +00:00
|
|
|
#define C2C_HEADER_MAX 2
|
|
|
|
|
|
|
|
struct c2c_header {
|
|
|
|
struct {
|
|
|
|
const char *text;
|
|
|
|
int span;
|
|
|
|
} line[C2C_HEADER_MAX];
|
|
|
|
};
|
|
|
|
|
|
|
|
struct c2c_dimension {
|
|
|
|
struct c2c_header header;
|
|
|
|
const char *name;
|
|
|
|
int width;
|
|
|
|
|
|
|
|
int64_t (*cmp)(struct perf_hpp_fmt *fmt,
|
|
|
|
struct hist_entry *, struct hist_entry *);
|
|
|
|
int (*entry)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
|
|
|
|
struct hist_entry *he);
|
|
|
|
int (*color)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
|
|
|
|
struct hist_entry *he);
|
|
|
|
};
|
|
|
|
|
|
|
|
struct c2c_fmt {
|
|
|
|
struct perf_hpp_fmt fmt;
|
|
|
|
struct c2c_dimension *dim;
|
|
|
|
};
|
|
|
|
|
|
|
|
static int c2c_width(struct perf_hpp_fmt *fmt,
|
|
|
|
struct perf_hpp *hpp __maybe_unused,
|
|
|
|
struct hists *hists __maybe_unused)
|
|
|
|
{
|
|
|
|
struct c2c_fmt *c2c_fmt;
|
|
|
|
|
|
|
|
c2c_fmt = container_of(fmt, struct c2c_fmt, fmt);
|
|
|
|
return c2c_fmt->dim->width;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int c2c_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
|
|
|
|
struct hists *hists __maybe_unused, int line, int *span)
|
|
|
|
{
|
|
|
|
struct c2c_fmt *c2c_fmt;
|
|
|
|
struct c2c_dimension *dim;
|
|
|
|
int len = c2c_width(fmt, hpp, hists);
|
|
|
|
const char *text;
|
|
|
|
|
|
|
|
c2c_fmt = container_of(fmt, struct c2c_fmt, fmt);
|
|
|
|
dim = c2c_fmt->dim;
|
|
|
|
|
|
|
|
text = dim->header.line[line].text;
|
|
|
|
if (text == NULL)
|
|
|
|
text = "";
|
|
|
|
|
|
|
|
if (*span) {
|
|
|
|
(*span)--;
|
|
|
|
return 0;
|
|
|
|
} else {
|
|
|
|
*span = dim->header.line[line].span;
|
|
|
|
}
|
|
|
|
|
|
|
|
return scnprintf(hpp->buf, hpp->size, "%*s", len, text);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct c2c_dimension *dimensions[] = {
|
|
|
|
NULL,
|
|
|
|
};
|
|
|
|
|
|
|
|
static void fmt_free(struct perf_hpp_fmt *fmt)
|
|
|
|
{
|
|
|
|
struct c2c_fmt *c2c_fmt;
|
|
|
|
|
|
|
|
c2c_fmt = container_of(fmt, struct c2c_fmt, fmt);
|
|
|
|
free(c2c_fmt);
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool fmt_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
|
|
|
|
{
|
|
|
|
struct c2c_fmt *c2c_a = container_of(a, struct c2c_fmt, fmt);
|
|
|
|
struct c2c_fmt *c2c_b = container_of(b, struct c2c_fmt, fmt);
|
|
|
|
|
|
|
|
return c2c_a->dim == c2c_b->dim;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct c2c_dimension *get_dimension(const char *name)
|
|
|
|
{
|
|
|
|
unsigned int i;
|
|
|
|
|
|
|
|
for (i = 0; dimensions[i]; i++) {
|
|
|
|
struct c2c_dimension *dim = dimensions[i];
|
|
|
|
|
|
|
|
if (!strcmp(dim->name, name))
|
|
|
|
return dim;
|
|
|
|
};
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct c2c_fmt *get_format(const char *name)
|
|
|
|
{
|
|
|
|
struct c2c_dimension *dim = get_dimension(name);
|
|
|
|
struct c2c_fmt *c2c_fmt;
|
|
|
|
struct perf_hpp_fmt *fmt;
|
|
|
|
|
|
|
|
if (!dim)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
c2c_fmt = zalloc(sizeof(*c2c_fmt));
|
|
|
|
if (!c2c_fmt)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
c2c_fmt->dim = dim;
|
|
|
|
|
|
|
|
fmt = &c2c_fmt->fmt;
|
|
|
|
INIT_LIST_HEAD(&fmt->list);
|
|
|
|
INIT_LIST_HEAD(&fmt->sort_list);
|
|
|
|
|
|
|
|
fmt->cmp = dim->cmp;
|
|
|
|
fmt->sort = dim->cmp;
|
|
|
|
fmt->entry = dim->entry;
|
|
|
|
fmt->header = c2c_header;
|
|
|
|
fmt->width = c2c_width;
|
|
|
|
fmt->collapse = dim->cmp;
|
|
|
|
fmt->equal = fmt_equal;
|
|
|
|
fmt->free = fmt_free;
|
|
|
|
|
|
|
|
return c2c_fmt;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int c2c_hists__init_output(struct perf_hpp_list *hpp_list, char *name)
|
|
|
|
{
|
|
|
|
struct c2c_fmt *c2c_fmt = get_format(name);
|
|
|
|
|
|
|
|
if (!c2c_fmt)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
perf_hpp_list__column_register(hpp_list, &c2c_fmt->fmt);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int c2c_hists__init_sort(struct perf_hpp_list *hpp_list, char *name)
|
|
|
|
{
|
|
|
|
struct c2c_fmt *c2c_fmt = get_format(name);
|
|
|
|
|
|
|
|
if (!c2c_fmt)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
perf_hpp_list__register_sort_field(hpp_list, &c2c_fmt->fmt);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
#define PARSE_LIST(_list, _fn) \
|
|
|
|
do { \
|
|
|
|
char *tmp, *tok; \
|
|
|
|
ret = 0; \
|
|
|
|
\
|
|
|
|
if (!_list) \
|
|
|
|
break; \
|
|
|
|
\
|
|
|
|
for (tok = strtok_r((char *)_list, ", ", &tmp); \
|
|
|
|
tok; tok = strtok_r(NULL, ", ", &tmp)) { \
|
|
|
|
ret = _fn(hpp_list, tok); \
|
|
|
|
if (ret == -EINVAL) { \
|
|
|
|
error("Invalid --fields key: `%s'", tok); \
|
|
|
|
break; \
|
|
|
|
} else if (ret == -ESRCH) { \
|
|
|
|
error("Unknown --fields key: `%s'", tok); \
|
|
|
|
break; \
|
|
|
|
} \
|
|
|
|
} \
|
|
|
|
} while (0)
|
|
|
|
|
|
|
|
static int hpp_list__parse(struct perf_hpp_list *hpp_list,
|
|
|
|
const char *output_,
|
|
|
|
const char *sort_)
|
|
|
|
{
|
|
|
|
char *output = output_ ? strdup(output_) : NULL;
|
|
|
|
char *sort = sort_ ? strdup(sort_) : NULL;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
PARSE_LIST(output, c2c_hists__init_output);
|
|
|
|
PARSE_LIST(sort, c2c_hists__init_sort);
|
|
|
|
|
|
|
|
/* copy sort keys to output fields */
|
|
|
|
perf_hpp__setup_output_field(hpp_list);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We dont need other sorting keys other than those
|
|
|
|
* we already specified. It also really slows down
|
|
|
|
* the processing a lot with big number of output
|
|
|
|
* fields, so switching this off for c2c.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#if 0
|
|
|
|
/* and then copy output fields to sort keys */
|
|
|
|
perf_hpp__append_sort_keys(&hists->list);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
free(output);
|
|
|
|
free(sort);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int c2c_hists__init(struct c2c_hists *hists,
|
|
|
|
const char *sort)
|
|
|
|
{
|
|
|
|
__hists__init(&hists->hists, &hists->list);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Initialize only with sort fields, we need to resort
|
|
|
|
* later anyway, and that's where we add output fields
|
|
|
|
* as well.
|
|
|
|
*/
|
|
|
|
perf_hpp_list__init(&hists->list);
|
|
|
|
|
|
|
|
return hpp_list__parse(&hists->list, NULL, sort);
|
|
|
|
}
|
|
|
|
|
|
|
|
__maybe_unused
|
|
|
|
static int c2c_hists__reinit(struct c2c_hists *c2c_hists,
|
|
|
|
const char *output,
|
|
|
|
const char *sort)
|
|
|
|
{
|
|
|
|
perf_hpp__reset_output_field(&c2c_hists->list);
|
|
|
|
return hpp_list__parse(&c2c_hists->list, output, sort);
|
|
|
|
}
|
|
|
|
|
2016-09-22 15:36:40 +00:00
|
|
|
static int perf_c2c__report(int argc, const char **argv)
|
|
|
|
{
|
|
|
|
struct perf_session *session;
|
|
|
|
struct perf_data_file file = {
|
|
|
|
.mode = PERF_DATA_MODE_READ,
|
|
|
|
};
|
|
|
|
const struct option c2c_options[] = {
|
|
|
|
OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
|
|
|
|
"file", "vmlinux pathname"),
|
|
|
|
OPT_INCR('v', "verbose", &verbose,
|
|
|
|
"be more verbose (show counter open errors, etc)"),
|
|
|
|
OPT_STRING('i', "input", &input_name, "file",
|
|
|
|
"the input file to process"),
|
|
|
|
OPT_END()
|
|
|
|
};
|
|
|
|
int err = 0;
|
|
|
|
|
|
|
|
argc = parse_options(argc, argv, c2c_options, report_c2c_usage,
|
|
|
|
PARSE_OPT_STOP_AT_NON_OPTION);
|
|
|
|
if (!argc)
|
|
|
|
usage_with_options(report_c2c_usage, c2c_options);
|
|
|
|
|
|
|
|
file.path = input_name;
|
|
|
|
|
2016-09-22 15:36:41 +00:00
|
|
|
err = c2c_hists__init(&c2c.hists, "dcacheline");
|
|
|
|
if (err) {
|
|
|
|
pr_debug("Failed to initialize hists\n");
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2016-09-22 15:36:40 +00:00
|
|
|
session = perf_session__new(&file, 0, &c2c.tool);
|
|
|
|
if (session == NULL) {
|
|
|
|
pr_debug("No memory for session\n");
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (symbol__init(&session->header.env) < 0)
|
|
|
|
goto out_session;
|
|
|
|
|
|
|
|
/* No pipe support at the moment. */
|
|
|
|
if (perf_data_file__is_pipe(session->file)) {
|
|
|
|
pr_debug("No pipe support at the moment.\n");
|
|
|
|
goto out_session;
|
|
|
|
}
|
|
|
|
|
|
|
|
out_session:
|
|
|
|
perf_session__delete(session);
|
|
|
|
out:
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
perf c2c: Add record subcommand
Adding c2c record subcommand. It setups options related to HITM
cacheline analysis and calls standard perf record command.
$ sudo perf c2c record -v -- -a
calling: record -W -d --sample-cpu -e cpu/mem-loads,ldlat=30/P -e cpu/mem-stores/P -a
...
It produces perf.data, which is to be reported by perf c2c report, that
comes in following patches.
Details are described in the man page, which is added in one of the
following patches.
Committer notes:
Testing it:
# perf c2c record -a sleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 5.050 MB perf.data (412 samples) ]
# ls -la perf.data
-rw-------. 1 root root 5301752 Oct 4 13:32 perf.data
# perf evlist
cpu/mem-loads,ldlat=30/P
cpu/mem-stores/P
# perf evlist -v
cpu/mem-loads,ldlat=30/P: type: 4, size: 112, config: 0x1cd, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ADDR|ID|CPU|PERIOD|DATA_SRC|WEIGHT, read_format: ID, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1, precise_ip: 3, mmap_data: 1, sample_id_all: 1, mmap2: 1, comm_exec: 1, { bp_addr, config1 }: 0x1f
cpu/mem-stores/P: type: 4, size: 112, config: 0x82d0, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ADDR|ID|CPU|PERIOD|DATA_SRC|WEIGHT, read_format: ID, disabled: 1, inherit: 1, freq: 1, precise_ip: 3, sample_id_all: 1
#
# perf report --stdio
<SNIP>
# Total Lost Samples: 14
# Samples: 216 of event 'cpu/mem-loads,ldlat=30/P'
# Event count (approx.): 15207
# Overhead Symbol Shared Object
# ........ ..................................... ............................
10.32% [k] update_blocked_averages [kernel.vmlinux]
3.43% [.] 0x00000000001a2122 qemu-system-x86_64 (deleted)
2.52% [k] enqueue_entity [kernel.vmlinux]
1.88% [.] g_main_context_query libglib-2.0.so.0.4800.2
1.86% [k] __schedule [kernel.vmlinux]
<SNIP>
# Samples: 196 of event 'cpu/mem-stores/P'
# Event count (approx.): 14771346
# Overhead Symbol Shared Object
# ........ ................................... ............................
13.91% [k] intel_idle [kernel.vmlinux]
3.02% [.] 0x00000000022f06ea chrome
2.94% [.] 0x00000000001a1b4c qemu-system-x86_64 (deleted)
2.94% [.] 0x000000000019d8e4 qemu-system-x86_64 (deleted)
2.38% [.] 0x00000000001a1c52 qemu-system-x86_64 (deleted)
<SNIP>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Joe Mario <jmario@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1474558645-19956-12-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-09-22 15:36:39 +00:00
|
|
|
static int parse_record_events(const struct option *opt __maybe_unused,
|
|
|
|
const char *str, int unset __maybe_unused)
|
|
|
|
{
|
|
|
|
bool *event_set = (bool *) opt->value;
|
|
|
|
|
|
|
|
*event_set = true;
|
|
|
|
return perf_mem_events__parse(str);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static const char * const __usage_record[] = {
|
|
|
|
"perf c2c record [<options>] [<command>]",
|
|
|
|
"perf c2c record [<options>] -- <command> [<options>]",
|
|
|
|
NULL
|
|
|
|
};
|
|
|
|
|
|
|
|
static const char * const *record_mem_usage = __usage_record;
|
|
|
|
|
|
|
|
static int perf_c2c__record(int argc, const char **argv)
|
|
|
|
{
|
|
|
|
int rec_argc, i = 0, j;
|
|
|
|
const char **rec_argv;
|
|
|
|
int ret;
|
|
|
|
bool all_user = false, all_kernel = false;
|
|
|
|
bool event_set = false;
|
|
|
|
struct option options[] = {
|
|
|
|
OPT_CALLBACK('e', "event", &event_set, "event",
|
|
|
|
"event selector. Use 'perf mem record -e list' to list available events",
|
|
|
|
parse_record_events),
|
|
|
|
OPT_INCR('v', "verbose", &verbose,
|
|
|
|
"be more verbose (show counter open errors, etc)"),
|
|
|
|
OPT_BOOLEAN('u', "all-user", &all_user, "collect only user level data"),
|
|
|
|
OPT_BOOLEAN('k', "all-kernel", &all_kernel, "collect only kernel level data"),
|
|
|
|
OPT_UINTEGER('l', "ldlat", &perf_mem_events__loads_ldlat, "setup mem-loads latency"),
|
|
|
|
OPT_END()
|
|
|
|
};
|
|
|
|
|
|
|
|
if (perf_mem_events__init()) {
|
|
|
|
pr_err("failed: memory events not supported\n");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
argc = parse_options(argc, argv, options, record_mem_usage,
|
|
|
|
PARSE_OPT_KEEP_UNKNOWN);
|
|
|
|
|
|
|
|
rec_argc = argc + 10; /* max number of arguments */
|
|
|
|
rec_argv = calloc(rec_argc + 1, sizeof(char *));
|
|
|
|
if (!rec_argv)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
rec_argv[i++] = "record";
|
|
|
|
|
|
|
|
if (!event_set) {
|
|
|
|
perf_mem_events[PERF_MEM_EVENTS__LOAD].record = true;
|
|
|
|
perf_mem_events[PERF_MEM_EVENTS__STORE].record = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (perf_mem_events[PERF_MEM_EVENTS__LOAD].record)
|
|
|
|
rec_argv[i++] = "-W";
|
|
|
|
|
|
|
|
rec_argv[i++] = "-d";
|
|
|
|
rec_argv[i++] = "--sample-cpu";
|
|
|
|
|
|
|
|
for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
|
|
|
|
if (!perf_mem_events[j].record)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (!perf_mem_events[j].supported) {
|
|
|
|
pr_err("failed: event '%s' not supported\n",
|
|
|
|
perf_mem_events[j].name);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
rec_argv[i++] = "-e";
|
|
|
|
rec_argv[i++] = perf_mem_events__name(j);
|
|
|
|
};
|
|
|
|
|
|
|
|
if (all_user)
|
|
|
|
rec_argv[i++] = "--all-user";
|
|
|
|
|
|
|
|
if (all_kernel)
|
|
|
|
rec_argv[i++] = "--all-kernel";
|
|
|
|
|
|
|
|
for (j = 0; j < argc; j++, i++)
|
|
|
|
rec_argv[i] = argv[j];
|
|
|
|
|
|
|
|
if (verbose > 0) {
|
|
|
|
pr_debug("calling: ");
|
|
|
|
|
|
|
|
j = 0;
|
|
|
|
|
|
|
|
while (rec_argv[j]) {
|
|
|
|
pr_debug("%s ", rec_argv[j]);
|
|
|
|
j++;
|
|
|
|
}
|
|
|
|
pr_debug("\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = cmd_record(i, rec_argv, NULL);
|
|
|
|
free(rec_argv);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2016-09-22 15:36:38 +00:00
|
|
|
int cmd_c2c(int argc, const char **argv, const char *prefix __maybe_unused)
|
|
|
|
{
|
|
|
|
const struct option c2c_options[] = {
|
|
|
|
OPT_INCR('v', "verbose", &verbose, "be more verbose"),
|
|
|
|
OPT_END()
|
|
|
|
};
|
|
|
|
|
|
|
|
argc = parse_options(argc, argv, c2c_options, c2c_usage,
|
|
|
|
PARSE_OPT_STOP_AT_NON_OPTION);
|
perf c2c: Add record subcommand
Adding c2c record subcommand. It setups options related to HITM
cacheline analysis and calls standard perf record command.
$ sudo perf c2c record -v -- -a
calling: record -W -d --sample-cpu -e cpu/mem-loads,ldlat=30/P -e cpu/mem-stores/P -a
...
It produces perf.data, which is to be reported by perf c2c report, that
comes in following patches.
Details are described in the man page, which is added in one of the
following patches.
Committer notes:
Testing it:
# perf c2c record -a sleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 5.050 MB perf.data (412 samples) ]
# ls -la perf.data
-rw-------. 1 root root 5301752 Oct 4 13:32 perf.data
# perf evlist
cpu/mem-loads,ldlat=30/P
cpu/mem-stores/P
# perf evlist -v
cpu/mem-loads,ldlat=30/P: type: 4, size: 112, config: 0x1cd, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ADDR|ID|CPU|PERIOD|DATA_SRC|WEIGHT, read_format: ID, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1, precise_ip: 3, mmap_data: 1, sample_id_all: 1, mmap2: 1, comm_exec: 1, { bp_addr, config1 }: 0x1f
cpu/mem-stores/P: type: 4, size: 112, config: 0x82d0, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ADDR|ID|CPU|PERIOD|DATA_SRC|WEIGHT, read_format: ID, disabled: 1, inherit: 1, freq: 1, precise_ip: 3, sample_id_all: 1
#
# perf report --stdio
<SNIP>
# Total Lost Samples: 14
# Samples: 216 of event 'cpu/mem-loads,ldlat=30/P'
# Event count (approx.): 15207
# Overhead Symbol Shared Object
# ........ ..................................... ............................
10.32% [k] update_blocked_averages [kernel.vmlinux]
3.43% [.] 0x00000000001a2122 qemu-system-x86_64 (deleted)
2.52% [k] enqueue_entity [kernel.vmlinux]
1.88% [.] g_main_context_query libglib-2.0.so.0.4800.2
1.86% [k] __schedule [kernel.vmlinux]
<SNIP>
# Samples: 196 of event 'cpu/mem-stores/P'
# Event count (approx.): 14771346
# Overhead Symbol Shared Object
# ........ ................................... ............................
13.91% [k] intel_idle [kernel.vmlinux]
3.02% [.] 0x00000000022f06ea chrome
2.94% [.] 0x00000000001a1b4c qemu-system-x86_64 (deleted)
2.94% [.] 0x000000000019d8e4 qemu-system-x86_64 (deleted)
2.38% [.] 0x00000000001a1c52 qemu-system-x86_64 (deleted)
<SNIP>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Joe Mario <jmario@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1474558645-19956-12-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-09-22 15:36:39 +00:00
|
|
|
|
|
|
|
if (!argc)
|
|
|
|
usage_with_options(c2c_usage, c2c_options);
|
|
|
|
|
|
|
|
if (!strncmp(argv[0], "rec", 3)) {
|
|
|
|
return perf_c2c__record(argc, argv);
|
2016-09-22 15:36:40 +00:00
|
|
|
} else if (!strncmp(argv[0], "rep", 3)) {
|
|
|
|
return perf_c2c__report(argc, argv);
|
perf c2c: Add record subcommand
Adding c2c record subcommand. It setups options related to HITM
cacheline analysis and calls standard perf record command.
$ sudo perf c2c record -v -- -a
calling: record -W -d --sample-cpu -e cpu/mem-loads,ldlat=30/P -e cpu/mem-stores/P -a
...
It produces perf.data, which is to be reported by perf c2c report, that
comes in following patches.
Details are described in the man page, which is added in one of the
following patches.
Committer notes:
Testing it:
# perf c2c record -a sleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 5.050 MB perf.data (412 samples) ]
# ls -la perf.data
-rw-------. 1 root root 5301752 Oct 4 13:32 perf.data
# perf evlist
cpu/mem-loads,ldlat=30/P
cpu/mem-stores/P
# perf evlist -v
cpu/mem-loads,ldlat=30/P: type: 4, size: 112, config: 0x1cd, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ADDR|ID|CPU|PERIOD|DATA_SRC|WEIGHT, read_format: ID, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1, precise_ip: 3, mmap_data: 1, sample_id_all: 1, mmap2: 1, comm_exec: 1, { bp_addr, config1 }: 0x1f
cpu/mem-stores/P: type: 4, size: 112, config: 0x82d0, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ADDR|ID|CPU|PERIOD|DATA_SRC|WEIGHT, read_format: ID, disabled: 1, inherit: 1, freq: 1, precise_ip: 3, sample_id_all: 1
#
# perf report --stdio
<SNIP>
# Total Lost Samples: 14
# Samples: 216 of event 'cpu/mem-loads,ldlat=30/P'
# Event count (approx.): 15207
# Overhead Symbol Shared Object
# ........ ..................................... ............................
10.32% [k] update_blocked_averages [kernel.vmlinux]
3.43% [.] 0x00000000001a2122 qemu-system-x86_64 (deleted)
2.52% [k] enqueue_entity [kernel.vmlinux]
1.88% [.] g_main_context_query libglib-2.0.so.0.4800.2
1.86% [k] __schedule [kernel.vmlinux]
<SNIP>
# Samples: 196 of event 'cpu/mem-stores/P'
# Event count (approx.): 14771346
# Overhead Symbol Shared Object
# ........ ................................... ............................
13.91% [k] intel_idle [kernel.vmlinux]
3.02% [.] 0x00000000022f06ea chrome
2.94% [.] 0x00000000001a1b4c qemu-system-x86_64 (deleted)
2.94% [.] 0x000000000019d8e4 qemu-system-x86_64 (deleted)
2.38% [.] 0x00000000001a1c52 qemu-system-x86_64 (deleted)
<SNIP>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Joe Mario <jmario@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1474558645-19956-12-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-09-22 15:36:39 +00:00
|
|
|
} else {
|
|
|
|
usage_with_options(c2c_usage, c2c_options);
|
|
|
|
}
|
|
|
|
|
2016-09-22 15:36:38 +00:00
|
|
|
return 0;
|
|
|
|
}
|