From f19b5872d889bb91f5e204c0674e5c0dd1d5895e Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 25 Dec 2020 13:27:50 +0800 Subject: [PATCH 001/148] perf probe: Fixup Arm64 SDT arguments Arm64 ELF section '.note.stapsdt' uses string format "-4@[sp, NUM]" if the probe is to access data in stack, e.g. below is an example for dumping Arm64 ELF file and shows the argument format: Arguments: -4@[sp, 12] -4@[sp, 8] -4@[sp, 4] Comparing against other archs' argument format, Arm64's argument introduces an extra space character in the middle of square brackets, due to argv_split() uses space as splitter, the argument is wrongly divided into two items. To support Arm64 SDT, this patch fixes up for this case, if any item contains sub string "[sp", concatenates the two continuous items. And adds the detailed explaination in comment. Signed-off-by: Leo Yan Cc: Alexander Shishkin Cc: Alexandre Truong Cc: Alexis Berlemont Cc: He Zhe Cc: Ian Rogers Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sumanth Korikkar Cc: Thomas Richter Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20201225052751.24513-3-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-file.c | 38 ++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index bbecb449ea94..52273542e6ef 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -794,6 +794,8 @@ static char *synthesize_sdt_probe_command(struct sdt_note *note, char *ret = NULL; int i, args_count, err; unsigned long long ref_ctr_offset; + char *arg; + int arg_idx = 0; if (strbuf_init(&buf, 32) < 0) return NULL; @@ -818,11 +820,43 @@ static char *synthesize_sdt_probe_command(struct sdt_note *note, if (args == NULL) goto error; - for (i = 0; i < args_count; ++i) { - if (synthesize_sdt_probe_arg(&buf, i, args[i]) < 0) { + for (i = 0; i < args_count; ) { + /* + * FIXUP: Arm64 ELF section '.note.stapsdt' uses string + * format "-4@[sp, NUM]" if a probe is to access data in + * the stack, e.g. below is an example for the SDT + * Arguments: + * + * Arguments: -4@[sp, 12] -4@[sp, 8] -4@[sp, 4] + * + * Since the string introduces an extra space character + * in the middle of square brackets, the argument is + * divided into two items. Fixup for this case, if an + * item contains sub string "[sp,", need to concatenate + * the two items. + */ + if (strstr(args[i], "[sp,") && (i+1) < args_count) { + err = asprintf(&arg, "%s %s", args[i], args[i+1]); + i += 2; + } else { + err = asprintf(&arg, "%s", args[i]); + i += 1; + } + + /* Failed to allocate memory */ + if (err < 0) { argv_free(args); goto error; } + + if (synthesize_sdt_probe_arg(&buf, arg_idx, arg) < 0) { + free(arg); + argv_free(args); + goto error; + } + + free(arg); + arg_idx++; } argv_free(args); From feab999efefe3c7df2d83bc5d9a9360e8fd69ecf Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 25 Dec 2020 13:27:51 +0800 Subject: [PATCH 002/148] perf arm64: Add argument support for SDT Now the two OP formats are used for SDT marker argument in Arm64 ELF, one format is general register xNUM (e.g. x1, x2, etc), another is for using stack pointer to access local variables (e.g. [sp], [sp, 8]). This patch adds support SDT marker argument for Arm64, it parses OP and converts to uprobe compatible format. Signed-off-by: Leo Yan Acked-by: Masami Hiramatsu Cc: Alexander Shishkin Cc: Alexandre Truong Cc: Alexis Berlemont Cc: He Zhe Cc: Ian Rogers Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sumanth Korikkar Cc: Thomas Richter Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20201225052751.24513-4-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/util/perf_regs.c | 94 ++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/tools/perf/arch/arm64/util/perf_regs.c b/tools/perf/arch/arm64/util/perf_regs.c index 54efa12fdbea..2518cde18b34 100644 --- a/tools/perf/arch/arm64/util/perf_regs.c +++ b/tools/perf/arch/arm64/util/perf_regs.c @@ -1,4 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include + +#include "../../../util/debug.h" +#include "../../../util/event.h" #include "../../../util/perf_regs.h" const struct sample_reg sample_reg_masks[] = { @@ -37,3 +45,89 @@ const struct sample_reg sample_reg_masks[] = { SMPL_REG(pc, PERF_REG_ARM64_PC), SMPL_REG_END }; + +/* %xNUM */ +#define SDT_OP_REGEX1 "^(x[1-2]?[0-9]|3[0-1])$" + +/* [sp], [sp, NUM] */ +#define SDT_OP_REGEX2 "^\\[sp(, )?([0-9]+)?\\]$" + +static regex_t sdt_op_regex1, sdt_op_regex2; + +static int sdt_init_op_regex(void) +{ + static int initialized; + int ret = 0; + + if (initialized) + return 0; + + ret = regcomp(&sdt_op_regex1, SDT_OP_REGEX1, REG_EXTENDED); + if (ret) + goto error; + + ret = regcomp(&sdt_op_regex2, SDT_OP_REGEX2, REG_EXTENDED); + if (ret) + goto free_regex1; + + initialized = 1; + return 0; + +free_regex1: + regfree(&sdt_op_regex1); +error: + pr_debug4("Regex compilation error.\n"); + return ret; +} + +/* + * SDT marker arguments on Arm64 uses %xREG or [sp, NUM], currently + * support these two formats. + */ +int arch_sdt_arg_parse_op(char *old_op, char **new_op) +{ + int ret, new_len; + regmatch_t rm[5]; + + ret = sdt_init_op_regex(); + if (ret < 0) + return ret; + + if (!regexec(&sdt_op_regex1, old_op, 3, rm, 0)) { + /* Extract xNUM */ + new_len = 2; /* % NULL */ + new_len += (int)(rm[1].rm_eo - rm[1].rm_so); + + *new_op = zalloc(new_len); + if (!*new_op) + return -ENOMEM; + + scnprintf(*new_op, new_len, "%%%.*s", + (int)(rm[1].rm_eo - rm[1].rm_so), old_op + rm[1].rm_so); + } else if (!regexec(&sdt_op_regex2, old_op, 5, rm, 0)) { + /* [sp], [sp, NUM] or [sp,NUM] */ + new_len = 7; /* + ( % s p ) NULL */ + + /* If the arugment is [sp], need to fill offset '0' */ + if (rm[2].rm_so == -1) + new_len += 1; + else + new_len += (int)(rm[2].rm_eo - rm[2].rm_so); + + *new_op = zalloc(new_len); + if (!*new_op) + return -ENOMEM; + + if (rm[2].rm_so == -1) + scnprintf(*new_op, new_len, "+0(%%sp)"); + else + scnprintf(*new_op, new_len, "+%.*s(%%sp)", + (int)(rm[2].rm_eo - rm[2].rm_so), + old_op + rm[2].rm_so); + } else { + pr_debug4("Skipping unsupported SDT argument: %s\n", old_op); + return SDT_ARG_SKIP; + } + + return SDT_ARG_VALID; +} From dde587aa217484ee96de8b0f5ee29b8ccbf5c88c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 14 Dec 2020 11:54:46 +0100 Subject: [PATCH 003/148] tools headers uapi: Sync tools/include/uapi/linux/perf_event.h Syncing tools's uapi with mmap2 build id data changes. Committer notes: I'm taking the tools/ bits, so this will be in fact ahead of the kernel till the bpf/perf-kernel bits are merged. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Budankov Cc: Andi Kleen Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201214105457.543111-5-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/perf_event.h | 42 +++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 5 deletions(-) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index b15e3447cd9f..cb6f84103560 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -386,7 +386,8 @@ struct perf_event_attr { aux_output : 1, /* generate AUX records instead of events */ cgroup : 1, /* include cgroup events */ text_poke : 1, /* include text poke events */ - __reserved_1 : 30; + build_id : 1, /* use build id in mmap2 events */ + __reserved_1 : 29; union { __u32 wakeup_events; /* wakeup every n events */ @@ -659,6 +660,22 @@ struct perf_event_mmap_page { __u64 aux_size; }; +/* + * The current state of perf_event_header::misc bits usage: + * ('|' used bit, '-' unused bit) + * + * 012 CDEF + * |||---------|||| + * + * Where: + * 0-2 CPUMODE_MASK + * + * C PROC_MAP_PARSE_TIMEOUT + * D MMAP_DATA / COMM_EXEC / FORK_EXEC / SWITCH_OUT + * E MMAP_BUILD_ID / EXACT_IP / SCHED_OUT_PREEMPT + * F (reserved) + */ + #define PERF_RECORD_MISC_CPUMODE_MASK (7 << 0) #define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0) #define PERF_RECORD_MISC_KERNEL (1 << 0) @@ -690,6 +707,7 @@ struct perf_event_mmap_page { * * PERF_RECORD_MISC_EXACT_IP - PERF_RECORD_SAMPLE of precise events * PERF_RECORD_MISC_SWITCH_OUT_PREEMPT - PERF_RECORD_SWITCH* events + * PERF_RECORD_MISC_MMAP_BUILD_ID - PERF_RECORD_MMAP2 event * * * PERF_RECORD_MISC_EXACT_IP: @@ -699,9 +717,13 @@ struct perf_event_mmap_page { * * PERF_RECORD_MISC_SWITCH_OUT_PREEMPT: * Indicates that thread was preempted in TASK_RUNNING state. + * + * PERF_RECORD_MISC_MMAP_BUILD_ID: + * Indicates that mmap2 event carries build id data. */ #define PERF_RECORD_MISC_EXACT_IP (1 << 14) #define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT (1 << 14) +#define PERF_RECORD_MISC_MMAP_BUILD_ID (1 << 14) /* * Reserve the last bit to indicate some extended misc field */ @@ -915,10 +937,20 @@ enum perf_event_type { * u64 addr; * u64 len; * u64 pgoff; - * u32 maj; - * u32 min; - * u64 ino; - * u64 ino_generation; + * union { + * struct { + * u32 maj; + * u32 min; + * u64 ino; + * u64 ino_generation; + * }; + * struct { + * u8 build_id_size; + * u8 __reserved_1; + * u16 __reserved_2; + * u8 build_id[20]; + * }; + * }; * u32 prot, flags; * char filename[]; * struct sample_id sample_id; From 29245ae8ff658bd8cd0d78bfb8c2801e66db6ee7 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 14 Dec 2020 11:54:47 +0100 Subject: [PATCH 004/148] perf tools: Do not swap mmap2 fields in case it contains build id If the PERF_RECORD_MISC_MMAP_BUILD_ID misc bit is set, mmap2 events carries a build id, placed in the following union: union { struct { u32 maj; u32 min; u64 ino; u64 ino_generation; }; struct { u8 build_id_size; u8 __reserved_1; u16 __reserved_2; u8 build_id[20]; }; }; In this case we can't swap above fields. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Budankov Cc: Andi Kleen Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201214105457.543111-6-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 50ff9795a4f1..357d6b972b9d 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -593,10 +593,13 @@ static void perf_event__mmap2_swap(union perf_event *event, event->mmap2.start = bswap_64(event->mmap2.start); event->mmap2.len = bswap_64(event->mmap2.len); event->mmap2.pgoff = bswap_64(event->mmap2.pgoff); - event->mmap2.maj = bswap_32(event->mmap2.maj); - event->mmap2.min = bswap_32(event->mmap2.min); - event->mmap2.ino = bswap_64(event->mmap2.ino); - event->mmap2.ino_generation = bswap_64(event->mmap2.ino_generation); + + if (!(event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID)) { + event->mmap2.maj = bswap_32(event->mmap2.maj); + event->mmap2.min = bswap_32(event->mmap2.min); + event->mmap2.ino = bswap_64(event->mmap2.ino); + event->mmap2.ino_generation = bswap_64(event->mmap2.ino_generation); + } if (sample_id_all) { void *data = &event->mmap2.filename; From 1ca6e80254141d26262acc5471df9955ec40dbfb Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 14 Dec 2020 11:54:49 +0100 Subject: [PATCH 005/148] perf tools: Store build id when available in PERF_RECORD_MMAP2 metadata events When processing a PERF_RECORD_MMAP2 metadata event, check on the build id misc bit: PERF_RECORD_MISC_MMAP_BUILD_ID and if it is set, store the build id in mmap's dso object. Also adding the build id data to struct perf_record_mmap2 event definition. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Budankov Cc: Andi Kleen Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201214105457.543111-8-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/include/perf/event.h | 18 ++++++++++++++---- tools/perf/util/machine.c | 24 +++++++++++++++++++----- tools/perf/util/map.c | 8 ++++++-- tools/perf/util/map.h | 3 ++- 4 files changed, 41 insertions(+), 12 deletions(-) diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h index 988c539bedb6..d82054225fcc 100644 --- a/tools/lib/perf/include/perf/event.h +++ b/tools/lib/perf/include/perf/event.h @@ -23,10 +23,20 @@ struct perf_record_mmap2 { __u64 start; __u64 len; __u64 pgoff; - __u32 maj; - __u32 min; - __u64 ino; - __u64 ino_generation; + union { + struct { + __u32 maj; + __u32 min; + __u64 ino; + __u64 ino_generation; + }; + struct { + __u8 build_id_size; + __u8 __reserved_1; + __u16 __reserved_2; + __u8 build_id[20]; + }; + }; __u32 prot; __u32 flags; char filename[PATH_MAX]; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index f841f3503cae..522ea3236bcc 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1599,7 +1599,8 @@ static int machine__process_extra_kernel_map(struct machine *machine, } static int machine__process_kernel_mmap_event(struct machine *machine, - struct extra_kernel_map *xm) + struct extra_kernel_map *xm, + struct build_id *bid) { struct map *map; enum dso_space_type dso_space; @@ -1624,6 +1625,10 @@ static int machine__process_kernel_mmap_event(struct machine *machine, goto out_problem; map->end = map->start + xm->end - xm->start; + + if (build_id__is_defined(bid)) + dso__set_build_id(map->dso, bid); + } else if (is_kernel_mmap) { const char *symbol_name = (xm->name + strlen(machine->mmap_name)); /* @@ -1681,6 +1686,9 @@ static int machine__process_kernel_mmap_event(struct machine *machine, machine__update_kernel_mmap(machine, xm->start, xm->end); + if (build_id__is_defined(bid)) + dso__set_build_id(kernel, bid); + /* * Avoid using a zero address (kptr_restrict) for the ref reloc * symbol. Effectively having zero here means that at record @@ -1718,11 +1726,17 @@ int machine__process_mmap2_event(struct machine *machine, .ino = event->mmap2.ino, .ino_generation = event->mmap2.ino_generation, }; + struct build_id __bid, *bid = NULL; int ret = 0; if (dump_trace) perf_event__fprintf_mmap2(event, stdout); + if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) { + bid = &__bid; + build_id__init(bid, event->mmap2.build_id, event->mmap2.build_id_size); + } + if (sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL || sample->cpumode == PERF_RECORD_MISC_KERNEL) { struct extra_kernel_map xm = { @@ -1732,7 +1746,7 @@ int machine__process_mmap2_event(struct machine *machine, }; strlcpy(xm.name, event->mmap2.filename, KMAP_NAME_LEN); - ret = machine__process_kernel_mmap_event(machine, &xm); + ret = machine__process_kernel_mmap_event(machine, &xm, bid); if (ret < 0) goto out_problem; return 0; @@ -1746,7 +1760,7 @@ int machine__process_mmap2_event(struct machine *machine, map = map__new(machine, event->mmap2.start, event->mmap2.len, event->mmap2.pgoff, &dso_id, event->mmap2.prot, - event->mmap2.flags, + event->mmap2.flags, bid, event->mmap2.filename, thread); if (map == NULL) @@ -1789,7 +1803,7 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event }; strlcpy(xm.name, event->mmap.filename, KMAP_NAME_LEN); - ret = machine__process_kernel_mmap_event(machine, &xm); + ret = machine__process_kernel_mmap_event(machine, &xm, NULL); if (ret < 0) goto out_problem; return 0; @@ -1805,7 +1819,7 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event map = map__new(machine, event->mmap.start, event->mmap.len, event->mmap.pgoff, - NULL, prot, 0, event->mmap.filename, thread); + NULL, prot, 0, NULL, event->mmap.filename, thread); if (map == NULL) goto out_problem_map; diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index f44ede437dc7..692e56dc832e 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -130,8 +130,8 @@ void map__init(struct map *map, u64 start, u64 end, u64 pgoff, struct dso *dso) struct map *map__new(struct machine *machine, u64 start, u64 len, u64 pgoff, struct dso_id *id, - u32 prot, u32 flags, char *filename, - struct thread *thread) + u32 prot, u32 flags, struct build_id *bid, + char *filename, struct thread *thread) { struct map *map = malloc(sizeof(*map)); struct nsinfo *nsi = NULL; @@ -194,6 +194,10 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, dso__set_loaded(dso); } dso->nsinfo = nsi; + + if (build_id__is_defined(bid)) + dso__set_build_id(dso, bid); + dso__put(dso); } return map; diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index b1c0686db1b7..9f32825c98d8 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -104,10 +104,11 @@ void map__init(struct map *map, u64 start, u64 end, u64 pgoff, struct dso *dso); struct dso_id; +struct build_id; struct map *map__new(struct machine *machine, u64 start, u64 len, u64 pgoff, struct dso_id *id, u32 prot, u32 flags, - char *filename, struct thread *thread); + struct build_id *bid, char *filename, struct thread *thread); struct map *map__new2(u64 start, struct dso *dso); void map__delete(struct map *map); struct map *map__clone(struct map *map); From 978410ff9952169b3c5e87b2c71dafff9184a4a8 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 14 Dec 2020 11:54:50 +0100 Subject: [PATCH 006/148] perf tools: Allow using PERF_RECORD_MMAP2 to synthesize the kernel map Allow using PERF_RECORD_MMAP2 to synthesize the kernel map so that we can use PERF_RECORD_MMAP2 to encode the kernel build id in the following csets. It's enabled by a new symbol_conf.buildid_mmap2 bool field, which will be switchable in following changes. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Budankov Cc: Andi Kleen Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201214105457.543111-9-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol_conf.h | 3 ++- tools/perf/util/synthetic-events.c | 40 ++++++++++++++++++++---------- 2 files changed, 29 insertions(+), 14 deletions(-) diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h index b916afb95ec5..b18f9c8dbb75 100644 --- a/tools/perf/util/symbol_conf.h +++ b/tools/perf/util/symbol_conf.h @@ -42,7 +42,8 @@ struct symbol_conf { report_block, report_individual_block, inline_name, - disable_add2line_warn; + disable_add2line_warn, + buildid_mmap2; const char *vmlinux_name, *kallsyms_name, *source_prefix, diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 2947e3f3c6d9..746f35bc2e76 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -991,11 +991,12 @@ static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine) { - size_t size; + union perf_event *event; + size_t size = symbol_conf.buildid_mmap2 ? + sizeof(event->mmap2) : sizeof(event->mmap); struct map *map = machine__kernel_map(machine); struct kmap *kmap; int err; - union perf_event *event; if (map == NULL) return -1; @@ -1009,7 +1010,7 @@ static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool, * available use this, and after it is use this as a fallback for older * kernels. */ - event = zalloc((sizeof(event->mmap) + machine->id_hdr_size)); + event = zalloc(size + machine->id_hdr_size); if (event == NULL) { pr_debug("Not enough memory synthesizing mmap event " "for kernel modules\n"); @@ -1026,16 +1027,29 @@ static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool, event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; } - size = snprintf(event->mmap.filename, sizeof(event->mmap.filename), - "%s%s", machine->mmap_name, kmap->ref_reloc_sym->name) + 1; - size = PERF_ALIGN(size, sizeof(u64)); - event->mmap.header.type = PERF_RECORD_MMAP; - event->mmap.header.size = (sizeof(event->mmap) - - (sizeof(event->mmap.filename) - size) + machine->id_hdr_size); - event->mmap.pgoff = kmap->ref_reloc_sym->addr; - event->mmap.start = map->start; - event->mmap.len = map->end - event->mmap.start; - event->mmap.pid = machine->pid; + if (symbol_conf.buildid_mmap2) { + size = snprintf(event->mmap2.filename, sizeof(event->mmap2.filename), + "%s%s", machine->mmap_name, kmap->ref_reloc_sym->name) + 1; + size = PERF_ALIGN(size, sizeof(u64)); + event->mmap2.header.type = PERF_RECORD_MMAP2; + event->mmap2.header.size = (sizeof(event->mmap2) - + (sizeof(event->mmap2.filename) - size) + machine->id_hdr_size); + event->mmap2.pgoff = kmap->ref_reloc_sym->addr; + event->mmap2.start = map->start; + event->mmap2.len = map->end - event->mmap.start; + event->mmap2.pid = machine->pid; + } else { + size = snprintf(event->mmap.filename, sizeof(event->mmap.filename), + "%s%s", machine->mmap_name, kmap->ref_reloc_sym->name) + 1; + size = PERF_ALIGN(size, sizeof(u64)); + event->mmap.header.type = PERF_RECORD_MMAP; + event->mmap.header.size = (sizeof(event->mmap) - + (sizeof(event->mmap.filename) - size) + machine->id_hdr_size); + event->mmap.pgoff = kmap->ref_reloc_sym->addr; + event->mmap.start = map->start; + event->mmap.len = map->end - event->mmap.start; + event->mmap.pid = machine->pid; + } err = perf_tool__process_synth_event(tool, event, machine, process); free(event); From e0dbf18f657f130bb42c843d2b0d11ddf51ee8bb Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 14 Dec 2020 11:54:51 +0100 Subject: [PATCH 007/148] perf tools: Allow using PERF_RECORD_MMAP2 to synthesize the kernel modules maps Allow using PERF_RECORD_MMAP2 to synthesize the kernel modules maps so that we can use PERF_RECORD_MMAP2 to encode the kernel modules build ids in the following csets. It's enabled by a new symbol_conf.buildid_mmap2 bool field, which will be switchable in following changes. Signed-off-by: Jiri Olsa Acked-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Budankov Cc: Andi Kleen Cc: Mark Rutland Cc: Michael Petlan Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201214105457.543111-10-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/synthetic-events.c | 49 +++++++++++++++++++----------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 746f35bc2e76..126446de7bdd 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -596,16 +596,17 @@ int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t int rc = 0; struct map *pos; struct maps *maps = machine__kernel_maps(machine); - union perf_event *event = zalloc((sizeof(event->mmap) + - machine->id_hdr_size)); + union perf_event *event; + size_t size = symbol_conf.buildid_mmap2 ? + sizeof(event->mmap2) : sizeof(event->mmap); + + event = zalloc(size + machine->id_hdr_size); if (event == NULL) { pr_debug("Not enough memory synthesizing mmap event " "for kernel modules\n"); return -1; } - event->header.type = PERF_RECORD_MMAP; - /* * kernel uses 0 for user space maps, see kernel/perf_event.c * __perf_event_mmap @@ -616,23 +617,37 @@ int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; maps__for_each_entry(maps, pos) { - size_t size; - if (!__map__is_kmodule(pos)) continue; - size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64)); - event->mmap.header.type = PERF_RECORD_MMAP; - event->mmap.header.size = (sizeof(event->mmap) - - (sizeof(event->mmap.filename) - size)); - memset(event->mmap.filename + size, 0, machine->id_hdr_size); - event->mmap.header.size += machine->id_hdr_size; - event->mmap.start = pos->start; - event->mmap.len = pos->end - pos->start; - event->mmap.pid = machine->pid; + if (symbol_conf.buildid_mmap2) { + size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64)); + event->mmap2.header.type = PERF_RECORD_MMAP2; + event->mmap2.header.size = (sizeof(event->mmap2) - + (sizeof(event->mmap2.filename) - size)); + memset(event->mmap2.filename + size, 0, machine->id_hdr_size); + event->mmap2.header.size += machine->id_hdr_size; + event->mmap2.start = pos->start; + event->mmap2.len = pos->end - pos->start; + event->mmap2.pid = machine->pid; + + memcpy(event->mmap2.filename, pos->dso->long_name, + pos->dso->long_name_len + 1); + } else { + size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64)); + event->mmap.header.type = PERF_RECORD_MMAP; + event->mmap.header.size = (sizeof(event->mmap) - + (sizeof(event->mmap.filename) - size)); + memset(event->mmap.filename + size, 0, machine->id_hdr_size); + event->mmap.header.size += machine->id_hdr_size; + event->mmap.start = pos->start; + event->mmap.len = pos->end - pos->start; + event->mmap.pid = machine->pid; + + memcpy(event->mmap.filename, pos->dso->long_name, + pos->dso->long_name_len + 1); + } - memcpy(event->mmap.filename, pos->dso->long_name, - pos->dso->long_name_len + 1); if (perf_tool__process_synth_event(tool, event, machine, process) != 0) { rc = -1; break; From 4183a8d70a288627219942f84e1f83fd28717de3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 14 Dec 2020 11:54:52 +0100 Subject: [PATCH 008/148] perf tools: Allow synthesizing the build id for kernel/modules/tasks in PERF_RECORD_MMAP2 Adding build id to synthesized mmap2 events for everything - kernel/modules/tasks, when symbol_conf.buildid_mmap2 is true. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Budankov Cc: Andi Kleen Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201214105457.543111-11-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/synthetic-events.c | 32 ++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 126446de7bdd..69688f20db11 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -347,6 +347,31 @@ static bool read_proc_maps_line(struct io *io, __u64 *start, __u64 *end, } } +static void perf_record_mmap2__read_build_id(struct perf_record_mmap2 *event, + bool is_kernel) +{ + struct build_id bid; + int rc; + + if (is_kernel) + rc = sysfs__read_build_id("/sys/kernel/notes", &bid); + else + rc = filename__read_build_id(event->filename, &bid) > 0 ? 0 : -1; + + if (rc == 0) { + memcpy(event->build_id, bid.data, sizeof(bid.data)); + event->build_id_size = (u8) bid.size; + event->header.misc |= PERF_RECORD_MISC_MMAP_BUILD_ID; + event->__reserved_1 = 0; + event->__reserved_2 = 0; + } else { + if (event->filename[0] == '/') { + pr_debug2("Failed to read build ID for %s\n", + event->filename); + } + } +} + int perf_event__synthesize_mmap_events(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, @@ -453,6 +478,9 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, event->mmap2.pid = tgid; event->mmap2.tid = pid; + if (symbol_conf.buildid_mmap2) + perf_record_mmap2__read_build_id(&event->mmap2, false); + if (perf_tool__process_synth_event(tool, event, machine, process) != 0) { rc = -1; break; @@ -633,6 +661,8 @@ int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t memcpy(event->mmap2.filename, pos->dso->long_name, pos->dso->long_name_len + 1); + + perf_record_mmap2__read_build_id(&event->mmap2, false); } else { size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64)); event->mmap.header.type = PERF_RECORD_MMAP; @@ -1053,6 +1083,8 @@ static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool, event->mmap2.start = map->start; event->mmap2.len = map->end - event->mmap.start; event->mmap2.pid = machine->pid; + + perf_record_mmap2__read_build_id(&event->mmap2, true); } else { size = snprintf(event->mmap.filename, sizeof(event->mmap.filename), "%s%s", machine->mmap_name, kmap->ref_reloc_sym->name) + 1; From e29386c8f7d71fa59a6524b174a9c75ff74be580 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 14 Dec 2020 11:54:57 +0100 Subject: [PATCH 009/148] perf record: Add --buildid-mmap option to enable PERF_RECORD_MMAP2's build id Add --buildid-mmap option to enable build id in PERF_RECORD_MMAP2 events. It will only work if there's kernel support for that and it disables build id cache (implies --no-buildid). It's also possible to enable it permanently via config option in ~/.perfconfig file: [record] build-id=mmap Also added build_id bit in the verbose output for perf_event_attr: # perf record --buildid-mmap -vv ... perf_event_attr: type 1 size 120 ... build_id 1 Adding also missing text_poke bit. Committer testing: $ perf record -h build Usage: perf record [] [] or: perf record [] -- [] -B, --no-buildid do not collect buildids in perf.data -N, --no-buildid-cache do not update the buildid cache --buildid-all Record build-id of all DSOs regardless of hits --buildid-mmap Record build-id in map events $ $ perf record --buildid-mmap sleep 1 Failed: no support to record build id in mmap events, update your kernel. $ After adding the needed kernel bits in a test kernel: $ perf record -vv --buildid-mmap sleep 1 |& grep -m1 build Enabling build id in mmap2 events. $ perf evlist -v cycles:u: size: 120, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|PERIOD, read_format: ID, disabled: 1, inherit: 1, exclude_kernel: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, ksymbol: 1, bpf_event: 1, build_id: 1 $ Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Acked-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Budankov Cc: Andi Kleen Cc: Mark Rutland Cc: Michael Petlan Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201214105457.543111-16-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-config.txt | 3 ++- tools/perf/Documentation/perf-record.txt | 3 +++ tools/perf/builtin-record.c | 20 ++++++++++++++++++++ tools/perf/util/evsel.c | 10 ++++++---- tools/perf/util/perf_api_probe.c | 10 ++++++++++ tools/perf/util/perf_api_probe.h | 1 + tools/perf/util/perf_event_attr_fprintf.c | 2 ++ tools/perf/util/record.h | 1 + 8 files changed, 45 insertions(+), 5 deletions(-) diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 5c379adf8304..1a0e13404d08 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -552,11 +552,12 @@ kmem.*:: record.*:: record.build-id:: - This option can be 'cache', 'no-cache' or 'skip'. + This option can be 'cache', 'no-cache', 'skip' or 'mmap'. 'cache' is to post-process data and save/update the binaries into the build-id cache (in ~/.debug). This is the default. But if this option is 'no-cache', it will not update the build-id cache. 'skip' skips post-processing and does not update the cache. + 'mmap' skips post-processing and reads build-ids from MMAP events. record.call-graph:: This is identical to 'call-graph.record-mode', except it is diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 34cf651ee237..0042ff7f6f33 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -485,6 +485,9 @@ Specify vmlinux path which has debuginfo. --buildid-all:: Record build-id of all DSOs regardless whether it's actually hit or not. +--buildid-mmap:: +Record build ids in mmap2 events, disables build id cache (implies --no-buildid). + --aio[=n]:: Use control blocks in asynchronous (Posix AIO) trace writing mode (default: 1, max: 4). Asynchronous mode is supported only when linking Perf tool with libc library diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index fd3911650612..7bb10e9863bd 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -102,6 +102,7 @@ struct record { bool no_buildid_cache; bool no_buildid_cache_set; bool buildid_all; + bool buildid_mmap; bool timestamp_filename; bool timestamp_boundary; struct switch_output switch_output; @@ -2135,6 +2136,8 @@ static int perf_record_config(const char *var, const char *value, void *cb) rec->no_buildid_cache = true; else if (!strcmp(value, "skip")) rec->no_buildid = true; + else if (!strcmp(value, "mmap")) + rec->buildid_mmap = true; else return -1; return 0; @@ -2552,6 +2555,8 @@ static struct option __record_options[] = { "file", "vmlinux pathname"), OPT_BOOLEAN(0, "buildid-all", &record.buildid_all, "Record build-id of all DSOs regardless of hits"), + OPT_BOOLEAN(0, "buildid-mmap", &record.buildid_mmap, + "Record build-id in map events"), OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, "append timestamp to output filename"), OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary, @@ -2655,6 +2660,21 @@ int cmd_record(int argc, const char **argv) } + if (rec->buildid_mmap) { + if (!perf_can_record_build_id()) { + pr_err("Failed: no support to record build id in mmap events, update your kernel.\n"); + err = -EINVAL; + goto out_opts; + } + pr_debug("Enabling build id in mmap2 events.\n"); + /* Enable mmap build id synthesizing. */ + symbol_conf.buildid_mmap2 = true; + /* Enable perf_event_attr::build_id bit. */ + rec->opts.build_id = true; + /* Disable build id cache. */ + rec->no_buildid = true; + } + if (rec->opts.kcore) rec->data.is_dir = true; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index c26ea82220bd..dc0cfa5f2610 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1168,10 +1168,12 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, if (opts->sample_weight) evsel__set_sample_bit(evsel, WEIGHT); - attr->task = track; - attr->mmap = track; - attr->mmap2 = track && !perf_missing_features.mmap2; - attr->comm = track; + attr->task = track; + attr->mmap = track; + attr->mmap2 = track && !perf_missing_features.mmap2; + attr->comm = track; + attr->build_id = track && opts->build_id; + /* * ksymbol is tracked separately with text poke because it needs to be * system wide and enabled immediately. diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c index 3840d02f0f7b..829af17a0867 100644 --- a/tools/perf/util/perf_api_probe.c +++ b/tools/perf/util/perf_api_probe.c @@ -98,6 +98,11 @@ static void perf_probe_text_poke(struct evsel *evsel) evsel->core.attr.text_poke = 1; } +static void perf_probe_build_id(struct evsel *evsel) +{ + evsel->core.attr.build_id = 1; +} + bool perf_can_sample_identifier(void) { return perf_probe_api(perf_probe_sample_identifier); @@ -172,3 +177,8 @@ bool perf_can_aux_sample(void) return true; } + +bool perf_can_record_build_id(void) +{ + return perf_probe_api(perf_probe_build_id); +} diff --git a/tools/perf/util/perf_api_probe.h b/tools/perf/util/perf_api_probe.h index d5506a983a94..f12ca55f509a 100644 --- a/tools/perf/util/perf_api_probe.h +++ b/tools/perf/util/perf_api_probe.h @@ -11,5 +11,6 @@ bool perf_can_record_cpu_wide(void); bool perf_can_record_switch_events(void); bool perf_can_record_text_poke_events(void); bool perf_can_sample_identifier(void); +bool perf_can_record_build_id(void); #endif // __PERF_API_PROBE_H diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c index fb0bb6684438..22b417f43470 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -134,6 +134,8 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, PRINT_ATTRf(bpf_event, p_unsigned); PRINT_ATTRf(aux_output, p_unsigned); PRINT_ATTRf(cgroup, p_unsigned); + PRINT_ATTRf(text_poke, p_unsigned); + PRINT_ATTRf(build_id, p_unsigned); PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned); PRINT_ATTRf(bp_type, p_unsigned); diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h index 694b351dcd27..b996ce61fadd 100644 --- a/tools/perf/util/record.h +++ b/tools/perf/util/record.h @@ -50,6 +50,7 @@ struct record_opts { bool no_bpf_event; bool kcore; bool text_poke; + bool build_id; unsigned int freq; unsigned int mmap_pages; unsigned int auxtrace_mmap_pages; From 0b5c88214e9c6980ebb25a4fd7e1398f10adf2e2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 14 Dec 2020 11:54:53 +0100 Subject: [PATCH 010/148] perf tools: Add support to display build ids when available in PERF_RECORD_MMAP2 events Add support to display the build id in PERF_RECORD_MMAP2 events, when available: $ perf script --show-mmap-events | head -4 swapper ... @ 0xffffffff81000000 ]: ---p [kernel.kallsyms]_text swapper ... @ 0 <5f62adb730272c9417883ae8b8a8ec224df8cddd>]: ---p /lib/modules/5.9.0-rc5buildid+/kernel/drivers/firmware/qemu_fw_cfg.ko swapper ... @ 0 ]: ---p /lib/modules/5.9.0-rc5buildid+/kernel/drivers/char/virtio_console.ko swapper ... @ 0 <86441a4c5b2c2ff5b440682f4c612bd4b426eb5c>]: ---p /lib/modules/5.9.0-rc5buildid+/kernel/lib/libcrc32c.ko $ perf report -D | grep MMAP2 | head -4 0 0 ... @ 0xffffffff81000000 ]: ---p [kernel.kallsyms]_text 0 0 ... @ 0 <5f62adb730272c9417883ae8b8a8ec224df8cddd>]: ---p /lib/modules/5.9.0-rc5buildid+/kernel/drivers/firmware/qemu_fw_cfg.ko 0 0 ... @ 0 ]: ---p /lib/modules/5.9.0-rc5buildid+/kernel/drivers/char/virtio_console.ko 0 0 ... @ 0 <86441a4c5b2c2ff5b440682f4c612bd4b426eb5c>]: ---p /lib/modules/5.9.0-rc5buildid+/kernel/lib/libcrc32c.ko Adding build id data into <> brackets. Committer testing: $ perf record -vv --buildid-mmap sleep 1 |& grep -m1 build Enabling build id in mmap2 events. $ perf evlist -v cycles:u: size: 120, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|PERIOD, read_format: ID, disabled: 1, inherit: 1, exclude_kernel: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, ksymbol: 1, bpf_event: 1, build_id: 1 $ $ perf script --show-mmap-events | head -4 sleep 274800 2843.556112: PERF_RECORD_MMAP2 274800/274800: [0x564e2fd32000(0x3000) @ 0x2000 ]: r-xp /usr/bin/sleep sleep 274800 2843.556129: PERF_RECORD_MMAP2 274800/274800: [0x7fa9550d7000(0x21000) @ 0x1000 ]: r-xp /usr/lib64/ld-2.32.so sleep 274800 2843.556140: PERF_RECORD_MMAP2 274800/274800: [0x7ffd8fa96000(0x2000) @ 0 00:00 0 0]: r-xp [vdso] sleep 274800 2843.556162: 1 cycles:u: ffffffffbb26bff4 [unknown] ([unknown]) $ $ perf buildid-list -i /usr/bin/sleep c37cb90b77c79fc719798b066d78ef121285843e $ perf buildid-list -i /usr/lib64/ld-2.32.so fc190f17c4f4dc4a8a26df18eaeed41ecdb2c61b And now on a system wide session to check the build ids synthesized for the kernel and some kernel modules: # perf record -a --buildid-mmap sleep 2s [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.717 MB perf.data ] # perf script --show-mmap-events | head -4 swapper 0 [000] 0.000000: PERF_RECORD_MMAP2 -1/0: [0xffffffffbb000000(0xe02557) @ 0xffffffffbb000000 ]: ---p [kernel.kallsyms]_text swapper 0 [000] 0.000000: PERF_RECORD_MMAP2 -1/0: [0xffffffffc01dc000(0x6000) @ 0 <36d21515c0b22eb2859b6419a6cdf87ef4cd01c8>]: ---p /lib/modules/5.11.0-rc1+/kernel/drivers/i2c/i2c-dev.ko swapper 0 [000] 0.000000: PERF_RECORD_MMAP2 -1/0: [0xffffffffc01eb000(0x24000) @ 0 ]: ---p /lib/modules/5.11.0-rc1+/kernel/fs/fuse/fuse.ko swapper 0 [000] 0.000000: PERF_RECORD_MMAP2 -1/0: [0xffffffffc0210000(0x7000) @ 0 ]: ---p /lib/modules/5.11.0-rc1+/kernel/drivers/block/zram/zram.ko # perf buildid-list -h kernel Usage: perf buildid-list [] -k, --kernel Show current kernel build id # perf buildid-list --kernel e71ac4b0b0631c27181dab25d63be18dad02feb8 # file /lib/modules/5.11.0-rc1+/kernel/drivers/i2c/i2c-dev.ko /lib/modules/5.11.0-rc1+/kernel/drivers/i2c/i2c-dev.ko: ELF 64-bit LSB relocatable, x86-64, version 1 (SYSV), BuildID[sha1]=36d21515c0b22eb2859b6419a6cdf87ef4cd01c8, with debug_info, not stripped # perf buildid-list -i /lib/modules/5.11.0-rc1+/kernel/drivers/i2c/i2c-dev.ko 36d21515c0b22eb2859b6419a6cdf87ef4cd01c8 # perf buildid-list -i /lib/modules/5.11.0-rc1+/kernel/fs/fuse/fuse.ko c4fbfea32d0518b3e7879de8deca40ea142bb782 # perf buildid-list -i /lib/modules/5.11.0-rc1+/kernel/drivers/block/zram/zram.ko dd6cfb10ae66aa7b1e7b37000a004004be8092e0 # Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Acked-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Budankov Cc: Andi Kleen Cc: Mark Rutland Cc: Michael Petlan Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201214105457.543111-12-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.c | 41 ++++++++++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 05616d4138a9..fbe8578e4c47 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -288,17 +288,36 @@ size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp) size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp) { - return fprintf(fp, " %d/%d: [%#" PRI_lx64 "(%#" PRI_lx64 ") @ %#" PRI_lx64 - " %02x:%02x %"PRI_lu64" %"PRI_lu64"]: %c%c%c%c %s\n", - event->mmap2.pid, event->mmap2.tid, event->mmap2.start, - event->mmap2.len, event->mmap2.pgoff, event->mmap2.maj, - event->mmap2.min, event->mmap2.ino, - event->mmap2.ino_generation, - (event->mmap2.prot & PROT_READ) ? 'r' : '-', - (event->mmap2.prot & PROT_WRITE) ? 'w' : '-', - (event->mmap2.prot & PROT_EXEC) ? 'x' : '-', - (event->mmap2.flags & MAP_SHARED) ? 's' : 'p', - event->mmap2.filename); + if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) { + char sbuild_id[SBUILD_ID_SIZE]; + struct build_id bid; + + build_id__init(&bid, event->mmap2.build_id, + event->mmap2.build_id_size); + build_id__sprintf(&bid, sbuild_id); + + return fprintf(fp, " %d/%d: [%#" PRI_lx64 "(%#" PRI_lx64 ") @ %#" PRI_lx64 + " <%s>]: %c%c%c%c %s\n", + event->mmap2.pid, event->mmap2.tid, event->mmap2.start, + event->mmap2.len, event->mmap2.pgoff, sbuild_id, + (event->mmap2.prot & PROT_READ) ? 'r' : '-', + (event->mmap2.prot & PROT_WRITE) ? 'w' : '-', + (event->mmap2.prot & PROT_EXEC) ? 'x' : '-', + (event->mmap2.flags & MAP_SHARED) ? 's' : 'p', + event->mmap2.filename); + } else { + return fprintf(fp, " %d/%d: [%#" PRI_lx64 "(%#" PRI_lx64 ") @ %#" PRI_lx64 + " %02x:%02x %"PRI_lu64" %"PRI_lu64"]: %c%c%c%c %s\n", + event->mmap2.pid, event->mmap2.tid, event->mmap2.start, + event->mmap2.len, event->mmap2.pgoff, event->mmap2.maj, + event->mmap2.min, event->mmap2.ino, + event->mmap2.ino_generation, + (event->mmap2.prot & PROT_READ) ? 'r' : '-', + (event->mmap2.prot & PROT_WRITE) ? 'w' : '-', + (event->mmap2.prot & PROT_EXEC) ? 'x' : '-', + (event->mmap2.flags & MAP_SHARED) ? 's' : 'p', + event->mmap2.filename); + } } size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp) From e8a2061f0b60c06abfd2fbd9476c9925991579c0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 14 Dec 2020 11:54:55 +0100 Subject: [PATCH 011/148] perf buildid-cache: Add --debuginfod option to specify a server to fetch debug files Add the --debuginfod option to specify debuginfod URL and support to do that through config file as well. Use the following in ~/.perfconfig file: [buildid-cache] debuginfod=http://192.168.122.174:8002 Signed-off-by: Jiri Olsa Acked-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Budankov Cc: Andi Kleen Cc: Mark Rutland Cc: Michael Petlan Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201214105457.543111-14-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/Documentation/perf-buildid-cache.txt | 6 ++++ tools/perf/Documentation/perf-config.txt | 7 +++++ tools/perf/builtin-buildid-cache.c | 28 +++++++++++++++++-- 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt index f6de0952ff3c..bb167e32a1d7 100644 --- a/tools/perf/Documentation/perf-buildid-cache.txt +++ b/tools/perf/Documentation/perf-buildid-cache.txt @@ -74,6 +74,12 @@ OPTIONS used when creating a uprobe for a process that resides in a different mount namespace from the perf(1) utility. +--debuginfod=URLs:: + Specify debuginfod URL to be used when retrieving perf.data binaries, + it follows the same syntax as the DEBUGINFOD_URLS variable, like: + + buildid-cache.debuginfod=http://192.168.122.174:8002 + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-buildid-list[1] diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 1a0e13404d08..c3ce48f1b379 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -238,6 +238,13 @@ buildid.*:: cache location, or to disable it altogether. If you want to disable it, set buildid.dir to /dev/null. The default is $HOME/.debug +buildid-cache.*:: + buildid-cache.debuginfod=URLs + Specify debuginfod URLs to be used when retrieving perf.data binaries, + it follows the same syntax as the DEBUGINFOD_URLS variable, like: + + buildid-cache.debuginfod=http://192.168.122.174:8002 + annotate.*:: These are in control of addresses, jump function, source code in lines of assembly code from a specific program. diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index a25411926e48..ecd0d3cb6f5c 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -27,6 +27,7 @@ #include "util/time-utils.h" #include "util/util.h" #include "util/probe-file.h" +#include "util/config.h" #include #include @@ -348,12 +349,21 @@ static int build_id_cache__show_all(void) return 0; } +static int perf_buildid_cache_config(const char *var, const char *value, void *cb) +{ + const char **debuginfod = cb; + + if (!strcmp(var, "buildid-cache.debuginfod")) + *debuginfod = strdup(value); + + return 0; +} + int cmd_buildid_cache(int argc, const char **argv) { struct strlist *list; struct str_node *pos; - int ret = 0; - int ns_id = -1; + int ret, ns_id = -1; bool force = false; bool list_files = false; bool opts_flag = false; @@ -363,7 +373,8 @@ int cmd_buildid_cache(int argc, const char **argv) *purge_name_list_str = NULL, *missing_filename = NULL, *update_name_list_str = NULL, - *kcore_filename = NULL; + *kcore_filename = NULL, + *debuginfod = NULL; char sbuf[STRERR_BUFSIZE]; struct perf_data data = { @@ -388,6 +399,8 @@ int cmd_buildid_cache(int argc, const char **argv) OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), OPT_STRING('u', "update", &update_name_list_str, "file list", "file(s) to update"), + OPT_STRING(0, "debuginfod", &debuginfod, "debuginfod url", + "set debuginfod url"), OPT_INCR('v', "verbose", &verbose, "be more verbose"), OPT_INTEGER(0, "target-ns", &ns_id, "target pid for namespace context"), OPT_END() @@ -397,6 +410,10 @@ int cmd_buildid_cache(int argc, const char **argv) NULL }; + ret = perf_config(perf_buildid_cache_config, &debuginfod); + if (ret) + return ret; + argc = parse_options(argc, argv, buildid_cache_options, buildid_cache_usage, 0); @@ -408,6 +425,11 @@ int cmd_buildid_cache(int argc, const char **argv) if (argc || !(list_files || opts_flag)) usage_with_options(buildid_cache_usage, buildid_cache_options); + if (debuginfod) { + pr_debug("DEBUGINFOD_URLS=%s\n", debuginfod); + setenv("DEBUGINFOD_URLS", debuginfod, 1); + } + /* -l is exclusive. It can not be used with other options. */ if (list_files && opts_flag) { usage_with_options_msg(buildid_cache_usage, From d176db955827656791a6c0c44caa169e1c7458ec Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 14 Dec 2020 11:54:56 +0100 Subject: [PATCH 012/148] perf buildid-list: Add support for mmap2's buildid events Add buildid-list support for mmap2's build id data, so we can display build ids for dso objects for data without the build id cache update. $ perf buildid-list 1805c738c8f3ec0f47b7ea09080c28f34d18a82b /usr/lib64/ld-2.31.so d278249792061c6b74d1693ca59513be1def13f2 /usr/lib64/libc-2.31.so By default only dso objects with hits are shown. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Budankov Cc: Andi Kleen Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201214105457.543111-15-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-buildid-list.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index e3ef75583514..87f5b1a4a7fa 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -77,6 +77,9 @@ static int perf_session__list_build_ids(bool force, bool with_hits) perf_header__has_feat(&session->header, HEADER_AUXTRACE)) with_hits = false; + if (!perf_header__has_feat(&session->header, HEADER_BUILD_ID)) + with_hits = true; + /* * in pipe-mode, the only way to get the buildids is to parse * the record stream. Buildids are stored as RECORD_HEADER_BUILD_ID From c07b45a355ee42665a7a15fa1a09dcc459db9fb8 Mon Sep 17 00:00:00 2001 From: Hans-Peter Nilsson Date: Mon, 28 Dec 2020 04:19:08 +0100 Subject: [PATCH 013/148] perf record: Tweak "Lowering..." warning in record_opts__config_freq That is, instead of "Lowering default frequency rate to " say "Lowering default frequency rate from to ", specifying the overridden default frequency , so you don't have to grep through the source to "remember" that was e.g. 4000. Signed-off-by: Hans-Peter Nilsson Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20201228031908.B049B203B5@pchp3.se.axis.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/record.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index e70c9dd04567..1c1f24d9b6d0 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -201,10 +201,10 @@ static int record_opts__config_freq(struct record_opts *opts) * Default frequency is over current maximum. */ if (max_rate < opts->freq) { - pr_warning("Lowering default frequency rate to %u.\n" + pr_warning("Lowering default frequency rate from %u to %u.\n" "Please consider tweaking " "/proc/sys/kernel/perf_event_max_sample_rate.\n", - max_rate); + opts->freq, max_rate); opts->freq = max_rate; } From d2032d45101670be3a0fe221c815145a41ae2672 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 28 Dec 2020 09:40:51 -0800 Subject: [PATCH 014/148] bpftool: Add Makefile target bootstrap This target is used to only build the bootstrap bpftool, which will be used to generate bpf skeletons for other tools, like perf. Signed-off-by: Song Liu Tested-by: Arnaldo Carvalho de Melo Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: kernel-team@fb.com Link: http://lore.kernel.org/lkml/20201228174054.907740-2-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/bpf/bpftool/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index f897cb5fb12d..e3292a3a0c46 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -148,6 +148,8 @@ VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ /boot/vmlinux-$(shell uname -r) VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) +bootstrap: $(BPFTOOL_BOOTSTRAP) + ifneq ($(VMLINUX_BTF)$(VMLINUX_H),) ifeq ($(feature-clang-bpf-co-re),1) From fbcdaa1908e8f61aa56c71a1db9a9deb72110a9d Mon Sep 17 00:00:00 2001 From: Song Liu Date: Tue, 29 Dec 2020 13:42:13 -0800 Subject: [PATCH 015/148] perf build: Support build BPF skeletons with perf BPF programs are useful in perf to profile BPF programs. BPF skeleton is by far the easiest way to write BPF tools. Enable building BPF skeletons in util/bpf_skel. A dummy bpf skeleton is added. More bpf skeletons will be added for different use cases. Signed-off-by: Song Liu Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Mark Rutland Cc: Peter Zijlstra Cc: kernel-team@fb.com Link: http://lore.kernel.org/lkml/20201229214214.3413833-3-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile.feature | 4 ++- tools/perf/Makefile.config | 9 ++++++ tools/perf/Makefile.perf | 49 +++++++++++++++++++++++++++-- tools/perf/util/bpf_skel/.gitignore | 3 ++ tools/scripts/Makefile.include | 1 + 5 files changed, 63 insertions(+), 3 deletions(-) create mode 100644 tools/perf/util/bpf_skel/.gitignore diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 97cbfb31b762..74e255d58d8d 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -99,7 +99,9 @@ FEATURE_TESTS_EXTRA := \ clang \ libbpf \ libpfm4 \ - libdebuginfod + libdebuginfod \ + clang-bpf-co-re + FEATURE_TESTS ?= $(FEATURE_TESTS_BASIC) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index ce8516e4de34..d8e59d31399a 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -621,6 +621,15 @@ ifndef NO_LIBBPF endif endif +ifdef BUILD_BPF_SKEL + $(call feature_check,clang-bpf-co-re) + ifeq ($(feature-clang-bpf-co-re), 0) + dummy := $(error Error: clang too old. Please install recent clang) + endif + $(call detected,CONFIG_PERF_BPF_SKEL) + CFLAGS += -DHAVE_BPF_SKEL +endif + dwarf-post-unwind := 1 dwarf-post-unwind-text := BUG diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 62f3deb1d3a8..d182a2dbb9bb 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -126,6 +126,8 @@ include ../scripts/utilities.mak # # Define NO_LIBDEBUGINFOD if you do not want support debuginfod # +# Define BUILD_BPF_SKEL to enable BPF skeletons +# # As per kernel Makefile, avoid funny character set dependencies unexport LC_ALL @@ -175,6 +177,12 @@ endef LD += $(EXTRA_LDFLAGS) +HOSTCC ?= gcc +HOSTLD ?= ld +HOSTAR ?= ar +CLANG ?= clang +LLVM_STRIP ?= llvm-strip + PKG_CONFIG = $(CROSS_COMPILE)pkg-config LLVM_CONFIG ?= llvm-config @@ -731,7 +739,8 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc $(x86_arch_prctl_code_array) \ $(rename_flags_array) \ $(arch_errno_name_array) \ - $(sync_file_range_arrays) + $(sync_file_range_arrays) \ + bpf-skel $(OUTPUT)%.o: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ @@ -1004,7 +1013,43 @@ config-clean: python-clean: $(python-clean) -clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBPERF)-clean config-clean fixdep-clean python-clean +SKEL_OUT := $(abspath $(OUTPUT)util/bpf_skel) +SKEL_TMP_OUT := $(abspath $(SKEL_OUT)/.tmp) +SKELETONS := + +ifdef BUILD_BPF_SKEL +BPFTOOL := $(SKEL_TMP_OUT)/bootstrap/bpftool +LIBBPF_SRC := $(abspath ../lib/bpf) +BPF_INCLUDE := -I$(SKEL_TMP_OUT)/.. -I$(BPF_PATH) -I$(LIBBPF_SRC)/.. + +$(SKEL_TMP_OUT): + $(Q)$(MKDIR) -p $@ + +$(BPFTOOL): | $(SKEL_TMP_OUT) + CFLAGS= $(MAKE) -C ../bpf/bpftool \ + OUTPUT=$(SKEL_TMP_OUT)/ bootstrap + +$(SKEL_TMP_OUT)/%.bpf.o: util/bpf_skel/%.bpf.c $(LIBBPF) | $(SKEL_TMP_OUT) + $(QUIET_CLANG)$(CLANG) -g -O2 -target bpf $(BPF_INCLUDE) \ + -c $(filter util/bpf_skel/%.bpf.c,$^) -o $@ && $(LLVM_STRIP) -g $@ + +$(SKEL_OUT)/%.skel.h: $(SKEL_TMP_OUT)/%.bpf.o | $(BPFTOOL) + $(QUIET_GENSKEL)$(BPFTOOL) gen skeleton $< > $@ + +bpf-skel: $(SKELETONS) + +.PRECIOUS: $(SKEL_TMP_OUT)/%.bpf.o + +else # BUILD_BPF_SKEL + +bpf-skel: + +endif # BUILD_BPF_SKEL + +bpf-skel-clean: + $(call QUIET_CLEAN, bpf-skel) $(RM) -r $(SKEL_TMP_OUT) $(SKELETONS) + +clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBPERF)-clean config-clean fixdep-clean python-clean bpf-skel-clean $(call QUIET_CLEAN, core-objs) $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS) $(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete $(Q)$(RM) $(OUTPUT).config-detected diff --git a/tools/perf/util/bpf_skel/.gitignore b/tools/perf/util/bpf_skel/.gitignore new file mode 100644 index 000000000000..5263e9e6c5d8 --- /dev/null +++ b/tools/perf/util/bpf_skel/.gitignore @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +.tmp +*.skel.h \ No newline at end of file diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index 1358e89cdf7d..62119ce69ad9 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -127,6 +127,7 @@ ifneq ($(silent),1) $(MAKE) $(PRINT_DIR) -C $$subdir QUIET_FLEX = @echo ' FLEX '$@; QUIET_BISON = @echo ' BISON '$@; + QUIET_GENSKEL = @echo ' GEN-SKEL '$@; descend = \ +@echo ' DESCEND '$(1); \ From fa853c4b839ece9cd589e8858819240933cc4d78 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Tue, 29 Dec 2020 13:42:14 -0800 Subject: [PATCH 016/148] perf stat: Enable counting events for BPF programs Introduce 'perf stat -b' option, which counts events for BPF programs, like: [root@localhost ~]# ~/perf stat -e ref-cycles,cycles -b 254 -I 1000 1.487903822 115,200 ref-cycles 1.487903822 86,012 cycles 2.489147029 80,560 ref-cycles 2.489147029 73,784 cycles 3.490341825 60,720 ref-cycles 3.490341825 37,797 cycles 4.491540887 37,120 ref-cycles 4.491540887 31,963 cycles The example above counts 'cycles' and 'ref-cycles' of BPF program of id 254. This is similar to bpftool-prog-profile command, but more flexible. 'perf stat -b' creates per-cpu perf_event and loads fentry/fexit BPF programs (monitor-progs) to the target BPF program (target-prog). The monitor-progs read perf_event before and after the target-prog, and aggregate the difference in a BPF map. Then the user space reads data from these maps. A new 'struct bpf_counter' is introduced to provide a common interface that uses BPF programs/maps to count perf events. Committer notes: Removed all but bpf_counter.h includes from evsel.h, not needed at all. Also BPF map lookups for PERCPU_ARRAYs need to have as its value receive buffer passed to the kernel libbpf_num_possible_cpus() entries, not evsel__nr_cpus(evsel), as the former uses /sys/devices/system/cpu/possible while the later uses /sys/devices/system/cpu/online, which may be less than the 'possible' number making the bpf map lookup overwrite memory and cause hard to debug memory corruption. We need to continue using evsel__nr_cpus(evsel) when accessing the perf_counts array tho, not to overwrite another are of memory :-) Signed-off-by: Song Liu Tested-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/lkml/20210120163031.GU12699@kernel.org/ Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: kernel-team@fb.com Link: http://lore.kernel.org/lkml/20201229214214.3413833-4-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-stat.txt | 18 + tools/perf/Makefile.perf | 2 +- tools/perf/builtin-stat.c | 82 ++++- tools/perf/util/Build | 1 + tools/perf/util/bpf_counter.c | 314 ++++++++++++++++++ tools/perf/util/bpf_counter.h | 72 ++++ .../util/bpf_skel/bpf_prog_profiler.bpf.c | 93 ++++++ tools/perf/util/evsel.c | 5 + tools/perf/util/evsel.h | 5 + tools/perf/util/python.c | 21 ++ tools/perf/util/stat-display.c | 4 +- tools/perf/util/stat.c | 2 +- tools/perf/util/target.c | 34 +- tools/perf/util/target.h | 10 + 14 files changed, 645 insertions(+), 18 deletions(-) create mode 100644 tools/perf/util/bpf_counter.c create mode 100644 tools/perf/util/bpf_counter.h create mode 100644 tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 5d4a673d7621..98a4dfd3b2dc 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -75,6 +75,24 @@ report:: --tid=:: stat events on existing thread id (comma separated list) +-b:: +--bpf-prog:: + stat events on existing bpf program id (comma separated list), + requiring root rights. bpftool-prog could be used to find program + id all bpf programs in the system. For example: + + # bpftool prog | head -n 1 + 17247: tracepoint name sys_enter tag 192d548b9d754067 gpl + + # perf stat -e cycles,instructions --bpf-prog 17247 --timeout 1000 + + Performance counter stats for 'BPF program(s) 17247': + + 85,967 cycles + 28,982 instructions # 0.34 insn per cycle + + 1.102235068 seconds time elapsed + ifdef::HAVE_LIBPFM[] --pfm-events events:: Select a PMU event using libpfm4 syntax (see http://perfmon2.sf.net) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index d182a2dbb9bb..8c4e039c3b81 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -1015,7 +1015,7 @@ python-clean: SKEL_OUT := $(abspath $(OUTPUT)util/bpf_skel) SKEL_TMP_OUT := $(abspath $(SKEL_OUT)/.tmp) -SKELETONS := +SKELETONS := $(SKEL_OUT)/bpf_prog_profiler.skel.h ifdef BUILD_BPF_SKEL BPFTOOL := $(SKEL_TMP_OUT)/bootstrap/bpftool diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 8cc24967bc27..3c054b8d4677 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -67,6 +67,7 @@ #include "util/top.h" #include "util/affinity.h" #include "util/pfm.h" +#include "util/bpf_counter.h" #include "asm/bug.h" #include @@ -409,12 +410,32 @@ static int read_affinity_counters(struct timespec *rs) return 0; } +static int read_bpf_map_counters(void) +{ + struct evsel *counter; + int err; + + evlist__for_each_entry(evsel_list, counter) { + err = bpf_counter__read(counter); + if (err) + return err; + } + return 0; +} + static void read_counters(struct timespec *rs) { struct evsel *counter; + int err; - if (!stat_config.stop_read_counter && (read_affinity_counters(rs) < 0)) - return; + if (!stat_config.stop_read_counter) { + if (target__has_bpf(&target)) + err = read_bpf_map_counters(); + else + err = read_affinity_counters(rs); + if (err < 0) + return; + } evlist__for_each_entry(evsel_list, counter) { if (counter->err) @@ -496,11 +517,22 @@ static bool handle_interval(unsigned int interval, int *times) return false; } -static void enable_counters(void) +static int enable_counters(void) { + struct evsel *evsel; + int err; + + if (target__has_bpf(&target)) { + evlist__for_each_entry(evsel_list, evsel) { + err = bpf_counter__enable(evsel); + if (err) + return err; + } + } + if (stat_config.initial_delay < 0) { pr_info(EVLIST_DISABLED_MSG); - return; + return 0; } if (stat_config.initial_delay > 0) { @@ -518,6 +550,7 @@ static void enable_counters(void) if (stat_config.initial_delay > 0) pr_info(EVLIST_ENABLED_MSG); } + return 0; } static void disable_counters(void) @@ -720,7 +753,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) const bool forks = (argc > 0); bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false; struct affinity affinity; - int i, cpu; + int i, cpu, err; bool second_pass = false; if (forks) { @@ -737,6 +770,13 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) if (affinity__setup(&affinity) < 0) return -1; + if (target__has_bpf(&target)) { + evlist__for_each_entry(evsel_list, counter) { + if (bpf_counter__load(counter, &target)) + return -1; + } + } + evlist__for_each_cpu (evsel_list, i, cpu) { affinity__set(&affinity, cpu); @@ -850,7 +890,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) } if (STAT_RECORD) { - int err, fd = perf_data__fd(&perf_stat.data); + int fd = perf_data__fd(&perf_stat.data); if (is_pipe) { err = perf_header__write_pipe(perf_data__fd(&perf_stat.data)); @@ -876,7 +916,9 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) if (forks) { evlist__start_workload(evsel_list); - enable_counters(); + err = enable_counters(); + if (err) + return -1; if (interval || timeout || evlist__ctlfd_initialized(evsel_list)) status = dispatch_events(forks, timeout, interval, ×); @@ -895,7 +937,9 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) if (WIFSIGNALED(status)) psignal(WTERMSIG(status), argv[0]); } else { - enable_counters(); + err = enable_counters(); + if (err) + return -1; status = dispatch_events(forks, timeout, interval, ×); } @@ -1085,6 +1129,10 @@ static struct option stat_options[] = { "stat events on existing process id"), OPT_STRING('t', "tid", &target.tid, "tid", "stat events on existing thread id"), +#ifdef HAVE_BPF_SKEL + OPT_STRING('b', "bpf-prog", &target.bpf_str, "bpf-prog-id", + "stat events on existing bpf program id"), +#endif OPT_BOOLEAN('a', "all-cpus", &target.system_wide, "system-wide collection from all CPUs"), OPT_BOOLEAN('g', "group", &group, @@ -2064,11 +2112,12 @@ int cmd_stat(int argc, const char **argv) "perf stat [] []", NULL }; - int status = -EINVAL, run_idx; + int status = -EINVAL, run_idx, err; const char *mode; FILE *output = stderr; unsigned int interval, timeout; const char * const stat_subcommands[] = { "record", "report" }; + char errbuf[BUFSIZ]; setlocale(LC_ALL, ""); @@ -2179,6 +2228,12 @@ int cmd_stat(int argc, const char **argv) } else if (big_num_opt == 0) /* User passed --no-big-num */ stat_config.big_num = false; + err = target__validate(&target); + if (err) { + target__strerror(&target, err, errbuf, BUFSIZ); + pr_warning("%s\n", errbuf); + } + setup_system_wide(argc); /* @@ -2252,8 +2307,6 @@ int cmd_stat(int argc, const char **argv) } } - target__validate(&target); - if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide)) target.per_thread = true; @@ -2384,9 +2437,10 @@ int cmd_stat(int argc, const char **argv) * tools remain -acme */ int fd = perf_data__fd(&perf_stat.data); - int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat, - process_synthesized_event, - &perf_stat.session->machines.host); + + err = perf_event__synthesize_kernel_mmap((void *)&perf_stat, + process_synthesized_event, + &perf_stat.session->machines.host); if (err) { pr_warning("Couldn't synthesize the kernel mmap record, harmless, " "older tools may produce warnings about this file\n."); diff --git a/tools/perf/util/Build b/tools/perf/util/Build index e2563d0154eb..188521f34347 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -135,6 +135,7 @@ perf-y += clockid.o perf-$(CONFIG_LIBBPF) += bpf-loader.o perf-$(CONFIG_LIBBPF) += bpf_map.o +perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o perf-$(CONFIG_LIBELF) += symbol-elf.o perf-$(CONFIG_LIBELF) += probe-file.o diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c new file mode 100644 index 000000000000..04f89120b323 --- /dev/null +++ b/tools/perf/util/bpf_counter.c @@ -0,0 +1,314 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Copyright (c) 2019 Facebook */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bpf_counter.h" +#include "counts.h" +#include "debug.h" +#include "evsel.h" +#include "target.h" + +#include "bpf_skel/bpf_prog_profiler.skel.h" + +static inline void *u64_to_ptr(__u64 ptr) +{ + return (void *)(unsigned long)ptr; +} + +static void set_max_rlimit(void) +{ + struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY }; + + setrlimit(RLIMIT_MEMLOCK, &rinf); +} + +static struct bpf_counter *bpf_counter_alloc(void) +{ + struct bpf_counter *counter; + + counter = zalloc(sizeof(*counter)); + if (counter) + INIT_LIST_HEAD(&counter->list); + return counter; +} + +static int bpf_program_profiler__destroy(struct evsel *evsel) +{ + struct bpf_counter *counter, *tmp; + + list_for_each_entry_safe(counter, tmp, + &evsel->bpf_counter_list, list) { + list_del_init(&counter->list); + bpf_prog_profiler_bpf__destroy(counter->skel); + free(counter); + } + assert(list_empty(&evsel->bpf_counter_list)); + + return 0; +} + +static char *bpf_target_prog_name(int tgt_fd) +{ + struct bpf_prog_info_linear *info_linear; + struct bpf_func_info *func_info; + const struct btf_type *t; + char *name = NULL; + struct btf *btf; + + info_linear = bpf_program__get_prog_info_linear( + tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO); + if (IS_ERR_OR_NULL(info_linear)) { + pr_debug("failed to get info_linear for prog FD %d\n", tgt_fd); + return NULL; + } + + if (info_linear->info.btf_id == 0 || + btf__get_from_id(info_linear->info.btf_id, &btf)) { + pr_debug("prog FD %d doesn't have valid btf\n", tgt_fd); + goto out; + } + + func_info = u64_to_ptr(info_linear->info.func_info); + t = btf__type_by_id(btf, func_info[0].type_id); + if (!t) { + pr_debug("btf %d doesn't have type %d\n", + info_linear->info.btf_id, func_info[0].type_id); + goto out; + } + name = strdup(btf__name_by_offset(btf, t->name_off)); +out: + free(info_linear); + return name; +} + +static int bpf_program_profiler_load_one(struct evsel *evsel, u32 prog_id) +{ + struct bpf_prog_profiler_bpf *skel; + struct bpf_counter *counter; + struct bpf_program *prog; + char *prog_name; + int prog_fd; + int err; + + prog_fd = bpf_prog_get_fd_by_id(prog_id); + if (prog_fd < 0) { + pr_err("Failed to open fd for bpf prog %u\n", prog_id); + return -1; + } + counter = bpf_counter_alloc(); + if (!counter) { + close(prog_fd); + return -1; + } + + skel = bpf_prog_profiler_bpf__open(); + if (!skel) { + pr_err("Failed to open bpf skeleton\n"); + goto err_out; + } + + skel->rodata->num_cpu = evsel__nr_cpus(evsel); + + bpf_map__resize(skel->maps.events, evsel__nr_cpus(evsel)); + bpf_map__resize(skel->maps.fentry_readings, 1); + bpf_map__resize(skel->maps.accum_readings, 1); + + prog_name = bpf_target_prog_name(prog_fd); + if (!prog_name) { + pr_err("Failed to get program name for bpf prog %u. Does it have BTF?\n", prog_id); + goto err_out; + } + + bpf_object__for_each_program(prog, skel->obj) { + err = bpf_program__set_attach_target(prog, prog_fd, prog_name); + if (err) { + pr_err("bpf_program__set_attach_target failed.\n" + "Does bpf prog %u have BTF?\n", prog_id); + goto err_out; + } + } + set_max_rlimit(); + err = bpf_prog_profiler_bpf__load(skel); + if (err) { + pr_err("bpf_prog_profiler_bpf__load failed\n"); + goto err_out; + } + + assert(skel != NULL); + counter->skel = skel; + list_add(&counter->list, &evsel->bpf_counter_list); + close(prog_fd); + return 0; +err_out: + bpf_prog_profiler_bpf__destroy(skel); + free(counter); + close(prog_fd); + return -1; +} + +static int bpf_program_profiler__load(struct evsel *evsel, struct target *target) +{ + char *bpf_str, *bpf_str_, *tok, *saveptr = NULL, *p; + u32 prog_id; + int ret; + + bpf_str_ = bpf_str = strdup(target->bpf_str); + if (!bpf_str) + return -1; + + while ((tok = strtok_r(bpf_str, ",", &saveptr)) != NULL) { + prog_id = strtoul(tok, &p, 10); + if (prog_id == 0 || prog_id == UINT_MAX || + (*p != '\0' && *p != ',')) { + pr_err("Failed to parse bpf prog ids %s\n", + target->bpf_str); + return -1; + } + + ret = bpf_program_profiler_load_one(evsel, prog_id); + if (ret) { + bpf_program_profiler__destroy(evsel); + free(bpf_str_); + return -1; + } + bpf_str = NULL; + } + free(bpf_str_); + return 0; +} + +static int bpf_program_profiler__enable(struct evsel *evsel) +{ + struct bpf_counter *counter; + int ret; + + list_for_each_entry(counter, &evsel->bpf_counter_list, list) { + assert(counter->skel != NULL); + ret = bpf_prog_profiler_bpf__attach(counter->skel); + if (ret) { + bpf_program_profiler__destroy(evsel); + return ret; + } + } + return 0; +} + +static int bpf_program_profiler__read(struct evsel *evsel) +{ + // perf_cpu_map uses /sys/devices/system/cpu/online + int num_cpu = evsel__nr_cpus(evsel); + // BPF_MAP_TYPE_PERCPU_ARRAY uses /sys/devices/system/cpu/possible + // Sometimes possible > online, like on a Ryzen 3900X that has 24 + // threads but its possible showed 0-31 -acme + int num_cpu_bpf = libbpf_num_possible_cpus(); + struct bpf_perf_event_value values[num_cpu_bpf]; + struct bpf_counter *counter; + int reading_map_fd; + __u32 key = 0; + int err, cpu; + + if (list_empty(&evsel->bpf_counter_list)) + return -EAGAIN; + + for (cpu = 0; cpu < num_cpu; cpu++) { + perf_counts(evsel->counts, cpu, 0)->val = 0; + perf_counts(evsel->counts, cpu, 0)->ena = 0; + perf_counts(evsel->counts, cpu, 0)->run = 0; + } + list_for_each_entry(counter, &evsel->bpf_counter_list, list) { + struct bpf_prog_profiler_bpf *skel = counter->skel; + + assert(skel != NULL); + reading_map_fd = bpf_map__fd(skel->maps.accum_readings); + + err = bpf_map_lookup_elem(reading_map_fd, &key, values); + if (err) { + pr_err("failed to read value\n"); + return err; + } + + for (cpu = 0; cpu < num_cpu; cpu++) { + perf_counts(evsel->counts, cpu, 0)->val += values[cpu].counter; + perf_counts(evsel->counts, cpu, 0)->ena += values[cpu].enabled; + perf_counts(evsel->counts, cpu, 0)->run += values[cpu].running; + } + } + return 0; +} + +static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu, + int fd) +{ + struct bpf_prog_profiler_bpf *skel; + struct bpf_counter *counter; + int ret; + + list_for_each_entry(counter, &evsel->bpf_counter_list, list) { + skel = counter->skel; + assert(skel != NULL); + + ret = bpf_map_update_elem(bpf_map__fd(skel->maps.events), + &cpu, &fd, BPF_ANY); + if (ret) + return ret; + } + return 0; +} + +struct bpf_counter_ops bpf_program_profiler_ops = { + .load = bpf_program_profiler__load, + .enable = bpf_program_profiler__enable, + .read = bpf_program_profiler__read, + .destroy = bpf_program_profiler__destroy, + .install_pe = bpf_program_profiler__install_pe, +}; + +int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd) +{ + if (list_empty(&evsel->bpf_counter_list)) + return 0; + return evsel->bpf_counter_ops->install_pe(evsel, cpu, fd); +} + +int bpf_counter__load(struct evsel *evsel, struct target *target) +{ + if (target__has_bpf(target)) + evsel->bpf_counter_ops = &bpf_program_profiler_ops; + + if (evsel->bpf_counter_ops) + return evsel->bpf_counter_ops->load(evsel, target); + return 0; +} + +int bpf_counter__enable(struct evsel *evsel) +{ + if (list_empty(&evsel->bpf_counter_list)) + return 0; + return evsel->bpf_counter_ops->enable(evsel); +} + +int bpf_counter__read(struct evsel *evsel) +{ + if (list_empty(&evsel->bpf_counter_list)) + return -EAGAIN; + return evsel->bpf_counter_ops->read(evsel); +} + +void bpf_counter__destroy(struct evsel *evsel) +{ + if (list_empty(&evsel->bpf_counter_list)) + return; + evsel->bpf_counter_ops->destroy(evsel); + evsel->bpf_counter_ops = NULL; +} diff --git a/tools/perf/util/bpf_counter.h b/tools/perf/util/bpf_counter.h new file mode 100644 index 000000000000..2eca210e5dc1 --- /dev/null +++ b/tools/perf/util/bpf_counter.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_BPF_COUNTER_H +#define __PERF_BPF_COUNTER_H 1 + +#include + +struct evsel; +struct target; +struct bpf_counter; + +typedef int (*bpf_counter_evsel_op)(struct evsel *evsel); +typedef int (*bpf_counter_evsel_target_op)(struct evsel *evsel, + struct target *target); +typedef int (*bpf_counter_evsel_install_pe_op)(struct evsel *evsel, + int cpu, + int fd); + +struct bpf_counter_ops { + bpf_counter_evsel_target_op load; + bpf_counter_evsel_op enable; + bpf_counter_evsel_op read; + bpf_counter_evsel_op destroy; + bpf_counter_evsel_install_pe_op install_pe; +}; + +struct bpf_counter { + void *skel; + struct list_head list; +}; + +#ifdef HAVE_BPF_SKEL + +int bpf_counter__load(struct evsel *evsel, struct target *target); +int bpf_counter__enable(struct evsel *evsel); +int bpf_counter__read(struct evsel *evsel); +void bpf_counter__destroy(struct evsel *evsel); +int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd); + +#else /* HAVE_BPF_SKEL */ + +#include + +static inline int bpf_counter__load(struct evsel *evsel __maybe_unused, + struct target *target __maybe_unused) +{ + return 0; +} + +static inline int bpf_counter__enable(struct evsel *evsel __maybe_unused) +{ + return 0; +} + +static inline int bpf_counter__read(struct evsel *evsel __maybe_unused) +{ + return -EAGAIN; +} + +static inline void bpf_counter__destroy(struct evsel *evsel __maybe_unused) +{ +} + +static inline int bpf_counter__install_pe(struct evsel *evsel __maybe_unused, + int cpu __maybe_unused, + int fd __maybe_unused) +{ + return 0; +} + +#endif /* HAVE_BPF_SKEL */ + +#endif /* __PERF_BPF_COUNTER_H */ diff --git a/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c b/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c new file mode 100644 index 000000000000..c7cec92d0236 --- /dev/null +++ b/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +// Copyright (c) 2020 Facebook +#include +#include +#include + +/* map of perf event fds, num_cpu * num_metric entries */ +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(int)); +} events SEC(".maps"); + +/* readings at fentry */ +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bpf_perf_event_value)); + __uint(max_entries, 1); +} fentry_readings SEC(".maps"); + +/* accumulated readings */ +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bpf_perf_event_value)); + __uint(max_entries, 1); +} accum_readings SEC(".maps"); + +const volatile __u32 num_cpu = 1; + +SEC("fentry/XXX") +int BPF_PROG(fentry_XXX) +{ + __u32 key = bpf_get_smp_processor_id(); + struct bpf_perf_event_value *ptr; + __u32 zero = 0; + long err; + + /* look up before reading, to reduce error */ + ptr = bpf_map_lookup_elem(&fentry_readings, &zero); + if (!ptr) + return 0; + + err = bpf_perf_event_read_value(&events, key, ptr, sizeof(*ptr)); + if (err) + return 0; + + return 0; +} + +static inline void +fexit_update_maps(struct bpf_perf_event_value *after) +{ + struct bpf_perf_event_value *before, diff, *accum; + __u32 zero = 0; + + before = bpf_map_lookup_elem(&fentry_readings, &zero); + /* only account samples with a valid fentry_reading */ + if (before && before->counter) { + struct bpf_perf_event_value *accum; + + diff.counter = after->counter - before->counter; + diff.enabled = after->enabled - before->enabled; + diff.running = after->running - before->running; + + accum = bpf_map_lookup_elem(&accum_readings, &zero); + if (accum) { + accum->counter += diff.counter; + accum->enabled += diff.enabled; + accum->running += diff.running; + } + } +} + +SEC("fexit/XXX") +int BPF_PROG(fexit_XXX) +{ + struct bpf_perf_event_value reading; + __u32 cpu = bpf_get_smp_processor_id(); + __u32 one = 1, zero = 0; + int err; + + /* read all events before updating the maps, to reduce error */ + err = bpf_perf_event_read_value(&events, cpu, &reading, sizeof(reading)); + if (err) + return 0; + + fexit_update_maps(&reading); + return 0; +} + +char LICENSE[] SEC("license") = "Dual BSD/GPL"; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index dc0cfa5f2610..f6668ece71d7 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -25,6 +25,7 @@ #include #include #include "asm/bug.h" +#include "bpf_counter.h" #include "callchain.h" #include "cgroup.h" #include "counts.h" @@ -247,6 +248,7 @@ void evsel__init(struct evsel *evsel, evsel->bpf_obj = NULL; evsel->bpf_fd = -1; INIT_LIST_HEAD(&evsel->config_terms); + INIT_LIST_HEAD(&evsel->bpf_counter_list); perf_evsel__object.init(evsel); evsel->sample_size = __evsel__sample_size(attr->sample_type); evsel__calc_id_pos(evsel); @@ -1368,6 +1370,7 @@ void evsel__exit(struct evsel *evsel) { assert(list_empty(&evsel->core.node)); assert(evsel->evlist == NULL); + bpf_counter__destroy(evsel); evsel__free_counts(evsel); perf_evsel__free_fd(&evsel->core); perf_evsel__free_id(&evsel->core); @@ -1783,6 +1786,8 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, FD(evsel, cpu, thread) = fd; + bpf_counter__install_pe(evsel, cpu, fd); + if (unlikely(test_attr__enabled)) { test_attr__open(&evsel->core.attr, pid, cpus->map[cpu], fd, group_fd, flags); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index cd1d8dd43199..8226b1fefa8c 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -17,6 +17,8 @@ struct cgroup; struct perf_counts; struct perf_stat_evsel; union perf_event; +struct bpf_counter_ops; +struct target; typedef int (evsel__sb_cb_t)(union perf_event *event, void *data); @@ -127,6 +129,8 @@ struct evsel { * See also evsel__has_callchain(). */ __u64 synth_sample_type; + struct list_head bpf_counter_list; + struct bpf_counter_ops *bpf_counter_ops; }; struct perf_missing_features { @@ -424,4 +428,5 @@ static inline bool evsel__is_dummy_event(struct evsel *evsel) struct perf_env *evsel__env(struct evsel *evsel); int evsel__store_ids(struct evsel *evsel, struct evlist *evlist); + #endif /* __PERF_EVSEL_H */ diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index cc5ade85a33f..278abecb5bdf 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -79,6 +79,27 @@ int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, return 0; } +/* + * XXX: All these evsel destructors need some better mechanism, like a linked + * list of destructors registered when the relevant code indeed is used instead + * of having more and more calls in perf_evsel__delete(). -- acme + * + * For now, add some more: + * + * Not to drag the BPF bandwagon... + */ +void bpf_counter__destroy(struct evsel *evsel); +int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd); + +void bpf_counter__destroy(struct evsel *evsel __maybe_unused) +{ +} + +int bpf_counter__install_pe(struct evsel *evsel __maybe_unused, int cpu __maybe_unused, int fd __maybe_unused) +{ + return 0; +} + /* * Support debug printing even though util/debug.c is not linked. That means * implementing 'verbose' and 'eprintf'. diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 583ae4f09c5d..cce7a76d6473 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -1045,7 +1045,9 @@ static void print_header(struct perf_stat_config *config, if (!config->csv_output) { fprintf(output, "\n"); fprintf(output, " Performance counter stats for "); - if (_target->system_wide) + if (_target->bpf_str) + fprintf(output, "\'BPF program(s) %s", _target->bpf_str); + else if (_target->system_wide) fprintf(output, "\'system wide"); else if (_target->cpu_list) fprintf(output, "\'CPU(s) %s", _target->cpu_list); diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 8ce1479c98f0..0b3957323f66 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -527,7 +527,7 @@ int create_perf_stat_counter(struct evsel *evsel, if (leader->core.nr_members > 1) attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP; - attr->inherit = !config->no_inherit; + attr->inherit = !config->no_inherit && list_empty(&evsel->bpf_counter_list); /* * Some events get initialized with sample_(period/type) set, diff --git a/tools/perf/util/target.c b/tools/perf/util/target.c index a3db13dea937..0f383418e3df 100644 --- a/tools/perf/util/target.c +++ b/tools/perf/util/target.c @@ -56,6 +56,34 @@ enum target_errno target__validate(struct target *target) ret = TARGET_ERRNO__UID_OVERRIDE_SYSTEM; } + /* BPF and CPU are mutually exclusive */ + if (target->bpf_str && target->cpu_list) { + target->cpu_list = NULL; + if (ret == TARGET_ERRNO__SUCCESS) + ret = TARGET_ERRNO__BPF_OVERRIDE_CPU; + } + + /* BPF and PID/TID are mutually exclusive */ + if (target->bpf_str && target->tid) { + target->tid = NULL; + if (ret == TARGET_ERRNO__SUCCESS) + ret = TARGET_ERRNO__BPF_OVERRIDE_PID; + } + + /* BPF and UID are mutually exclusive */ + if (target->bpf_str && target->uid_str) { + target->uid_str = NULL; + if (ret == TARGET_ERRNO__SUCCESS) + ret = TARGET_ERRNO__BPF_OVERRIDE_UID; + } + + /* BPF and THREADS are mutually exclusive */ + if (target->bpf_str && target->per_thread) { + target->per_thread = false; + if (ret == TARGET_ERRNO__SUCCESS) + ret = TARGET_ERRNO__BPF_OVERRIDE_THREAD; + } + /* THREAD and SYSTEM/CPU are mutually exclusive */ if (target->per_thread && (target->system_wide || target->cpu_list)) { target->per_thread = false; @@ -109,6 +137,10 @@ static const char *target__error_str[] = { "PID/TID switch overriding SYSTEM", "UID switch overriding SYSTEM", "SYSTEM/CPU switch overriding PER-THREAD", + "BPF switch overriding CPU", + "BPF switch overriding PID/TID", + "BPF switch overriding UID", + "BPF switch overriding THREAD", "Invalid User: %s", "Problems obtaining information for user %s", }; @@ -134,7 +166,7 @@ int target__strerror(struct target *target, int errnum, switch (errnum) { case TARGET_ERRNO__PID_OVERRIDE_CPU ... - TARGET_ERRNO__SYSTEM_OVERRIDE_THREAD: + TARGET_ERRNO__BPF_OVERRIDE_THREAD: snprintf(buf, buflen, "%s", msg); break; diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h index 6ef01a83b24e..f132c6c2eef8 100644 --- a/tools/perf/util/target.h +++ b/tools/perf/util/target.h @@ -10,6 +10,7 @@ struct target { const char *tid; const char *cpu_list; const char *uid_str; + const char *bpf_str; uid_t uid; bool system_wide; bool uses_mmap; @@ -36,6 +37,10 @@ enum target_errno { TARGET_ERRNO__PID_OVERRIDE_SYSTEM, TARGET_ERRNO__UID_OVERRIDE_SYSTEM, TARGET_ERRNO__SYSTEM_OVERRIDE_THREAD, + TARGET_ERRNO__BPF_OVERRIDE_CPU, + TARGET_ERRNO__BPF_OVERRIDE_PID, + TARGET_ERRNO__BPF_OVERRIDE_UID, + TARGET_ERRNO__BPF_OVERRIDE_THREAD, /* for target__parse_uid() */ TARGET_ERRNO__INVALID_UID, @@ -59,6 +64,11 @@ static inline bool target__has_cpu(struct target *target) return target->system_wide || target->cpu_list; } +static inline bool target__has_bpf(struct target *target) +{ + return target->bpf_str; +} + static inline bool target__none(struct target *target) { return !target__has_task(target) && !target__has_cpu(target); From 1834436e340ce0ec00e8114f61009246a5b36fe9 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 14 Jan 2021 23:46:41 +0800 Subject: [PATCH 017/148] perf c2c: Rename for shared cache line stats For shared cache line statistics, 'perf c2c' relies on HITM. We can use more general naming rather than only binding to HITM, so replace "hitm_stats" with "shared_clines_stats" in structure perf_c2c, and rename function resort_hitm_cb() to resort_shared_cl_cb(). Signed-off-by: Leo Yan Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: David Ahern Cc: Don Zickus Cc: Ingo Molnar Cc: Joe Mario Cc: Joe Perches Cc: Mark Rutland Cc: Peter Zijlstra Signed-off-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/r/20210114154646.209024-2-leo.yan@linaro.org --- tools/perf/builtin-c2c.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index c5babeaa3b38..2d0c71300dbf 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -97,8 +97,8 @@ struct perf_c2c { bool symbol_full; bool stitch_lbr; - /* HITM shared clines stats */ - struct c2c_stats hitm_stats; + /* Shared cache line stats */ + struct c2c_stats shared_clines_stats; int shared_clines; int display; @@ -1961,7 +1961,7 @@ static int resort_cl_cb(struct hist_entry *he, void *arg __maybe_unused) { struct c2c_hist_entry *c2c_he; struct c2c_hists *c2c_hists; - bool display = he__display(he, &c2c.hitm_stats); + bool display = he__display(he, &c2c.shared_clines_stats); c2c_he = container_of(he, struct c2c_hist_entry, he); c2c_hists = c2c_he->hists; @@ -2048,14 +2048,14 @@ static int setup_nodes(struct perf_session *session) #define HAS_HITMS(__h) ((__h)->stats.lcl_hitm || (__h)->stats.rmt_hitm) -static int resort_hitm_cb(struct hist_entry *he, void *arg __maybe_unused) +static int resort_shared_cl_cb(struct hist_entry *he, void *arg __maybe_unused) { struct c2c_hist_entry *c2c_he; c2c_he = container_of(he, struct c2c_hist_entry, he); if (HAS_HITMS(c2c_he)) { c2c.shared_clines++; - c2c_add_stats(&c2c.hitm_stats, &c2c_he->stats); + c2c_add_stats(&c2c.shared_clines_stats, &c2c_he->stats); } return 0; @@ -2126,7 +2126,7 @@ static void print_c2c__display_stats(FILE *out) static void print_shared_cacheline_info(FILE *out) { - struct c2c_stats *stats = &c2c.hitm_stats; + struct c2c_stats *stats = &c2c.shared_clines_stats; int hitm_cnt = stats->lcl_hitm + stats->rmt_hitm; fprintf(out, "=================================================\n"); @@ -2827,7 +2827,7 @@ static int perf_c2c__report(int argc, const char **argv) ui_progress__init(&prog, c2c.hists.hists.nr_entries, "Sorting..."); hists__collapse_resort(&c2c.hists.hists, NULL); - hists__output_resort_cb(&c2c.hists.hists, &prog, resort_hitm_cb); + hists__output_resort_cb(&c2c.hists.hists, &prog, resort_shared_cl_cb); hists__iterate_cb(&c2c.hists.hists, resort_cl_cb); ui_progress__finish(); From 2290e1d6193bc7c760a47a4c2208a87fd8dab202 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 14 Jan 2021 23:46:42 +0800 Subject: [PATCH 018/148] perf c2c: Refactor hist entry validation This patch has no functionality changes but refactors hist entry validation for cache line resorting. It renames function "valid_hitm_or_store()" to "is_valid_hist_entry()", changes return type from integer type to bool type. In the function, it uses switch-case instead of ternary operators, which is easier to extend for more display types. Signed-off-by: Leo Yan Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: David Ahern Cc: Don Zickus Cc: Ingo Molnar Cc: Joe Mario Cc: Joe Perches Cc: Mark Rutland Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210114154646.209024-3-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 2d0c71300dbf..bc2ee84298ff 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -1888,16 +1888,32 @@ static bool he__display(struct hist_entry *he, struct c2c_stats *stats) return he->filtered == 0; } -static inline int valid_hitm_or_store(struct hist_entry *he) +static inline bool is_valid_hist_entry(struct hist_entry *he) { struct c2c_hist_entry *c2c_he; - bool has_hitm; + bool has_record = false; c2c_he = container_of(he, struct c2c_hist_entry, he); - has_hitm = c2c.display == DISPLAY_TOT ? c2c_he->stats.tot_hitm : - c2c.display == DISPLAY_LCL ? c2c_he->stats.lcl_hitm : - c2c_he->stats.rmt_hitm; - return has_hitm || c2c_he->stats.store; + + /* It's a valid entry if contains stores */ + if (c2c_he->stats.store) + return true; + + switch (c2c.display) { + case DISPLAY_LCL: + has_record = !!c2c_he->stats.lcl_hitm; + break; + case DISPLAY_RMT: + has_record = !!c2c_he->stats.rmt_hitm; + break; + case DISPLAY_TOT: + has_record = !!c2c_he->stats.tot_hitm; + break; + default: + break; + } + + return has_record; } static void set_node_width(struct c2c_hist_entry *c2c_he, int len) @@ -1951,7 +1967,7 @@ static int filter_cb(struct hist_entry *he, void *arg __maybe_unused) calc_width(c2c_he); - if (!valid_hitm_or_store(he)) + if (!is_valid_hist_entry(he)) he->filtered = HIST_FILTER__C2C; return 0; From 69a95bfdf95b23ad9bd3d240cef92408823656f9 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 14 Jan 2021 23:46:43 +0800 Subject: [PATCH 019/148] perf c2c: Refactor display filter When sorting on the respective metrics (lcl_hitm, rmt_hitm, tot_hitm), the FILTER_HITM macro is used to filter out the cache line entries if its overhead is less than 1%. This patch introduces a static function filter_display() to replace that macro and refines its parameters with a more flexible way, rather than passing field name, it changes to pass the cache line's statistic and sum value. Signed-off-by: Leo Yan Acked-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Don Zickus Cc: Ingo Molnar Cc: Joe Mario Cc: Joe Perches Cc: Mark Rutland Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210114154646.209024-4-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index bc2ee84298ff..7aaccea00883 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -1851,40 +1851,40 @@ static int c2c_hists__reinit(struct c2c_hists *c2c_hists, #define DISPLAY_LINE_LIMIT 0.001 +static u8 filter_display(u32 val, u32 sum) +{ + if (sum == 0 || ((double)val / sum) < DISPLAY_LINE_LIMIT) + return HIST_FILTER__C2C; + + return 0; +} + static bool he__display(struct hist_entry *he, struct c2c_stats *stats) { struct c2c_hist_entry *c2c_he; - double ld_dist; if (c2c.show_all) return true; c2c_he = container_of(he, struct c2c_hist_entry, he); -#define FILTER_HITM(__h) \ - if (stats->__h) { \ - ld_dist = ((double)c2c_he->stats.__h / stats->__h); \ - if (ld_dist < DISPLAY_LINE_LIMIT) \ - he->filtered = HIST_FILTER__C2C; \ - } else { \ - he->filtered = HIST_FILTER__C2C; \ - } - switch (c2c.display) { case DISPLAY_LCL: - FILTER_HITM(lcl_hitm); + he->filtered = filter_display(c2c_he->stats.lcl_hitm, + stats->lcl_hitm); break; case DISPLAY_RMT: - FILTER_HITM(rmt_hitm); + he->filtered = filter_display(c2c_he->stats.rmt_hitm, + stats->rmt_hitm); break; case DISPLAY_TOT: - FILTER_HITM(tot_hitm); + he->filtered = filter_display(c2c_he->stats.tot_hitm, + stats->tot_hitm); + break; default: break; } -#undef FILTER_HITM - return he->filtered == 0; } From 111c141591178881314ba1b50d550d31cba34c1a Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 14 Jan 2021 23:46:44 +0800 Subject: [PATCH 020/148] perf c2c: Fix argument type for percent() For percent() its arguments are defined as integers; this is not consistent with its consumers which pass u32 arguments. Thus this patch makes argument type as u32 for percent(). Signed-off-by: Leo Yan Acked-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Don Zickus Cc: Ingo Molnar Cc: Joe Mario Cc: Joe Perches Cc: Mark Rutland Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210114154646.209024-5-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 7aaccea00883..7702f9599162 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -876,7 +876,7 @@ static struct c2c_stats *total_stats(struct hist_entry *he) return &hists->stats; } -static double percent(int st, int tot) +static double percent(u32 st, u32 tot) { return tot ? 100. * (double) st / (double) tot : 0; } From f3d0a551db13b7cdf8addbe265b4fd7026944d24 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 14 Jan 2021 23:46:45 +0800 Subject: [PATCH 021/148] perf c2c: Refactor node display The macro DISPLAY_HITM() is used to calculate HITM percentage introduced by every node and it's shown for the node info. This patch introduces the static function display_metrics() to replace the macro, and the parameters are refined for passing the metric's statistic and sum value. Signed-off-by: Leo Yan Acked-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Don Zickus Cc: Ingo Molnar Cc: Joe Mario Cc: Joe Perches Cc: Mark Rutland Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210114154646.209024-6-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 7702f9599162..62213bef7b98 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -1048,6 +1048,19 @@ empty_cmp(struct perf_hpp_fmt *fmt __maybe_unused, return 0; } +static int display_metrics(struct perf_hpp *hpp, u32 val, u32 sum) +{ + int ret; + + if (sum != 0) + ret = scnprintf(hpp->buf, hpp->size, "%5.1f%% ", + percent(val, sum)); + else + ret = scnprintf(hpp->buf, hpp->size, "%6s ", "n/a"); + + return ret; +} + static int node_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, struct hist_entry *he) @@ -1091,29 +1104,23 @@ node_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, ret = scnprintf(hpp->buf, hpp->size, "%2d{%2d ", node, num); advance_hpp(hpp, ret); - #define DISPLAY_HITM(__h) \ - if (c2c_he->stats.__h> 0) { \ - ret = scnprintf(hpp->buf, hpp->size, "%5.1f%% ", \ - percent(stats->__h, c2c_he->stats.__h));\ - } else { \ - ret = scnprintf(hpp->buf, hpp->size, "%6s ", "n/a"); \ - } - switch (c2c.display) { case DISPLAY_RMT: - DISPLAY_HITM(rmt_hitm); + ret = display_metrics(hpp, stats->rmt_hitm, + c2c_he->stats.rmt_hitm); break; case DISPLAY_LCL: - DISPLAY_HITM(lcl_hitm); + ret = display_metrics(hpp, stats->lcl_hitm, + c2c_he->stats.lcl_hitm); break; case DISPLAY_TOT: - DISPLAY_HITM(tot_hitm); + ret = display_metrics(hpp, stats->tot_hitm, + c2c_he->stats.tot_hitm); + break; default: break; } - #undef DISPLAY_HITM - advance_hpp(hpp, ret); if (c2c_he->stats.store > 0) { From 0998d960489215a61023e837c08f7acc158c3228 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 14 Jan 2021 23:46:46 +0800 Subject: [PATCH 022/148] perf c2c: Add local variables for output metrics This patch adds several local variables: "cl_output": pointer for outputting single cache line metrics; "output_str": pointer for outputting cache line metrics; "sort_str": pointer to the sorting metrics. This can improve readability for the code and it's more flexible when later extend to different strings for the output metrics. Signed-off-by: Leo Yan Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Don Zickus Cc: Ingo Molnar Cc: Joe Mario Cc: Joe Perches Cc: Mark Rutland Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210114154646.209024-7-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 59 ++++++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 26 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 62213bef7b98..d247f9878948 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2199,16 +2199,17 @@ static void print_pareto(FILE *out) struct perf_hpp_list hpp_list; struct rb_node *nd; int ret; + const char *cl_output; + + cl_output = "cl_num," + "cl_rmt_hitm," + "cl_lcl_hitm," + "cl_stores_l1hit," + "cl_stores_l1miss," + "dcacheline"; perf_hpp_list__init(&hpp_list); - ret = hpp_list__parse(&hpp_list, - "cl_num," - "cl_rmt_hitm," - "cl_lcl_hitm," - "cl_stores_l1hit," - "cl_stores_l1miss," - "dcacheline", - NULL); + ret = hpp_list__parse(&hpp_list, cl_output, NULL); if (WARN_ONCE(ret, "failed to setup sort entries\n")) return; @@ -2752,6 +2753,7 @@ static int perf_c2c__report(int argc, const char **argv) OPT_END() }; int err = 0; + const char *output_str, *sort_str = NULL; argc = parse_options(argc, argv, options, report_c2c_usage, PARSE_OPT_STOP_AT_NON_OPTION); @@ -2828,24 +2830,29 @@ static int perf_c2c__report(int argc, const char **argv) goto out_mem2node; } - c2c_hists__reinit(&c2c.hists, - "cl_idx," - "dcacheline," - "dcacheline_node," - "dcacheline_count," - "percent_hitm," - "tot_hitm,lcl_hitm,rmt_hitm," - "tot_recs," - "tot_loads," - "tot_stores," - "stores_l1hit,stores_l1miss," - "ld_fbhit,ld_l1hit,ld_l2hit," - "ld_lclhit,lcl_hitm," - "ld_rmthit,rmt_hitm," - "dram_lcl,dram_rmt", - c2c.display == DISPLAY_TOT ? "tot_hitm" : - c2c.display == DISPLAY_LCL ? "lcl_hitm" : "rmt_hitm" - ); + output_str = "cl_idx," + "dcacheline," + "dcacheline_node," + "dcacheline_count," + "percent_hitm," + "tot_hitm,lcl_hitm,rmt_hitm," + "tot_recs," + "tot_loads," + "tot_stores," + "stores_l1hit,stores_l1miss," + "ld_fbhit,ld_l1hit,ld_l2hit," + "ld_lclhit,lcl_hitm," + "ld_rmthit,rmt_hitm," + "dram_lcl,dram_rmt"; + + if (c2c.display == DISPLAY_TOT) + sort_str = "tot_hitm"; + else if (c2c.display == DISPLAY_RMT) + sort_str = "rmt_hitm"; + else if (c2c.display == DISPLAY_LCL) + sort_str = "lcl_hitm"; + + c2c_hists__reinit(&c2c.hists, output_str, sort_str); ui_progress__init(&prog, c2c.hists.hists.nr_entries, "Sorting..."); From 80ec45d9f6d11db2e17b6d6aba6e8ad04b020060 Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 8 Jan 2021 16:27:52 +0200 Subject: [PATCH 023/148] perf cs-etm: Update ARM's CoreSight hardware tracing OpenCSD library to v1.0.0 Replace the OCSD_INSTR switch statement with an if to fix compilation error about unhandled values and avoid this issue again in the future. Add new OCSD_GEN_TRC_ELEM_SYNC_MARKER and OCSD_GEN_TRC_ELEM_MEMTRANS enum values to fix unhandled value compilation error. Currently they are ignored. Increase the minimum version number to v1.0.0 now that new enum values are used that are only present in this version. Signed-off-by: James Clark Reviewed-by: Mathieu Poirier Reviewed-by: Mike Leach Tested-by: Mike Leach Cc: Alexander Shishkin Cc: Al Grant Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki K Poulose Cc: Will Deacon Link: https://lore.kernel.org/r/20210108142752.27872-1-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/feature/test-libopencsd.c | 4 ++-- tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 15 ++++----------- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/tools/build/feature/test-libopencsd.c b/tools/build/feature/test-libopencsd.c index 1547bc2c0950..52c790b0317b 100644 --- a/tools/build/feature/test-libopencsd.c +++ b/tools/build/feature/test-libopencsd.c @@ -4,9 +4,9 @@ /* * Check OpenCSD library version is sufficient to provide required features */ -#define OCSD_MIN_VER ((0 << 16) | (14 << 8) | (0)) +#define OCSD_MIN_VER ((1 << 16) | (0 << 8) | (0)) #if !defined(OCSD_VER_NUM) || (OCSD_VER_NUM < OCSD_MIN_VER) -#error "OpenCSD >= 0.14.0 is required" +#error "OpenCSD >= 1.0.0 is required" #endif int main(void) diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index cd007cc9c283..3f4bc4050477 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -419,19 +419,10 @@ cs_etm_decoder__buffer_range(struct cs_etm_queue *etmq, packet->last_instr_subtype = elem->last_i_subtype; packet->last_instr_cond = elem->last_instr_cond; - switch (elem->last_i_type) { - case OCSD_INSTR_BR: - case OCSD_INSTR_BR_INDIRECT: + if (elem->last_i_type == OCSD_INSTR_BR || elem->last_i_type == OCSD_INSTR_BR_INDIRECT) packet->last_instr_taken_branch = elem->last_instr_exec; - break; - case OCSD_INSTR_ISB: - case OCSD_INSTR_DSB_DMB: - case OCSD_INSTR_WFI_WFE: - case OCSD_INSTR_OTHER: - default: + else packet->last_instr_taken_branch = false; - break; - } packet->last_instr_size = elem->last_instr_sz; @@ -572,6 +563,8 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( case OCSD_GEN_TRC_ELEM_EVENT: case OCSD_GEN_TRC_ELEM_SWTRACE: case OCSD_GEN_TRC_ELEM_CUSTOM: + case OCSD_GEN_TRC_ELEM_SYNC_MARKER: + case OCSD_GEN_TRC_ELEM_MEMTRANS: default: break; } From 407ee5c920dfead7b3fcff0644843c2f84d24245 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 5 Jan 2021 11:57:47 -0800 Subject: [PATCH 024/148] perf mem: Clean up output format Now, "--phys-data" is the only option which impacts the output format. A simple "if else" is enough to handle the option. But there will be more options added, e.g. "--data-page-size", which also impact the output format. The code will become too complex to be maintained. Divide the big printf into several small pieces. Output the specific piece only if the related option is applied. No functional change. Signed-off-by: Kan Liang Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Andi Kleen Cc: Ingo Molnar Cc: Mark Rutland Cc: Michael Ellerman Cc: Stephane Eranian Cc: Will Deacon Link: https://lore.kernel.org/r/20210105195752.43489-2-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-mem.c | 95 +++++++++++++++++----------------------- 1 file changed, 39 insertions(+), 56 deletions(-) diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 823742036ddb..7d6ee2208709 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -172,7 +172,7 @@ dump_raw_samples(struct perf_tool *tool, { struct perf_mem *mem = container_of(tool, struct perf_mem, tool); struct addr_location al; - const char *fmt; + const char *fmt, *field_sep; if (machine__resolve(machine, &al, sample) < 0) { fprintf(stderr, "problem processing %d event, skipping it.\n", @@ -186,60 +186,41 @@ dump_raw_samples(struct perf_tool *tool, if (al.map != NULL) al.map->dso->hit = 1; - if (mem->phys_addr) { - if (symbol_conf.field_sep) { - fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s0x%016"PRIx64 - "%s%"PRIu64"%s0x%"PRIx64"%s%s:%s\n"; - } else { - fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64 - "%s0x%016"PRIx64"%s%5"PRIu64"%s0x%06"PRIx64 - "%s%s:%s\n"; - symbol_conf.field_sep = " "; - } - - printf(fmt, - sample->pid, - symbol_conf.field_sep, - sample->tid, - symbol_conf.field_sep, - sample->ip, - symbol_conf.field_sep, - sample->addr, - symbol_conf.field_sep, - sample->phys_addr, - symbol_conf.field_sep, - sample->weight, - symbol_conf.field_sep, - sample->data_src, - symbol_conf.field_sep, - al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???", - al.sym ? al.sym->name : "???"); + field_sep = symbol_conf.field_sep; + if (field_sep) { + fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s"; } else { - if (symbol_conf.field_sep) { - fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s%"PRIu64 - "%s0x%"PRIx64"%s%s:%s\n"; - } else { - fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64 - "%s%5"PRIu64"%s0x%06"PRIx64"%s%s:%s\n"; - symbol_conf.field_sep = " "; - } - - printf(fmt, - sample->pid, - symbol_conf.field_sep, - sample->tid, - symbol_conf.field_sep, - sample->ip, - symbol_conf.field_sep, - sample->addr, - symbol_conf.field_sep, - sample->weight, - symbol_conf.field_sep, - sample->data_src, - symbol_conf.field_sep, - al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???", - al.sym ? al.sym->name : "???"); + fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64"%s"; + symbol_conf.field_sep = " "; } + printf(fmt, + sample->pid, + symbol_conf.field_sep, + sample->tid, + symbol_conf.field_sep, + sample->ip, + symbol_conf.field_sep, + sample->addr, + symbol_conf.field_sep); + + if (mem->phys_addr) { + printf("0x%016"PRIx64"%s", + sample->phys_addr, + symbol_conf.field_sep); + } + + if (field_sep) + fmt = "%"PRIu64"%s0x%"PRIx64"%s%s:%s\n"; + else + fmt = "%5"PRIu64"%s0x%06"PRIx64"%s%s:%s\n"; + + printf(fmt, + sample->weight, + symbol_conf.field_sep, + sample->data_src, + symbol_conf.field_sep, + al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???", + al.sym ? al.sym->name : "???"); out_put: addr_location__put(&al); return 0; @@ -287,10 +268,12 @@ static int report_raw_events(struct perf_mem *mem) if (ret < 0) goto out_delete; + printf("# PID, TID, IP, ADDR, "); + if (mem->phys_addr) - printf("# PID, TID, IP, ADDR, PHYS ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n"); - else - printf("# PID, TID, IP, ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n"); + printf("PHYS ADDR, "); + + printf("LOCAL WEIGHT, DSRC, SYMBOL\n"); ret = perf_session__process_events(session); From 06280e3b15fdaa28f02d995c0a74ec46f75db90a Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 5 Jan 2021 11:57:48 -0800 Subject: [PATCH 025/148] perf mem: Support data page size Add option --data-page-size in "perf mem" to record/report data page size. Here are some examples: # perf mem --phys-data --data-page-size report -D # PID, TID, IP, ADDR, PHYS ADDR, DATA PAGE SIZE, LOCAL WEIGHT, DSRC, SYMBOL 20134 20134 0xffffffffb5bd2fd0 0x016ffff9a274e96a308 0x000000044e96a308 4K 1168 0x5080144 /lib/modules/4.18.0-rc7+/build/vmlinux:perf_ctx_unlock 20134 20134 0xffffffffb63f645c 0xffffffffb752b814 0xcfb52b814 2M 225 0x26a100142 /lib/modules/4.18.0-rc7+/build/vmlinux:_raw_spin_lock 20134 20134 0xffffffffb660300c 0xfffffe00016b8bb0 0x0 4K 0 0x5080144 /lib/modules/4.18.0-rc7+/build/vmlinux:__x86_indirect_thunk_rax # # perf mem --phys-data --data-page-size report --stdio # To display the perf.data header info, please use # --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 5K of event 'cpu/mem-loads,ldlat=30/P' # Total weight : 281234 # Sort order : # mem,sym,dso,symbol_daddr,dso_daddr,tlb,locked,phys_daddr,data_page_size # # Overhead Samples Memory access Symbol Shared Object Data Symbol Data Object TLB access Locked Data Physical Address Data Page Size # ........ ....... ............. ............................ ................ ...................... ........... ............ ...... ...................... .............. 28.54% 1826 L1 or L1 hit [k] __x86_indirect_thunk_rax [kernel.vmlinux] [k] 0xffffb0df31b0ff28 [unknown] L1 or L2 hit No [k] 0x0000000000000000 4K 6.02% 256 L1 or L1 hit [.] touch_buffer dtlb [.] 0x00007ffd50109da8 [stack] L1 or L2 hit No [.] 0x000000042454ada8 4K 3.23% 5 L1 or L1 hit [k] clear_huge_page [kernel.vmlinux] [k] 0xffff9a2753b8ce60 [unknown] L1 or L2 hit No [k] 0x0000000453b8ce60 2M 2.98% 4 L1 or L1 hit [k] clear_page_erms [kernel.vmlinux] [k] 0xffffb0df31b0fd00 [unknown] L1 or L2 hit No [k] 0x0000000000000000 4K Signed-off-by: Kan Liang Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Andi Kleen Cc: Ingo Molnar Cc: Mark Rutland Cc: Michael Ellerman Cc: Stephane Eranian Cc: Will Deacon Link: https://lore.kernel.org/r/20210105195752.43489-3-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-mem.txt | 3 +++ tools/perf/builtin-mem.c | 20 +++++++++++++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt index 199ea0f0a6c0..66177511c5c4 100644 --- a/tools/perf/Documentation/perf-mem.txt +++ b/tools/perf/Documentation/perf-mem.txt @@ -63,6 +63,9 @@ OPTIONS --phys-data:: Record/Report sample physical addresses +--data-page-size:: + Record/Report sample data address page size + RECORD OPTIONS -------------- -e:: diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 7d6ee2208709..f3aac85aa9d4 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -30,6 +30,7 @@ struct perf_mem { bool dump_raw; bool force; bool phys_addr; + bool data_page_size; int operation; const char *cpu_list; DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); @@ -124,6 +125,9 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) if (mem->phys_addr) rec_argv[i++] = "--phys-data"; + if (mem->data_page_size) + rec_argv[i++] = "--data-page-size"; + for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { e = perf_mem_events__ptr(j); if (!e->record) @@ -173,6 +177,7 @@ dump_raw_samples(struct perf_tool *tool, struct perf_mem *mem = container_of(tool, struct perf_mem, tool); struct addr_location al; const char *fmt, *field_sep; + char str[PAGE_SIZE_NAME_LEN]; if (machine__resolve(machine, &al, sample) < 0) { fprintf(stderr, "problem processing %d event, skipping it.\n", @@ -209,6 +214,12 @@ dump_raw_samples(struct perf_tool *tool, symbol_conf.field_sep); } + if (mem->data_page_size) { + printf("%s%s", + get_page_size_name(sample->data_page_size, str), + symbol_conf.field_sep); + } + if (field_sep) fmt = "%"PRIu64"%s0x%"PRIx64"%s%s:%s\n"; else @@ -273,6 +284,9 @@ static int report_raw_events(struct perf_mem *mem) if (mem->phys_addr) printf("PHYS ADDR, "); + if (mem->data_page_size) + printf("DATA PAGE SIZE, "); + printf("LOCAL WEIGHT, DSRC, SYMBOL\n"); ret = perf_session__process_events(session); @@ -283,7 +297,7 @@ static int report_raw_events(struct perf_mem *mem) } static char *get_sort_order(struct perf_mem *mem) { - bool has_extra_options = mem->phys_addr ? true : false; + bool has_extra_options = (mem->phys_addr | mem->data_page_size) ? true : false; char sort[128]; /* @@ -302,6 +316,9 @@ static char *get_sort_order(struct perf_mem *mem) if (mem->phys_addr) strcat(sort, ",phys_daddr"); + if (mem->data_page_size) + strcat(sort, ",data_page_size"); + return strdup(sort); } @@ -447,6 +464,7 @@ int cmd_mem(int argc, const char **argv) " between columns '.' is reserved."), OPT_BOOLEAN('f', "force", &mem.force, "don't complain, do it"), OPT_BOOLEAN('p', "phys-data", &mem.phys_addr, "Record/Report sample physical addresses"), + OPT_BOOLEAN(0, "data-page-size", &mem.data_page_size, "Record/Report sample data address page size"), OPT_END() }; const char *const mem_subcommands[] = { "record", "report", NULL }; From c1de7f3d84ca324c7cda85c3ce27b11741af2124 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 5 Jan 2021 11:57:49 -0800 Subject: [PATCH 026/148] perf record: Add support for PERF_SAMPLE_CODE_PAGE_SIZE Adds the infrastructure to sample the code address page size. Introduce a new --code-page-size option for perf record. Signed-off-by: Kan Liang Originally-by: Stephane Eranian Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Andi Kleen Cc: Ingo Molnar Cc: Mark Rutland Cc: Michael Ellerman Cc: Will Deacon Link: https://lore.kernel.org/r/20210105195752.43489-4-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 3 +++ tools/perf/builtin-record.c | 2 ++ tools/perf/util/event.h | 1 + tools/perf/util/evsel.c | 18 +++++++++++++++++- tools/perf/util/evsel.h | 1 + tools/perf/util/perf_event_attr_fprintf.c | 2 +- tools/perf/util/record.h | 1 + tools/perf/util/synthetic-events.c | 8 ++++++++ 8 files changed, 34 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 0042ff7f6f33..9087b223e324 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -296,6 +296,9 @@ OPTIONS --data-page-size:: Record the sampled data address data page size. +--code-page-size:: + Record the sampled code address (ip) page size + -T:: --timestamp:: Record the sample timestamps. Use it with 'perf report -D' to see the diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 7bb10e9863bd..7704c33bfe31 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -2477,6 +2477,8 @@ static struct option __record_options[] = { "Record the sample physical addresses"), OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size, "Record the sampled data address data page size"), + OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size, + "Record the sampled code address (ip) page size"), OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"), OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, &record.opts.sample_time_set, diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index ff403ea578e1..2afea7247dd3 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -136,6 +136,7 @@ struct perf_sample { u64 data_src; u64 phys_addr; u64 data_page_size; + u64 code_page_size; u64 cgroup; u32 flags; u16 insn_len; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index f6668ece71d7..8b18ec56e266 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1195,6 +1195,9 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, if (opts->sample_data_page_size) evsel__set_sample_bit(evsel, DATA_PAGE_SIZE); + if (opts->sample_code_page_size) + evsel__set_sample_bit(evsel, CODE_PAGE_SIZE); + if (opts->record_switch_events) attr->context_switch = track; @@ -1880,7 +1883,12 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, * Must probe features in the order they were added to the * perf_event_attr interface. */ - if (!perf_missing_features.data_page_size && + if (!perf_missing_features.code_page_size && + (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)) { + perf_missing_features.code_page_size = true; + pr_debug2_peo("Kernel has no PERF_SAMPLE_CODE_PAGE_SIZE support, bailing out\n"); + goto out_close; + } else if (!perf_missing_features.data_page_size && (evsel->core.attr.sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)) { perf_missing_features.data_page_size = true; pr_debug2_peo("Kernel has no PERF_SAMPLE_DATA_PAGE_SIZE support, bailing out\n"); @@ -2376,6 +2384,12 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, array++; } + data->code_page_size = 0; + if (type & PERF_SAMPLE_CODE_PAGE_SIZE) { + data->code_page_size = *array; + array++; + } + if (type & PERF_SAMPLE_AUX) { OVERFLOW_CHECK_u64(array); sz = *array++; @@ -2685,6 +2699,8 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target, "We found oprofile daemon running, please stop it and try again."); break; case EINVAL: + if (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE && perf_missing_features.code_page_size) + return scnprintf(msg, size, "Asking for the code page size isn't supported by this kernel."); if (evsel->core.attr.sample_type & PERF_SAMPLE_DATA_PAGE_SIZE && perf_missing_features.data_page_size) return scnprintf(msg, size, "Asking for the data page size isn't supported by this kernel."); if (evsel->core.attr.write_backward && perf_missing_features.write_backward) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 8226b1fefa8c..e25594494cf6 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -149,6 +149,7 @@ struct perf_missing_features { bool branch_hw_idx; bool cgroup; bool data_page_size; + bool code_page_size; }; extern struct perf_missing_features perf_missing_features; diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c index 22b417f43470..1bd6cfd74257 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -35,7 +35,7 @@ static void __p_sample_type(char *buf, size_t size, u64 value) bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER), bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC), bit_name(WEIGHT), bit_name(PHYS_ADDR), bit_name(AUX), - bit_name(CGROUP), bit_name(DATA_PAGE_SIZE), + bit_name(CGROUP), bit_name(DATA_PAGE_SIZE), bit_name(CODE_PAGE_SIZE), { .name = NULL, } }; #undef bit_name diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h index b996ce61fadd..68f471d9a88b 100644 --- a/tools/perf/util/record.h +++ b/tools/perf/util/record.h @@ -23,6 +23,7 @@ struct record_opts { bool sample_address; bool sample_phys_addr; bool sample_data_page_size; + bool sample_code_page_size; bool sample_weight; bool sample_time; bool sample_time_set; diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 69688f20db11..3a898520f05c 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1473,6 +1473,9 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, if (type & PERF_SAMPLE_DATA_PAGE_SIZE) result += sizeof(u64); + if (type & PERF_SAMPLE_CODE_PAGE_SIZE) + result += sizeof(u64); + if (type & PERF_SAMPLE_AUX) { result += sizeof(u64); result += sample->aux_sample.size; @@ -1657,6 +1660,11 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo array++; } + if (type & PERF_SAMPLE_CODE_PAGE_SIZE) { + *array = sample->code_page_size; + array++; + } + if (type & PERF_SAMPLE_AUX) { sz = sample->aux_sample.size; *array++ = sz; From c513de8a703183fc228280b31a4091363037950f Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 5 Jan 2021 11:57:50 -0800 Subject: [PATCH 027/148] perf script: Add support for PERF_SAMPLE_CODE_PAGE_SIZE Display sampled code page sizes when PERF_SAMPLE_CODE_PAGE_SIZE was set. For example: # perf script --fields comm,event,ip,code_page_size dtlb mem-loads:uP: 445777 4K dtlb mem-loads:uP: 40f724 4K dtlb mem-loads:uP: 474926 4K dtlb mem-loads:uP: 401075 4K dtlb mem-loads:uP: 401095 4K dtlb mem-loads:uP: 401095 4K dtlb mem-loads:uP: 4010cc 4K dtlb mem-loads:uP: 440b6f 4K # Signed-off-by: Stephane Eranian Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Andi Kleen Cc: Ingo Molnar Cc: Mark Rutland Cc: Michael Ellerman Cc: Will Deacon Link: https://lore.kernel.org/r/20210105195752.43489-5-kan.liang@linux.intel.com Signed-off-by: Kan Liang Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-script.txt | 2 +- tools/perf/builtin-script.c | 13 +++++++++++-- tools/perf/util/session.c | 3 +++ 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 44d37210fc8f..60dae302db27 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -118,7 +118,7 @@ OPTIONS comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn, brstackoff, callindent, insn, insnlen, synth, phys_addr, - metric, misc, srccode, ipc, data_page_size. + metric, misc, srccode, ipc, data_page_size, code_page_size. Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index edacfa98d073..9e995311a9b8 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -117,6 +117,7 @@ enum perf_output_field { PERF_OUTPUT_IPC = 1ULL << 31, PERF_OUTPUT_TOD = 1ULL << 32, PERF_OUTPUT_DATA_PAGE_SIZE = 1ULL << 33, + PERF_OUTPUT_CODE_PAGE_SIZE = 1ULL << 34, }; struct perf_script { @@ -182,6 +183,7 @@ struct output_option { {.str = "ipc", .field = PERF_OUTPUT_IPC}, {.str = "tod", .field = PERF_OUTPUT_TOD}, {.str = "data_page_size", .field = PERF_OUTPUT_DATA_PAGE_SIZE}, + {.str = "code_page_size", .field = PERF_OUTPUT_CODE_PAGE_SIZE}, }; enum { @@ -255,7 +257,7 @@ static struct { PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD | PERF_OUTPUT_ADDR | PERF_OUTPUT_DATA_SRC | PERF_OUTPUT_WEIGHT | PERF_OUTPUT_PHYS_ADDR | - PERF_OUTPUT_DATA_PAGE_SIZE, + PERF_OUTPUT_DATA_PAGE_SIZE | PERF_OUTPUT_CODE_PAGE_SIZE, .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, }, @@ -507,6 +509,10 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session) evsel__check_stype(evsel, PERF_SAMPLE_DATA_PAGE_SIZE, "DATA_PAGE_SIZE", PERF_OUTPUT_DATA_PAGE_SIZE)) return -EINVAL; + if (PRINT_FIELD(CODE_PAGE_SIZE) && + evsel__check_stype(evsel, PERF_SAMPLE_CODE_PAGE_SIZE, "CODE_PAGE_SIZE", PERF_OUTPUT_CODE_PAGE_SIZE)) + return -EINVAL; + return 0; } @@ -2020,6 +2026,9 @@ static void process_event(struct perf_script *script, if (PRINT_FIELD(DATA_PAGE_SIZE)) fprintf(fp, " %s", get_page_size_name(sample->data_page_size, str)); + if (PRINT_FIELD(CODE_PAGE_SIZE)) + fprintf(fp, " %s", get_page_size_name(sample->code_page_size, str)); + perf_sample__fprintf_ipc(sample, attr, fp); fprintf(fp, "\n"); @@ -3519,7 +3528,7 @@ int cmd_script(int argc, const char **argv) "addr,symoff,srcline,period,iregs,uregs,brstack," "brstacksym,flags,bpf-output,brstackinsn,brstackoff," "callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc,tod," - "data_page_size", + "data_page_size,code_page_size", parse_output_fields), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 357d6b972b9d..492c994c948a 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1312,6 +1312,9 @@ static void dump_sample(struct evsel *evsel, union perf_event *event, if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) printf(" .. data page size: %s\n", get_page_size_name(sample->data_page_size, str)); + if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) + printf(" .. code page size: %s\n", get_page_size_name(sample->code_page_size, str)); + if (sample_type & PERF_SAMPLE_TRANSACTION) printf("... transaction: %" PRIx64 "\n", sample->transaction); From 9fd74f209c69c9157584af9cdc500af9bbc06b82 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 5 Jan 2021 11:57:51 -0800 Subject: [PATCH 028/148] perf report: Add support for PERF_SAMPLE_CODE_PAGE_SIZE Add a new sort dimension "code_page_size" for common sort. With this option applied, perf can sort and report by sample's code page size. For example: # perf report --stdio --sort=comm,symbol,code_page_size # To display the perf.data header info, please use # --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 3K of event 'mem-loads:uP' # Event count (approx.): 1470769 # # Overhead Command Symbol Code Page Size IPC [IPC Coverage] # ........ ....... ............................ .............. .................... # 69.56% dtlb [.] GetTickCount 4K - - 17.93% dtlb [.] Calibrate 4K - - 11.40% dtlb [.] __gettimeofday 4K - - # Signed-off-by: Stephane Eranian Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Andi Kleen Cc: Ingo Molnar Cc: Mark Rutland Cc: Michael Ellerman Cc: Will Deacon Link: https://lore.kernel.org/r/20210105195752.43489-6-kan.liang@linux.intel.com Signed-off-by: Kan Liang Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 1 + tools/perf/util/hist.c | 2 ++ tools/perf/util/hist.h | 1 + tools/perf/util/sort.c | 26 ++++++++++++++++++++++++ tools/perf/util/sort.h | 2 ++ 5 files changed, 32 insertions(+) diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 8f7f4e9605d8..e44045842c5c 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -108,6 +108,7 @@ OPTIONS - period: Raw number of event count of sample - time: Separate the samples by time stamp with the resolution specified by --time-quantum (default 100ms). Specify with overhead and before it. + - code_page_size: the code page size of sampled code address (ip) By default, comm, dso and symbol keys are used. (i.e. --sort comm,dso,symbol) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index a08fb9ea411b..6d50379af90e 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -212,6 +212,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__new_col_len(hists, HISTC_TIME, 16); else hists__new_col_len(hists, HISTC_TIME, 12); + hists__new_col_len(hists, HISTC_CODE_PAGE_SIZE, 6); if (h->srcline) { len = MAX(strlen(h->srcline), strlen(sort_srcline.se_header)); @@ -718,6 +719,7 @@ __hists__add_entry(struct hists *hists, .cpumode = al->cpumode, .ip = al->addr, .level = al->level, + .code_page_size = sample->code_page_size, .stat = { .nr_events = 1, .period = sample->period, diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 14f66330923d..361108533a56 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -53,6 +53,7 @@ enum hist_column { HISTC_DSO_TO, HISTC_LOCAL_WEIGHT, HISTC_GLOBAL_WEIGHT, + HISTC_CODE_PAGE_SIZE, HISTC_MEM_DADDR_SYMBOL, HISTC_MEM_DADDR_DSO, HISTC_MEM_PHYS_DADDR, diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 80907bc32683..c00934c91b58 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1491,6 +1491,31 @@ struct sort_entry sort_mem_data_page_size = { .se_width_idx = HISTC_MEM_DATA_PAGE_SIZE, }; +static int64_t +sort__code_page_size_cmp(struct hist_entry *left, struct hist_entry *right) +{ + uint64_t l = left->code_page_size; + uint64_t r = right->code_page_size; + + return (int64_t)(r - l); +} + +static int hist_entry__code_page_size_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + char str[PAGE_SIZE_NAME_LEN]; + + return repsep_snprintf(bf, size, "%-*s", width, + get_page_size_name(he->code_page_size, str)); +} + +struct sort_entry sort_code_page_size = { + .se_header = "Code Page Size", + .se_cmp = sort__code_page_size_cmp, + .se_snprintf = hist_entry__code_page_size_snprintf, + .se_width_idx = HISTC_CODE_PAGE_SIZE, +}; + static int64_t sort__abort_cmp(struct hist_entry *left, struct hist_entry *right) { @@ -1735,6 +1760,7 @@ static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_CGROUP_ID, "cgroup_id", sort_cgroup_id), DIM(SORT_SYM_IPC_NULL, "ipc_null", sort_sym_ipc_null), DIM(SORT_TIME, "time", sort_time), + DIM(SORT_CODE_PAGE_SIZE, "code_page_size", sort_code_page_size), }; #undef DIM diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index e50f2b695bc4..cab4172a6ec3 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -106,6 +106,7 @@ struct hist_entry { u64 transaction; s32 socket; s32 cpu; + u64 code_page_size; u8 cpumode; u8 depth; @@ -229,6 +230,7 @@ enum sort_type { SORT_CGROUP_ID, SORT_SYM_IPC_NULL, SORT_TIME, + SORT_CODE_PAGE_SIZE, /* branch stack specific sort keys */ __SORT_BRANCH_STACK, From d8eda898057e6fab8b2a9137485c574c91b2554f Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 5 Jan 2021 11:57:52 -0800 Subject: [PATCH 029/148] perf test: Add test case for PERF_SAMPLE_CODE_PAGE_SIZE Extend sample-parsing test cases to support new sample type PERF_SAMPLE_CODE_PAGE_SIZE. Signed-off-by: Stephane Eranian Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Andi Kleen Cc: Ingo Molnar Cc: Mark Rutland Cc: Michael Ellerman Cc: Will Deacon Link: https://lore.kernel.org/r/20210105195752.43489-7-kan.liang@linux.intel.com Signed-off-by: Kan Liang Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/sample-parsing.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 2393916f6128..e93d0689a27b 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -157,6 +157,9 @@ static bool samples_same(const struct perf_sample *s1, if (type & PERF_SAMPLE_DATA_PAGE_SIZE) COMP(data_page_size); + if (type & PERF_SAMPLE_CODE_PAGE_SIZE) + COMP(code_page_size); + if (type & PERF_SAMPLE_AUX) { COMP(aux_sample.size); if (memcmp(s1->aux_sample.data, s2->aux_sample.data, @@ -238,6 +241,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) .phys_addr = 113, .cgroup = 114, .data_page_size = 115, + .code_page_size = 116, .aux_sample = { .size = sizeof(aux_data), .data = (void *)aux_data, From 64b9705b548b7ab38e39a8670471a3e8de0ef149 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 2 Jan 2021 23:04:20 +0100 Subject: [PATCH 030/148] perf config: Make perf_config_from_file() static It's not used outside config.c object. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Alexei Budankov Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210102220441.794923-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/config.c | 2 +- tools/perf/util/config.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 6969f82843ee..20be0504fb95 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -489,7 +489,7 @@ int perf_default_config(const char *var, const char *value, return 0; } -int perf_config_from_file(config_fn_t fn, const char *filename, void *data) +static int perf_config_from_file(config_fn_t fn, const char *filename, void *data) { int ret; FILE *f = fopen(filename, "r"); diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h index 8c881e3a3ec3..2f753b2a034b 100644 --- a/tools/perf/util/config.h +++ b/tools/perf/util/config.h @@ -27,7 +27,6 @@ extern const char *config_exclusive_filename; typedef int (*config_fn_t)(const char *, const char *, void *); -int perf_config_from_file(config_fn_t fn, const char *filename, void *data); int perf_default_config(const char *, const char *, void *); int perf_config(config_fn_t fn, void *); int perf_config_int(int *dest, const char *, const char *); From a523026cacac89132baa90d77ea59aa5dc0a40ee Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 2 Jan 2021 23:04:21 +0100 Subject: [PATCH 031/148] perf config: Add config set interface Add interface to load config set from custom file by using perf_config_set__load_file function. It will be used in perf daemon command to process custom config file. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Alexei Budankov Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210102220441.794923-3-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/config.c | 28 +++++++++++++++++++++++----- tools/perf/util/config.h | 3 +++ 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 20be0504fb95..222cb2e2de25 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -738,6 +738,18 @@ struct perf_config_set *perf_config_set__new(void) return set; } +struct perf_config_set *perf_config_set__load_file(const char *file) +{ + struct perf_config_set *set = zalloc(sizeof(*set)); + + if (set) { + INIT_LIST_HEAD(&set->sections); + perf_config_from_file(collect_config, file, set); + } + + return set; +} + static int perf_config__init(void) { if (config_set == NULL) @@ -746,17 +758,15 @@ static int perf_config__init(void) return config_set == NULL; } -int perf_config(config_fn_t fn, void *data) +int perf_config_set(struct perf_config_set *set, + config_fn_t fn, void *data) { int ret = 0; char key[BUFSIZ]; struct perf_config_section *section; struct perf_config_item *item; - if (config_set == NULL && perf_config__init()) - return -1; - - perf_config_set__for_each_entry(config_set, section, item) { + perf_config_set__for_each_entry(set, section, item) { char *value = item->value; if (value) { @@ -778,6 +788,14 @@ int perf_config(config_fn_t fn, void *data) return ret; } +int perf_config(config_fn_t fn, void *data) +{ + if (config_set == NULL && perf_config__init()) + return -1; + + return perf_config_set(config_set, fn, data); +} + void perf_config__exit(void) { perf_config_set__delete(config_set); diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h index 2f753b2a034b..ee5a242446e9 100644 --- a/tools/perf/util/config.h +++ b/tools/perf/util/config.h @@ -29,6 +29,8 @@ typedef int (*config_fn_t)(const char *, const char *, void *); int perf_default_config(const char *, const char *, void *); int perf_config(config_fn_t fn, void *); +int perf_config_set(struct perf_config_set *set, + config_fn_t fn, void *data); int perf_config_int(int *dest, const char *, const char *); int perf_config_u8(u8 *dest, const char *name, const char *value); int perf_config_u64(u64 *dest, const char *, const char *); @@ -37,6 +39,7 @@ int config_error_nonbool(const char *); const char *perf_etc_perfconfig(void); struct perf_config_set *perf_config_set__new(void); +struct perf_config_set *perf_config_set__load_file(const char *file); void perf_config_set__delete(struct perf_config_set *set); int perf_config_set__collect(struct perf_config_set *set, const char *file_name, const char *var, const char *value); From bcbd79d1cfde93ee5b40900be2c9fbae44640f98 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 2 Jan 2021 23:04:22 +0100 Subject: [PATCH 032/148] perf debug: Add debug_set_display_time function Allow to display time in perf debug output via new debug_set_display_time function. It will be used in perf daemon command to get verbose output into log file. The debug time format is: [2020-12-03 18:25:31.822152] affinity: SYS [2020-12-03 18:25:31.822164] mmap flush: 1 [2020-12-03 18:25:31.822175] comp level: 0 [2020-12-03 18:25:32.002047] mmap size 528384B Committer notes: Cast tod.tv_usec to long to avoid this problem: 78 12.70 ubuntu:18.04-x-sparc64 : FAIL sparc64-linux-gnu-gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0 util/debug.c: In function 'fprintf_time': util/debug.c:63:32: error: format '%lu' expects argument of type 'long unsigned int', but argument 4 has type '__suseconds_t {aka int}' [-Werror=format=] return fprintf(file, "[%s.%06lu] ", date, tod.tv_usec); ~~~~^ ~~~~~~~~~~~ %06u Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Alexei Budankov Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210102220441.794923-4-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/debug.c | 34 +++++++++++++++++++++++++++++++--- tools/perf/util/debug.h | 1 + 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 50fd6a4be4e0..2c06abf6dcd2 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -10,6 +10,7 @@ #include #include #include +#include #ifdef HAVE_BACKTRACE_SUPPORT #include #endif @@ -31,21 +32,48 @@ int debug_ordered_events; static int redirect_to_stderr; int debug_data_convert; static FILE *debug_file; +bool debug_display_time; void debug_set_file(FILE *file) { debug_file = file; } +void debug_set_display_time(bool set) +{ + debug_display_time = set; +} + +static int fprintf_time(FILE *file) +{ + struct timeval tod; + struct tm ltime; + char date[64]; + + if (!debug_display_time) + return 0; + + if (gettimeofday(&tod, NULL) != 0) + return 0; + + if (localtime_r(&tod.tv_sec, <ime) == NULL) + return 0; + + strftime(date, sizeof(date), "%F %H:%M:%S", <ime); + return fprintf(file, "[%s.%06lu] ", date, (long)tod.tv_usec); +} + int veprintf(int level, int var, const char *fmt, va_list args) { int ret = 0; if (var >= level) { - if (use_browser >= 1 && !redirect_to_stderr) + if (use_browser >= 1 && !redirect_to_stderr) { ui_helpline__vshow(fmt, args); - else - ret = vfprintf(debug_file, fmt, args); + } else { + ret = fprintf_time(debug_file); + ret += vfprintf(debug_file, fmt, args); + } } return ret; diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index 43f712295645..48f631966067 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -64,6 +64,7 @@ int veprintf(int level, int var, const char *fmt, va_list args); int perf_debug_option(const char *str); void debug_set_file(FILE *file); +void debug_set_display_time(bool set); void perf_debug_setup(void); int perf_quiet_option(void); From f5f03e19ce14fc31aa8668f9c511ce6be0ca0bb7 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 2 Jan 2021 23:04:23 +0100 Subject: [PATCH 033/148] perf config: Add perf_home_perfconfig function Factor out the perf_home_perfconfig, that looks for .perfconfig in home directory including check for PERF_CONFIG_NOGLOBAL and for proper permission. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Alexei Budankov Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210102220441.794923-5-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/config.c | 89 ++++++++++++++++++++++++---------------- tools/perf/util/config.h | 1 + 2 files changed, 54 insertions(+), 36 deletions(-) diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 222cb2e2de25..34fe80ccdad1 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -531,6 +531,56 @@ static int perf_config_global(void) return !perf_env_bool("PERF_CONFIG_NOGLOBAL", 0); } +static char *home_perfconfig(void) +{ + const char *home = NULL; + char *config; + struct stat st; + + home = getenv("HOME"); + + /* + * Skip reading user config if: + * - there is no place to read it from (HOME) + * - we are asked not to (PERF_CONFIG_NOGLOBAL=1) + */ + if (!home || !*home || !perf_config_global()) + return NULL; + + config = strdup(mkpath("%s/.perfconfig", home)); + if (config == NULL) { + pr_warning("Not enough memory to process %s/.perfconfig, ignoring it.", home); + return NULL; + } + + if (stat(config, &st) < 0) + goto out_free; + + if (st.st_uid && (st.st_uid != geteuid())) { + pr_warning("File %s not owned by current user or root, ignoring it.", config); + goto out_free; + } + + if (st.st_size) + return config; + +out_free: + free(config); + return NULL; +} + +const char *perf_home_perfconfig(void) +{ + static const char *config; + static bool failed; + + config = failed ? NULL : home_perfconfig(); + if (!config) + failed = true; + + return config; +} + static struct perf_config_section *find_section(struct list_head *sections, const char *section_name) { @@ -676,9 +726,6 @@ int perf_config_set__collect(struct perf_config_set *set, const char *file_name, static int perf_config_set__init(struct perf_config_set *set) { int ret = -1; - const char *home = NULL; - char *user_config; - struct stat st; /* Setting $PERF_CONFIG makes perf read _only_ the given config file. */ if (config_exclusive_filename) @@ -687,41 +734,11 @@ static int perf_config_set__init(struct perf_config_set *set) if (perf_config_from_file(collect_config, perf_etc_perfconfig(), set) < 0) goto out; } - - home = getenv("HOME"); - - /* - * Skip reading user config if: - * - there is no place to read it from (HOME) - * - we are asked not to (PERF_CONFIG_NOGLOBAL=1) - */ - if (!home || !*home || !perf_config_global()) - return 0; - - user_config = strdup(mkpath("%s/.perfconfig", home)); - if (user_config == NULL) { - pr_warning("Not enough memory to process %s/.perfconfig, ignoring it.", home); - goto out; + if (perf_config_global() && perf_home_perfconfig()) { + if (perf_config_from_file(collect_config, perf_home_perfconfig(), set) < 0) + goto out; } - if (stat(user_config, &st) < 0) { - if (errno == ENOENT) - ret = 0; - goto out_free; - } - - ret = 0; - - if (st.st_uid && (st.st_uid != geteuid())) { - pr_warning("File %s not owned by current user or root, ignoring it.", user_config); - goto out_free; - } - - if (st.st_size) - ret = perf_config_from_file(collect_config, user_config, set); - -out_free: - free(user_config); out: return ret; } diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h index ee5a242446e9..d6c4f80f367c 100644 --- a/tools/perf/util/config.h +++ b/tools/perf/util/config.h @@ -37,6 +37,7 @@ int perf_config_u64(u64 *dest, const char *, const char *); int perf_config_bool(const char *, const char *); int config_error_nonbool(const char *); const char *perf_etc_perfconfig(void); +const char *perf_home_perfconfig(void); struct perf_config_set *perf_config_set__new(void); struct perf_config_set *perf_config_set__load_file(const char *file); From b2946282c02b4d4deb0f3b47db8131e9afbef624 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 2 Jan 2021 23:04:24 +0100 Subject: [PATCH 034/148] perf config: Make perf_config_system() global Make perf_config_system global, it will be used outside the config.c object in the following patches. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Alexei Budankov Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210102220441.794923-6-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/config.c | 2 +- tools/perf/util/config.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 34fe80ccdad1..4e0455a6bb5f 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -521,7 +521,7 @@ static int perf_env_bool(const char *k, int def) return v ? perf_config_bool(k, v) : def; } -static int perf_config_system(void) +int perf_config_system(void) { return !perf_env_bool("PERF_CONFIG_NOSYSTEM", 0); } diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h index d6c4f80f367c..bf68e4acea73 100644 --- a/tools/perf/util/config.h +++ b/tools/perf/util/config.h @@ -38,6 +38,7 @@ int perf_config_bool(const char *, const char *); int config_error_nonbool(const char *); const char *perf_etc_perfconfig(void); const char *perf_home_perfconfig(void); +int perf_config_system(void); struct perf_config_set *perf_config_set__new(void); struct perf_config_set *perf_config_set__load_file(const char *file); From e8b2db07810a0449cb754095c2d85cdfb39e3f57 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 2 Jan 2021 23:04:25 +0100 Subject: [PATCH 035/148] perf config: Make perf_config_global() global Make perf_config_global global, it will be used outside the config.c object in the following patches. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Alexei Budankov Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210102220441.794923-7-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/config.c | 2 +- tools/perf/util/config.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 4e0455a6bb5f..6984c77068a3 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -526,7 +526,7 @@ int perf_config_system(void) return !perf_env_bool("PERF_CONFIG_NOSYSTEM", 0); } -static int perf_config_global(void) +int perf_config_global(void) { return !perf_env_bool("PERF_CONFIG_NOGLOBAL", 0); } diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h index bf68e4acea73..2fd77aaff4d2 100644 --- a/tools/perf/util/config.h +++ b/tools/perf/util/config.h @@ -39,6 +39,7 @@ int config_error_nonbool(const char *); const char *perf_etc_perfconfig(void); const char *perf_home_perfconfig(void); int perf_config_system(void); +int perf_config_global(void); struct perf_config_set *perf_config_set__new(void); struct perf_config_set *perf_config_set__load_file(const char *file); From 991ae4eb36911fe9a99bef1c22b9578ceec3896a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 27 Dec 2020 00:20:35 +0100 Subject: [PATCH 036/148] perf tools: Allow to enable/disable events via control file Adding new control events to enable/disable specific event. The interface string for control file are: 'enable ' 'disable ' when received the command, perf will scan the current evlist for and if found it's enabled/disabled. Example session: terminal 1: # mkfifo control ack perf.pipe # perf record --control=fifo:control,ack -D -1 --no-buffering -e 'sched:*' -o - > perf.pipe terminal 2: # cat perf.pipe | perf --no-pager script -i - terminal 1: Events disabled NOTE Above message will show only after read side of the pipe ('>') is started on 'terminal 2'. The 'terminal 1's bash does not execute perf before that, hence the delyaed perf record message. terminal 3: # echo 'enable sched:sched_process_fork' > control terminal 1: event sched:sched_process_fork enabled terminal 2: bash 33349 [034] 149587.674295: sched:sched_process_fork: comm=bash pid=33349 child_comm=bash child_pid=34056 bash 33349 [034] 149588.239521: sched:sched_process_fork: comm=bash pid=33349 child_comm=bash child_pid=34057 terminal 3: # echo 'enable sched:sched_wakeup_new' > control terminal 1: event sched:sched_wakeup_new enabled terminal 2: bash 33349 [034] 149632.228023: sched:sched_process_fork: comm=bash pid=33349 child_comm=bash child_pid=34059 bash 33349 [034] 149632.228050: sched:sched_wakeup_new: bash:34059 [120] success=1 CPU:036 bash 33349 [034] 149633.950005: sched:sched_process_fork: comm=bash pid=33349 child_comm=bash child_pid=34060 bash 33349 [034] 149633.950030: sched:sched_wakeup_new: bash:34060 [120] success=1 CPU:036 Committer testing: If I use 'sched:*' and then enable all events, I can't get 'perf record' to react to further commands, so I tested it with: [root@five ~]# perf record --control=fifo:control,ack -D -1 --no-buffering -e 'sched:sched_process_*' -o - > perf.pipe Events disabled Events enabled Events disabled And then it works as expected, so we need to fix this pre-existing problem. Another issue, we need to check if a event is already enabled or disabled and change the message to be clearer, i.e.: [root@five ~]# perf record --control=fifo:control,ack -D -1 --no-buffering -e 'sched:sched_process_*' -o - > perf.pipe Events disabled If we receive a 'disable' command, then it should say: [root@five ~]# perf record --control=fifo:control,ack -D -1 --no-buffering -e 'sched:sched_process_*' -o - > perf.pipe Events disabled Events already disabled Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Alexei Budankov Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201226232038.390883-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 8 +-- tools/perf/builtin-record.c | 8 +-- tools/perf/builtin-stat.c | 2 - tools/perf/util/evlist.c | 63 ++++++++++++++++++++++-- 4 files changed, 67 insertions(+), 14 deletions(-) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 9087b223e324..5ab78dcbadfb 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -646,9 +646,11 @@ ctl-fifo / ack-fifo are opened and used as ctl-fd / ack-fd as follows. Listen on ctl-fd descriptor for command to control measurement. Available commands: - 'enable' : enable events - 'disable' : disable events - 'snapshot': AUX area tracing snapshot). + 'enable' : enable events + 'disable' : disable events + 'enable name' : enable event 'name' + 'disable name' : disable event 'name' + 'snapshot' : AUX area tracing snapshot). Measurements can be started with events disabled using --delay=-1 option. Optionally send control command completion ('ack\n') to ack-fd descriptor to synchronize with the diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 7704c33bfe31..71061a9af1c2 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1938,18 +1938,14 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) { switch (cmd) { - case EVLIST_CTL_CMD_ENABLE: - pr_info(EVLIST_ENABLED_MSG); - break; - case EVLIST_CTL_CMD_DISABLE: - pr_info(EVLIST_DISABLED_MSG); - break; case EVLIST_CTL_CMD_SNAPSHOT: hit_auxtrace_snapshot_trigger(rec); evlist__ctlfd_ack(rec->evlist); break; case EVLIST_CTL_CMD_ACK: case EVLIST_CTL_CMD_UNSUPPORTED: + case EVLIST_CTL_CMD_ENABLE: + case EVLIST_CTL_CMD_DISABLE: default: break; } diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 3c054b8d4677..3ec12366bdd4 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -611,14 +611,12 @@ static void process_evlist(struct evlist *evlist, unsigned int interval) if (evlist__ctlfd_process(evlist, &cmd) > 0) { switch (cmd) { case EVLIST_CTL_CMD_ENABLE: - pr_info(EVLIST_ENABLED_MSG); if (interval) process_interval(); break; case EVLIST_CTL_CMD_DISABLE: if (interval) process_interval(); - pr_info(EVLIST_DISABLED_MSG); break; case EVLIST_CTL_CMD_SNAPSHOT: case EVLIST_CTL_CMD_ACK: diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 05363a7247c4..c71c7e035641 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1957,6 +1957,64 @@ int evlist__ctlfd_ack(struct evlist *evlist) return err; } +static int get_cmd_arg(char *cmd_data, size_t cmd_size, char **arg) +{ + char *data = cmd_data + cmd_size; + + /* no argument */ + if (!*data) + return 0; + + /* there's argument */ + if (*data == ' ') { + *arg = data + 1; + return 1; + } + + /* malformed */ + return -1; +} + +static int evlist__ctlfd_enable(struct evlist *evlist, char *cmd_data, bool enable) +{ + struct evsel *evsel; + char *name; + int err; + + err = get_cmd_arg(cmd_data, + enable ? sizeof(EVLIST_CTL_CMD_ENABLE_TAG) - 1 : + sizeof(EVLIST_CTL_CMD_DISABLE_TAG) - 1, + &name); + if (err < 0) { + pr_info("failed: wrong command\n"); + return -1; + } + + if (err) { + evsel = evlist__find_evsel_by_str(evlist, name); + if (evsel) { + if (enable) + evlist__enable_evsel(evlist, name); + else + evlist__disable_evsel(evlist, name); + pr_info("Event %s %s\n", evsel->name, + enable ? "enabled" : "disabled"); + } else { + pr_info("failed: can't find '%s' event\n", name); + } + } else { + if (enable) { + evlist__enable(evlist); + pr_info(EVLIST_ENABLED_MSG); + } else { + evlist__disable(evlist); + pr_info(EVLIST_DISABLED_MSG); + } + } + + return 0; +} + int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd) { int err = 0; @@ -1973,10 +2031,9 @@ int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd) if (err > 0) { switch (*cmd) { case EVLIST_CTL_CMD_ENABLE: - evlist__enable(evlist); - break; case EVLIST_CTL_CMD_DISABLE: - evlist__disable(evlist); + err = evlist__ctlfd_enable(evlist, cmd_data, + *cmd == EVLIST_CTL_CMD_ENABLE); break; case EVLIST_CTL_CMD_SNAPSHOT: break; From 142544a938f436a6567a88d307346bfe198465b0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 27 Dec 2020 00:20:36 +0100 Subject: [PATCH 037/148] perf tools: Add 'evlist' control command Add a new 'evlist' control command to display all the evlist events. When it is received, perf will scan and print current evlist into perf record terminal. The interface string for control file is: evlist [-v|-g|-F] The syntax follows perf evlist command: -F Show just the sample frequency used for each event. -v Show all fields. -g Show event group information. Example session: terminal 1: # mkfifo control ack # perf record --control=fifo:control,ack -e '{cycles,instructions}' terminal 2: # echo evlist > control terminal 1: cycles instructions dummy:HG terminal 2: # echo 'evlist -v' > control terminal 1: cycles: size: 120, { sample_period, sample_freq }: 4000, sample_type: \ IP|TID|TIME|ID|CPU|PERIOD, read_format: ID, disabled: 1, inherit: 1, freq: 1, \ sample_id_all: 1, exclude_guest: 1 instructions: size: 120, config: 0x1, { sample_period, sample_freq }: 4000, \ sample_type: IP|TID|TIME|ID|CPU|PERIOD, read_format: ID, inherit: 1, freq: 1, \ sample_id_all: 1, exclude_guest: 1 dummy:HG: type: 1, size: 120, config: 0x9, { sample_period, sample_freq }: 4000, \ sample_type: IP|TID|TIME|ID|CPU|PERIOD, read_format: ID, inherit: 1, mmap: 1, \ comm: 1, freq: 1, task: 1, sample_id_all: 1, mmap2: 1, comm_exec: 1, ksymbol: 1, \ bpf_event: 1 terminal 2: # echo 'evlist -g' > control terminal 1: {cycles,instructions} dummy:HG terminal 2: # echo 'evlist -F' > control terminal 1: cycles: sample_freq=4000 instructions: sample_freq=4000 dummy:HG: sample_freq=4000 This new evlist command is handy to get real event names when wildcards are used. Adding evsel_fprintf.c object to python/perf.so build, because it's now evlist.c dependency. Adding PYTHON_PERF define for python/perf.so compilation, so we can use it to compile in only evsel__fprintf from evsel_fprintf.c object. Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Alexei Budankov Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201226232038.390883-3-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 15 ++++++--- tools/perf/builtin-record.c | 1 + tools/perf/builtin-stat.c | 1 + tools/perf/util/evlist.c | 41 ++++++++++++++++++++++++ tools/perf/util/evlist.h | 2 ++ tools/perf/util/evsel_fprintf.c | 2 ++ tools/perf/util/python-ext-sources | 1 + tools/perf/util/setup.py | 2 +- 8 files changed, 59 insertions(+), 6 deletions(-) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 5ab78dcbadfb..3405268bd9a5 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -646,11 +646,16 @@ ctl-fifo / ack-fifo are opened and used as ctl-fd / ack-fd as follows. Listen on ctl-fd descriptor for command to control measurement. Available commands: - 'enable' : enable events - 'disable' : disable events - 'enable name' : enable event 'name' - 'disable name' : disable event 'name' - 'snapshot' : AUX area tracing snapshot). + 'enable' : enable events + 'disable' : disable events + 'enable name' : enable event 'name' + 'disable name' : disable event 'name' + 'snapshot' : AUX area tracing snapshot). + + 'evlist [-v|-g|-F] : display all events + -F Show just the sample frequency used for each event. + -v Show all fields. + -g Show event group information. Measurements can be started with events disabled using --delay=-1 option. Optionally send control command completion ('ack\n') to ack-fd descriptor to synchronize with the diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 71061a9af1c2..a0e832002eca 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1946,6 +1946,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) case EVLIST_CTL_CMD_UNSUPPORTED: case EVLIST_CTL_CMD_ENABLE: case EVLIST_CTL_CMD_DISABLE: + case EVLIST_CTL_CMD_EVLIST: default: break; } diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 3ec12366bdd4..fc535e2df857 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -621,6 +621,7 @@ static void process_evlist(struct evlist *evlist, unsigned int interval) case EVLIST_CTL_CMD_SNAPSHOT: case EVLIST_CTL_CMD_ACK: case EVLIST_CTL_CMD_UNSUPPORTED: + case EVLIST_CTL_CMD_EVLIST: default: break; } diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index c71c7e035641..9e890b482220 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -24,6 +24,7 @@ #include "bpf-event.h" #include "util/string2.h" #include "util/perf_api_probe.h" +#include "util/evsel_fprintf.h" #include #include #include @@ -1936,6 +1937,9 @@ static int evlist__ctlfd_recv(struct evlist *evlist, enum evlist_ctl_cmd *cmd, (sizeof(EVLIST_CTL_CMD_SNAPSHOT_TAG)-1))) { *cmd = EVLIST_CTL_CMD_SNAPSHOT; pr_debug("is snapshot\n"); + } else if (!strncmp(cmd_data, EVLIST_CTL_CMD_EVLIST_TAG, + (sizeof(EVLIST_CTL_CMD_EVLIST_TAG)-1))) { + *cmd = EVLIST_CTL_CMD_EVLIST; } } @@ -2015,6 +2019,40 @@ static int evlist__ctlfd_enable(struct evlist *evlist, char *cmd_data, bool enab return 0; } +static int evlist__ctlfd_list(struct evlist *evlist, char *cmd_data) +{ + struct perf_attr_details details = { .verbose = false, }; + struct evsel *evsel; + char *arg; + int err; + + err = get_cmd_arg(cmd_data, + sizeof(EVLIST_CTL_CMD_EVLIST_TAG) - 1, + &arg); + if (err < 0) { + pr_info("failed: wrong command\n"); + return -1; + } + + if (err) { + if (!strcmp(arg, "-v")) { + details.verbose = true; + } else if (!strcmp(arg, "-g")) { + details.event_group = true; + } else if (!strcmp(arg, "-F")) { + details.freq = true; + } else { + pr_info("failed: wrong command\n"); + return -1; + } + } + + evlist__for_each_entry(evlist, evsel) + evsel__fprintf(evsel, &details, stderr); + + return 0; +} + int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd) { int err = 0; @@ -2035,6 +2073,9 @@ int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd) err = evlist__ctlfd_enable(evlist, cmd_data, *cmd == EVLIST_CTL_CMD_ENABLE); break; + case EVLIST_CTL_CMD_EVLIST: + err = evlist__ctlfd_list(evlist, cmd_data); + break; case EVLIST_CTL_CMD_SNAPSHOT: break; case EVLIST_CTL_CMD_ACK: diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 1aae75895dea..e79c64d81d21 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -330,6 +330,7 @@ struct evsel *evlist__reset_weak_group(struct evlist *evlist, struct evsel *evse #define EVLIST_CTL_CMD_DISABLE_TAG "disable" #define EVLIST_CTL_CMD_ACK_TAG "ack\n" #define EVLIST_CTL_CMD_SNAPSHOT_TAG "snapshot" +#define EVLIST_CTL_CMD_EVLIST_TAG "evlist" #define EVLIST_CTL_CMD_MAX_LEN 64 @@ -339,6 +340,7 @@ enum evlist_ctl_cmd { EVLIST_CTL_CMD_DISABLE, EVLIST_CTL_CMD_ACK, EVLIST_CTL_CMD_SNAPSHOT, + EVLIST_CTL_CMD_EVLIST, }; int evlist__parse_control(const char *str, int *ctl_fd, int *ctl_fd_ack, bool *ctl_fd_close); diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c index fb498a723a00..bfedd7b23521 100644 --- a/tools/perf/util/evsel_fprintf.c +++ b/tools/perf/util/evsel_fprintf.c @@ -100,6 +100,7 @@ int evsel__fprintf(struct evsel *evsel, struct perf_attr_details *details, FILE return ++printed; } +#ifndef PYTHON_PERF int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, unsigned int print_opts, struct callchain_cursor *cursor, struct strlist *bt_stop_list, FILE *fp) @@ -239,3 +240,4 @@ int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al, return printed; } +#endif /* PYTHON_PERF */ diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index a9d9c142eb7c..71b753523fac 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -10,6 +10,7 @@ util/python.c util/cap.c util/evlist.c util/evsel.c +util/evsel_fprintf.c util/perf_event_attr_fprintf.c util/cpumap.c util/memswap.c diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index c5e3e9a68162..483f05004e68 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -43,7 +43,7 @@ class install_lib(_install_lib): cflags = getenv('CFLAGS', '').split() # switch off several checks (need to be at the end of cflags list) -cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter', '-Wno-redundant-decls' ] +cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter', '-Wno-redundant-decls', '-DPYTHON_PERF' ] if not cc_is_clang: cflags += ['-Wno-cast-function-type' ] From f186cd614878dc886edea5df42ca3225d5ab97ea Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 27 Dec 2020 00:20:37 +0100 Subject: [PATCH 038/148] perf tools: Add 'stop' control command Adding control 'stop' command to stop perf record. When it is received, perf will set the 'done' variable to 1 to stop its mmap ring buffer reading loop. Example session: terminal 1: # mkfifo control ack # perf record --control=fifo:control,ack terminal 2: # echo stop > control terminal 1: [ perf record: Woken up 7 times to write data ] [ perf record: Captured and wrote 3.214 MB perf.data (38280 samples) ] # Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Alexei Budankov Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201226232038.390883-4-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 1 + tools/perf/builtin-record.c | 3 +++ tools/perf/builtin-stat.c | 1 + tools/perf/util/evlist.c | 4 ++++ tools/perf/util/evlist.h | 2 ++ 5 files changed, 11 insertions(+) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 3405268bd9a5..c0ee766bca05 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -651,6 +651,7 @@ Available commands: 'enable name' : enable event 'name' 'disable name' : disable event 'name' 'snapshot' : AUX area tracing snapshot). + 'stop' : stop perf record 'evlist [-v|-g|-F] : display all events -F Show just the sample frequency used for each event. diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index a0e832002eca..36fc71097822 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1942,6 +1942,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) hit_auxtrace_snapshot_trigger(rec); evlist__ctlfd_ack(rec->evlist); break; + case EVLIST_CTL_CMD_STOP: + done = 1; + break; case EVLIST_CTL_CMD_ACK: case EVLIST_CTL_CMD_UNSUPPORTED: case EVLIST_CTL_CMD_ENABLE: diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index fc535e2df857..46a3f4ab1371 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -622,6 +622,7 @@ static void process_evlist(struct evlist *evlist, unsigned int interval) case EVLIST_CTL_CMD_ACK: case EVLIST_CTL_CMD_UNSUPPORTED: case EVLIST_CTL_CMD_EVLIST: + case EVLIST_CTL_CMD_STOP: default: break; } diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 9e890b482220..c6de70cefbae 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1940,6 +1940,9 @@ static int evlist__ctlfd_recv(struct evlist *evlist, enum evlist_ctl_cmd *cmd, } else if (!strncmp(cmd_data, EVLIST_CTL_CMD_EVLIST_TAG, (sizeof(EVLIST_CTL_CMD_EVLIST_TAG)-1))) { *cmd = EVLIST_CTL_CMD_EVLIST; + } else if (!strncmp(cmd_data, EVLIST_CTL_CMD_STOP_TAG, + (sizeof(EVLIST_CTL_CMD_STOP_TAG)-1))) { + *cmd = EVLIST_CTL_CMD_STOP; } } @@ -2077,6 +2080,7 @@ int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd) err = evlist__ctlfd_list(evlist, cmd_data); break; case EVLIST_CTL_CMD_SNAPSHOT: + case EVLIST_CTL_CMD_STOP: break; case EVLIST_CTL_CMD_ACK: case EVLIST_CTL_CMD_UNSUPPORTED: diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index e79c64d81d21..6f45c3630355 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -331,6 +331,7 @@ struct evsel *evlist__reset_weak_group(struct evlist *evlist, struct evsel *evse #define EVLIST_CTL_CMD_ACK_TAG "ack\n" #define EVLIST_CTL_CMD_SNAPSHOT_TAG "snapshot" #define EVLIST_CTL_CMD_EVLIST_TAG "evlist" +#define EVLIST_CTL_CMD_STOP_TAG "stop" #define EVLIST_CTL_CMD_MAX_LEN 64 @@ -341,6 +342,7 @@ enum evlist_ctl_cmd { EVLIST_CTL_CMD_ACK, EVLIST_CTL_CMD_SNAPSHOT, EVLIST_CTL_CMD_EVLIST, + EVLIST_CTL_CMD_STOP, }; int evlist__parse_control(const char *str, int *ctl_fd, int *ctl_fd_ack, bool *ctl_fd_close); From 47fddcb479e73c341fb414e40a89572dacadd360 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 27 Dec 2020 00:20:38 +0100 Subject: [PATCH 039/148] perf tools: Add 'ping' control command Add a control 'ping' command to detect if perf is up and its control interface is operational. It will be used in following daemon patches to synchronize with record session - when control interface is up and running, we know that perf record is monitoring and ready to receive signals. Example session: terminal 1: # mkfifo control ack # perf record --control=fifo:control,ack terminal 2: # echo ping > control # cat ack ack Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Alexei Budankov Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201226232038.390883-5-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 1 + tools/perf/builtin-record.c | 1 + tools/perf/builtin-stat.c | 1 + tools/perf/util/evlist.c | 4 ++++ tools/perf/util/evlist.h | 2 ++ 5 files changed, 9 insertions(+) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index c0ee766bca05..f3161c9673e9 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -652,6 +652,7 @@ Available commands: 'disable name' : disable event 'name' 'snapshot' : AUX area tracing snapshot). 'stop' : stop perf record + 'ping' : ping 'evlist [-v|-g|-F] : display all events -F Show just the sample frequency used for each event. diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 36fc71097822..8a0127d4fb52 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1950,6 +1950,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) case EVLIST_CTL_CMD_ENABLE: case EVLIST_CTL_CMD_DISABLE: case EVLIST_CTL_CMD_EVLIST: + case EVLIST_CTL_CMD_PING: default: break; } diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 46a3f4ab1371..a380e38ca937 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -623,6 +623,7 @@ static void process_evlist(struct evlist *evlist, unsigned int interval) case EVLIST_CTL_CMD_UNSUPPORTED: case EVLIST_CTL_CMD_EVLIST: case EVLIST_CTL_CMD_STOP: + case EVLIST_CTL_CMD_PING: default: break; } diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index c6de70cefbae..61b2408821e5 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1943,6 +1943,9 @@ static int evlist__ctlfd_recv(struct evlist *evlist, enum evlist_ctl_cmd *cmd, } else if (!strncmp(cmd_data, EVLIST_CTL_CMD_STOP_TAG, (sizeof(EVLIST_CTL_CMD_STOP_TAG)-1))) { *cmd = EVLIST_CTL_CMD_STOP; + } else if (!strncmp(cmd_data, EVLIST_CTL_CMD_PING_TAG, + (sizeof(EVLIST_CTL_CMD_PING_TAG)-1))) { + *cmd = EVLIST_CTL_CMD_PING; } } @@ -2081,6 +2084,7 @@ int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd) break; case EVLIST_CTL_CMD_SNAPSHOT: case EVLIST_CTL_CMD_STOP: + case EVLIST_CTL_CMD_PING: break; case EVLIST_CTL_CMD_ACK: case EVLIST_CTL_CMD_UNSUPPORTED: diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 6f45c3630355..7c2521cb6b09 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -332,6 +332,7 @@ struct evsel *evlist__reset_weak_group(struct evlist *evlist, struct evsel *evse #define EVLIST_CTL_CMD_SNAPSHOT_TAG "snapshot" #define EVLIST_CTL_CMD_EVLIST_TAG "evlist" #define EVLIST_CTL_CMD_STOP_TAG "stop" +#define EVLIST_CTL_CMD_PING_TAG "ping" #define EVLIST_CTL_CMD_MAX_LEN 64 @@ -343,6 +344,7 @@ enum evlist_ctl_cmd { EVLIST_CTL_CMD_SNAPSHOT, EVLIST_CTL_CMD_EVLIST, EVLIST_CTL_CMD_STOP, + EVLIST_CTL_CMD_PING, }; int evlist__parse_control(const char *str, int *ctl_fd, int *ctl_fd_ack, bool *ctl_fd_close); From 7efce5c2404ee03506b15efb010dee6851d1bfa8 Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 25 Jan 2021 20:47:22 +0800 Subject: [PATCH 040/148] perf test: Add parse-metric memory bandwidth testcase Event duration_time in a metric expression requires special handling. Improve test coverage by including a metric whose expression includes duration_time. The actual metric is a copied from the L1D_Cache_Fill_BW metric on my broadwell machine. Signed-off-by: John Garry Acked-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Kajol Jain Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: linuxarm@openeuler.org Link: http://lore.kernel.org/lkml/1611578842-5749-1-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/parse-metric.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c index ce7be37f0d88..6dc1db1626ad 100644 --- a/tools/perf/tests/parse-metric.c +++ b/tools/perf/tests/parse-metric.c @@ -69,6 +69,10 @@ static struct pmu_event pme_test[] = { .metric_expr = "1/m3", .metric_name = "M3", }, +{ + .metric_expr = "64 * l1d.replacement / 1000000000 / duration_time", + .metric_name = "L1D_Cache_Fill_BW", +}, { .name = NULL, } @@ -107,6 +111,8 @@ static void load_runtime_stat(struct runtime_stat *st, struct evlist *evlist, evlist__for_each_entry(evlist, evsel) { count = find_value(evsel->name, vals); perf_stat__update_shadow_stats(evsel, count, 0, st); + if (!strcmp(evsel->name, "duration_time")) + update_stats(&walltime_nsecs_stats, count); } } @@ -321,6 +327,23 @@ static int test_recursion_fail(void) return 0; } +static int test_memory_bandwidth(void) +{ + double ratio; + struct value vals[] = { + { .event = "l1d.replacement", .val = 4000000 }, + { .event = "duration_time", .val = 200000000 }, + { .event = NULL, }, + }; + + TEST_ASSERT_VAL("failed to compute metric", + compute_metric("L1D_Cache_Fill_BW", vals, &ratio) == 0); + TEST_ASSERT_VAL("L1D_Cache_Fill_BW, wrong ratio", + 1.28 == ratio); + + return 0; +} + static int test_metric_group(void) { double ratio1, ratio2; @@ -353,5 +376,6 @@ int test__parse_metric(struct test *test __maybe_unused, int subtest __maybe_unu TEST_ASSERT_VAL("DCache_L2 failed", test_dcache_l2() == 0); TEST_ASSERT_VAL("recursion fail failed", test_recursion_fail() == 0); TEST_ASSERT_VAL("test metric group", test_metric_group() == 0); + TEST_ASSERT_VAL("Memory bandwidth", test_memory_bandwidth() == 0); return 0; } From 42641d6f4d15e6dbc883195be8fe51b5f5f57fc7 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 21 Jan 2021 05:37:52 -0800 Subject: [PATCH 041/148] perf stat: Add Topdown metrics events as default events The Topdown Microarchitecture Analysis (TMA) Method is a structured analysis methodology to identify critical performance bottlenecks in out-of-order processors. From the Ice Lake and later platforms, the Topdown information can be retrieved from the dedicated "metrics" register, which isn't impacted by other events. Also, the Topdown metrics support both per thread/process and per core measuring. Adding Topdown metrics events as default events can enrich the default measuring information, and would not cost any extra multiplexing. Introduce arch_evlist__add_default_attrs() to allow architecture specific default events. Add the Topdown metrics events in the X86 specific arch_evlist__add_default_attrs(). Other architectures can add their own default events later separately. With the patch: $ perf stat sleep 1 Performance counter stats for 'sleep 1': 0.82 msec task-clock:u # 0.001 CPUs utilized 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 61 page-faults:u # 0.074 M/sec 319,941 cycles:u # 0.388 GHz 242,802 instructions:u # 0.76 insn per cycle 54,380 branches:u # 66.028 M/sec 4,043 branch-misses:u # 7.43% of all branches 1,585,555 slots:u # 1925.189 M/sec 238,941 topdown-retiring:u # 15.0% retiring 410,378 topdown-bad-spec:u # 25.8% bad speculation 634,222 topdown-fe-bound:u # 39.9% frontend bound 304,675 topdown-be-bound:u # 19.2% backend bound 1.001791625 seconds time elapsed 0.000000000 seconds user 0.001572000 seconds sys Signed-off-by: Kan Liang Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Jin Yao Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/20210121133752.118327-1-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/Build | 1 + tools/perf/arch/x86/util/evlist.c | 15 +++++++++++++++ tools/perf/builtin-stat.c | 3 +++ tools/perf/util/evlist.c | 5 +++++ tools/perf/util/evlist.h | 2 ++ 5 files changed, 26 insertions(+) create mode 100644 tools/perf/arch/x86/util/evlist.c diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build index 347c39b960eb..ce1ec92fecdc 100644 --- a/tools/perf/arch/x86/util/Build +++ b/tools/perf/arch/x86/util/Build @@ -6,6 +6,7 @@ perf-y += perf_regs.o perf-y += topdown.o perf-y += machine.o perf-y += event.o +perf-y += evlist.o perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c new file mode 100644 index 000000000000..8c6732cc7794 --- /dev/null +++ b/tools/perf/arch/x86/util/evlist.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include "util/pmu.h" +#include "util/evlist.h" +#include "util/parse-events.h" + +#define TOPDOWN_L1_EVENTS "{slots,topdown-retiring,topdown-bad-spec,topdown-fe-bound,topdown-be-bound}" + +int arch_evlist__add_default_attrs(struct evlist *evlist) +{ + if (!pmu_have_event("cpu", "slots")) + return 0; + + return parse_events(evlist, TOPDOWN_L1_EVENTS, NULL); +} diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index a380e38ca937..b009dde5c601 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1828,6 +1828,9 @@ static int add_default_attributes(void) } if (evlist__add_default_attrs(evsel_list, default_attrs1) < 0) return -1; + + if (arch_evlist__add_default_attrs(evsel_list) < 0) + return -1; } /* Detailed events get appended to the event list: */ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 61b2408821e5..5983a9f13930 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -304,6 +304,11 @@ int __evlist__add_default_attrs(struct evlist *evlist, struct perf_event_attr *a return evlist__add_attrs(evlist, attrs, nr_attrs); } +__weak int arch_evlist__add_default_attrs(struct evlist *evlist __maybe_unused) +{ + return 0; +} + struct evsel *evlist__find_tracepoint_by_id(struct evlist *evlist, int id) { struct evsel *evsel; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 7c2521cb6b09..7243e94da398 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -110,6 +110,8 @@ int __evlist__add_default_attrs(struct evlist *evlist, #define evlist__add_default_attrs(evlist, array) \ __evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array)) +int arch_evlist__add_default_attrs(struct evlist *evlist); + int evlist__add_dummy(struct evlist *evlist); int evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr, From c69bf11ad3d30b6bf01cfa538ddff1a59467c734 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 28 Jan 2021 09:52:47 -0300 Subject: [PATCH 042/148] perf tools: Fix DSO filtering when not finding a map for a sampled address When we lookup an address and don't find a map we should filter that sample if the user specified a list of --dso entries to filter on, fix it. Before: $ perf script sleep 274800 2843.556162: 1 cycles:u: ffffffffbb26bff4 [unknown] ([unknown]) sleep 274800 2843.556168: 1 cycles:u: ffffffffbb2b047d [unknown] ([unknown]) sleep 274800 2843.556171: 1 cycles:u: ffffffffbb2706b2 [unknown] ([unknown]) sleep 274800 2843.556174: 6 cycles:u: ffffffffbb2b0267 [unknown] ([unknown]) sleep 274800 2843.556176: 59 cycles:u: ffffffffbb2b03b1 [unknown] ([unknown]) sleep 274800 2843.556180: 691 cycles:u: ffffffffbb26bff4 [unknown] ([unknown]) sleep 274800 2843.556189: 9160 cycles:u: 7fa9550eeaa3 __GI___tunables_init+0xf3 (/usr/lib64/ld-2.32.so) sleep 274800 2843.556312: 86937 cycles:u: 7fa9550e157b _dl_lookup_symbol_x+0x4b (/usr/lib64/ld-2.32.so) $ So we have some samples we somehow didn't find in a map for, if we now do: $ perf report --stdio --dso /usr/lib64/ld-2.32.so # dso: /usr/lib64/ld-2.32.so # # Total Lost Samples: 0 # # Samples: 8 of event 'cycles:u' # Event count (approx.): 96856 # # Overhead Command Symbol # ........ ....... ........................ # 89.76% sleep [.] _dl_lookup_symbol_x 9.46% sleep [.] __GI___tunables_init 0.71% sleep [k] 0xffffffffbb26bff4 0.06% sleep [k] 0xffffffffbb2b03b1 0.01% sleep [k] 0xffffffffbb2b0267 0.00% sleep [k] 0xffffffffbb2706b2 0.00% sleep [k] 0xffffffffbb2b047d $ After this patch we get the right output with just entries for the DSOs specified in --dso: $ perf report --stdio --dso /usr/lib64/ld-2.32.so # dso: /usr/lib64/ld-2.32.so # # Total Lost Samples: 0 # # Samples: 8 of event 'cycles:u' # Event count (approx.): 96856 # # Overhead Command Symbol # ........ ....... ........................ # 89.76% sleep [.] _dl_lookup_symbol_x 9.46% sleep [.] __GI___tunables_init $ # Fixes: 96415e4d3f5fdf9c ("perf symbols: Avoid unnecessary symbol loading when dso list is specified") Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jin Yao Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210128131209.GD775562@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index fbe8578e4c47..208b6c141d98 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -692,6 +692,8 @@ int machine__resolve(struct machine *machine, struct addr_location *al, } al->sym = map__find_symbol(al->map, al->addr); + } else if (symbol_conf.dso_list) { + al->filtered |= (1 << HIST_FILTER__DSO); } if (symbol_conf.sym_list) { From 4b799a9b772fc32e893408a22ae92fb96b5aaaa1 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Mon, 25 Jan 2021 07:27:50 +0800 Subject: [PATCH 043/148] perf script: Support DSO filter like in other perf tools Other perf tool builtins already supported a DSO filter. For example: $ perf report --dsos a,b,c which only considers symbols in these dsos. Now the DSO filter is supported in 'perf script': root@kbl-ppc:~# ./perf script --dsos "[kernel.kallsyms]" perf 18123 [000] 6142863.075104: 1 cycles: ffffffff9ca77308 native_write_msr+0x8 ([kernel.kallsyms]) perf 18123 [000] 6142863.075107: 1 cycles: ffffffff9ca77308 native_write_msr+0x8 ([kernel.kallsyms]) perf 18123 [000] 6142863.075108: 10 cycles: ffffffff9ca77308 native_write_msr+0x8 ([kernel.kallsyms]) perf 18123 [000] 6142863.075109: 273 cycles: ffffffff9ca7730a native_write_msr+0xa ([kernel.kallsyms]) perf 18123 [000] 6142863.075110: 7684 cycles: ffffffff9ca3c9c0 native_sched_clock+0x50 ([kernel.kallsyms]) perf 18123 [000] 6142863.075112: 213017 cycles: ffffffff9d765a92 syscall_exit_to_user_mode+0x32 ([kernel.kallsyms]) perf 18123 [001] 6142863.075156: 1 cycles: ffffffff9ca77308 native_write_msr+0x8 ([kernel.kallsyms]) perf 18123 [001] 6142863.075158: 1 cycles: ffffffff9ca77308 native_write_msr+0x8 ([kernel.kallsyms]) perf 18123 [001] 6142863.075159: 17 cycles: ffffffff9ca77308 native_write_msr+0x8 ([kernel.kallsyms]) Committer testing: $ perf script ls 2364888 29303.010949: 1 cycles:u: ffffffffa4bbc6a9 [unknown] ([unknown]) ls 2364888 29303.010957: 1 cycles:u: ffffffffa429ef48 [unknown] ([unknown]) ls 2364888 29303.010961: 1 cycles:u: ffffffffa4260133 [unknown] ([unknown]) ls 2364888 29303.010964: 5 cycles:u: ffffffffa429efad [unknown] ([unknown]) ls 2364888 29303.010967: 41 cycles:u: ffffffffa42a4586 [unknown] ([unknown]) ls 2364888 29303.010972: 435 cycles:u: ffffffffa429efe0 [unknown] ([unknown]) ls 2364888 29303.010978: 5142 cycles:u: 7f9b95bc2abf __GI___tunables_init+0x11f (/usr/lib64/ld-2.32.so) ls 2364888 29303.011006: 38551 cycles:u: ffffffffa4290f61 [unknown] ([unknown]) ls 2364888 29303.011486: 238234 cycles:u: 7f9b95bb7741 _dl_relocate_object+0xa71 (/usr/lib64/ld-2.32.so) ls 2364888 29303.011937: 415870 cycles:u: 7f9b95a1c80e __strcoll_l+0xe (/usr/lib64/libc-2.32.so) $ Before: $ perf script --dsos /usr/lib64/libc-2.32.so |& head -5 Error: unknown option `dsos' Usage: perf script [] or: perf script [] record