Merge branch 'perf-tools-next' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git

Stephen Rothwell 2024-12-20 10:39:10 +11:00
commit 2cbe95f541
129 changed files with 2493 additions and 1670 deletions

View File

@ -106,6 +106,7 @@ FEATURE_TESTS += libbfd-liberty
FEATURE_TESTS += libbfd-liberty-z
FEATURE_TESTS += disassembler-four-args
FEATURE_TESTS += disassembler-init-styled
FEATURE_TESTS += libelf-zstd
FEATURE_DISPLAY := clang-bpf-co-re
FEATURE_DISPLAY += llvm
@ -132,6 +133,12 @@ endif
LIBS = $(LIBBPF) -lelf -lz
LIBS_BOOTSTRAP = $(LIBBPF_BOOTSTRAP) -lelf -lz
ifeq ($(feature-libelf-zstd),1)
LIBS += -lzstd
LIBS_BOOTSTRAP += -lzstd
endif
ifeq ($(feature-libcap), 1)
CFLAGS += -DUSE_LIBCAP
LIBS += -lcap

View File

@ -28,6 +28,41 @@ endef
# the rule that uses them - an example for that is the 'bionic'
# feature check. ]
#
# These + the ones in FEATURE_TESTS_EXTRA are included in
# tools/build/feature/test-all.c and we try to build it all together;
# if that works, all those features are set to '1', meaning they are all enabled.
#
# There are things like fortify-source that will be set to 1 because test-all
# is built with the flags needed to test if it is enabled, resulting in
#
# $ rm -rf /tmp/b ; mkdir /tmp/b ; make -C tools/perf O=/tmp/b feature-dump
# $ grep fortify-source /tmp/b/FEATURE-DUMP
# feature-fortify-source=1
# $
#
# All the others should have lines in tools/build/feature/test-all.c like:
#
# #define main main_test_disassembler_init_styled
# # include "test-disassembler-init-styled.c"
# #undef main
#
# #define main main_test_libzstd
# # include "test-libzstd.c"
# #undef main
#
# int main(int argc, char *argv[])
# {
# main_test_disassembler_four_args();
# main_test_libzstd();
# return 0;
# }
#
# If the sample above works, then we end up with these lines in the FEATURE-DUMP
# file:
#
# feature-disassembler-four-args=1
# feature-libzstd=1
#
FEATURE_TESTS_BASIC := \
backtrace \
libdw \
@ -38,17 +73,16 @@ FEATURE_TESTS_BASIC := \
glibc \
libbfd \
libbfd-buildid \
libcap \
libelf \
libelf-getphdrnum \
libelf-gelf_getnote \
libelf-getshdrstrndx \
libelf-zstd \
libnuma \
numa_num_possible_cpus \
libperl \
libpython \
libslang \
libslang-include-subdir \
libtraceevent \
libtracefs \
libcpupower \
@ -89,13 +123,6 @@ FEATURE_TESTS_EXTRA := \
libbfd-liberty \
libbfd-liberty-z \
libopencsd \
libunwind-x86 \
libunwind-x86_64 \
libunwind-arm \
libunwind-aarch64 \
libunwind-debug-frame \
libunwind-debug-frame-arm \
libunwind-debug-frame-aarch64 \
cxx \
llvm \
clang \
@ -122,7 +149,6 @@ FEATURE_DISPLAY ?= \
glibc \
libbfd \
libbfd-buildid \
libcap \
libelf \
libnuma \
numa_num_possible_cpus \

View File

@ -28,6 +28,7 @@ FILES= \
test-libelf-getphdrnum.bin \
test-libelf-gelf_getnote.bin \
test-libelf-getshdrstrndx.bin \
test-libelf-zstd.bin \
test-libdebuginfod.bin \
test-libnuma.bin \
test-numa_num_possible_cpus.bin \
@ -110,7 +111,7 @@ all: $(FILES)
__BUILD = $(CC) $(CFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.c,$(@F)) $(LDFLAGS)
BUILD = $(__BUILD) > $(@:.bin=.make.output) 2>&1
BUILD_BFD = $(BUILD) -DPACKAGE='"perf"' -lbfd -ldl
BUILD_ALL = $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma -lzstd -lcap
BUILD_ALL = $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma -lzstd
__BUILDXX = $(CXX) $(CXXFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(@F)) $(LDFLAGS)
BUILDXX = $(__BUILDXX) > $(@:.bin=.make.output) 2>&1
@ -196,6 +197,9 @@ $(OUTPUT)test-libelf-gelf_getnote.bin:
$(OUTPUT)test-libelf-getshdrstrndx.bin:
$(BUILD) -lelf
$(OUTPUT)test-libelf-zstd.bin:
$(BUILD) -lelf -lz -lzstd
$(OUTPUT)test-libdebuginfod.bin:
$(BUILD) -ldebuginfod

View File

@ -58,8 +58,8 @@
# include "test-libelf-getshdrstrndx.c"
#undef main
#define main main_test_libunwind
# include "test-libunwind.c"
#define main main_test_libelf_zstd
# include "test-libelf-zstd.c"
#undef main
#define main main_test_libslang
@ -170,6 +170,14 @@
# include "test-libzstd.c"
#undef main
#define main main_test_libtraceevent
# include "test-libtraceevent.c"
#undef main
#define main main_test_libtracefs
# include "test-libtracefs.c"
#undef main
int main(int argc, char *argv[])
{
main_test_libpython();
@ -184,7 +192,6 @@ int main(int argc, char *argv[])
main_test_libelf_getphdrnum();
main_test_libelf_gelf_getnote();
main_test_libelf_getshdrstrndx();
main_test_libunwind();
main_test_libslang();
main_test_libbfd();
main_test_libbfd_buildid();
@ -208,6 +215,8 @@ int main(int argc, char *argv[])
main_test_reallocarray();
main_test_disassembler_four_args();
main_test_libzstd();
main_test_libtraceevent();
main_test_libtracefs();
return 0;
}

View File

@ -0,0 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
#include <stddef.h>
#include <libelf.h>
int main(void)
{
elf_compress(NULL, ELFCOMPRESS_ZSTD, 0);
return 0;
}
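
The new feature test only needs elf_compress() to resolve at link time against
-lelf -lz -lzstd; it is never run. For context, a minimal sketch of real usage,
with hypothetical 'elf' and 'ndx' inputs assumed to come from elf_begin()/
elf_getscn() elsewhere:

	#include <stdio.h>
	#include <libelf.h>

	/* Hedged sketch, not part of this commit: compress one section with
	 * zstd, the libelf capability the feature test probes for.
	 */
	static int compress_section_zstd(Elf *elf, size_t ndx)
	{
		Elf_Scn *scn = elf_getscn(elf, ndx);

		if (!scn || elf_compress(scn, ELFCOMPRESS_ZSTD, 0) < 0) {
			fprintf(stderr, "zstd compression failed: %s\n", elf_errmsg(-1));
			return -1;
		}
		return 0;
	}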

View File

@ -296,7 +296,7 @@ int filename__read_int(const char *filename, int *value)
int fd = open(filename, O_RDONLY), err = -1;
if (fd < 0)
return -1;
return -errno;
if (read(fd, line, sizeof(line)) > 0) {
*value = atoi(line);
@ -314,7 +314,7 @@ static int filename__read_ull_base(const char *filename,
int fd = open(filename, O_RDONLY), err = -1;
if (fd < 0)
return -1;
return -errno;
if (read(fd, line, sizeof(line)) > 0) {
*value = strtoull(line, NULL, base);
@ -372,7 +372,7 @@ int filename__write_int(const char *filename, int value)
char buf[64];
if (fd < 0)
return err;
return -errno;
sprintf(buf, "%d", value);
if (write(fd, buf, sizeof(buf)) == sizeof(buf))
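
Returning -errno instead of a bare -1 lets callers report why the open failed.
A hedged caller sketch (the sysctl path is only illustrative):

	#include <stdio.h>
	#include <string.h>
	#include <api/fs/fs.h>	/* filename__read_int() */

	/* Hedged sketch: with the change above, the errno value survives
	 * to the caller and can be printed.
	 */
	static void show_paranoid_level(void)
	{
		int value;
		int ret = filename__read_int("/proc/sys/kernel/perf_event_paranoid", &value);

		if (ret < 0)
			fprintf(stderr, "read failed: %s\n", strerror(-ret));
		else
			printf("perf_event_paranoid = %d\n", value);
	}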

View File

@ -39,7 +39,6 @@ SYNOPSIS
struct perf_cpu_map *perf_cpu_map__new_any_cpu(void);
struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list);
struct perf_cpu_map *perf_cpu_map__read(FILE *file);
struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map);
struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
struct perf_cpu_map *other);

View File

@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <errno.h>
#include <perf/cpumap.h>
#include <stdlib.h>
#include <linux/refcount.h>
@ -10,6 +11,9 @@
#include <ctype.h>
#include <limits.h>
#include "internal.h"
#include <api/fs/fs.h>
#define MAX_NR_CPUS 4096
void perf_cpu_map__set_nr(struct perf_cpu_map *map, int nr_cpus)
{
@ -100,12 +104,12 @@ static struct perf_cpu_map *cpu_map__new_sysconf(void)
static struct perf_cpu_map *cpu_map__new_sysfs_online(void)
{
struct perf_cpu_map *cpus = NULL;
FILE *onlnf;
char *buf = NULL;
size_t buf_len;
onlnf = fopen("/sys/devices/system/cpu/online", "r");
if (onlnf) {
cpus = perf_cpu_map__read(onlnf);
fclose(onlnf);
if (sysfs__read_str("devices/system/cpu/online", &buf, &buf_len) >= 0) {
cpus = perf_cpu_map__new(buf);
free(buf);
}
return cpus;
}
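
Reading the whole string with sysfs__read_str() and handing it to
perf_cpu_map__new() works because the parser now tolerates the trailing
newline that sysfs strings carry (see the '\n' hunks below). A hedged sketch:

	#include <perf/cpumap.h>

	/* Hedged sketch: "0-3,6" and "0-3,6\n" now parse identically. */
	struct perf_cpu_map *cpus = perf_cpu_map__new("0-3,6\n");

	if (cpus)
		perf_cpu_map__put(cpus);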
@ -158,62 +162,6 @@ static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, const struct perf_cpu
return cpus;
}
struct perf_cpu_map *perf_cpu_map__read(FILE *file)
{
struct perf_cpu_map *cpus = NULL;
int nr_cpus = 0;
struct perf_cpu *tmp_cpus = NULL, *tmp;
int max_entries = 0;
int n, cpu, prev;
char sep;
sep = 0;
prev = -1;
for (;;) {
n = fscanf(file, "%u%c", &cpu, &sep);
if (n <= 0)
break;
if (prev >= 0) {
int new_max = nr_cpus + cpu - prev - 1;
WARN_ONCE(new_max >= MAX_NR_CPUS, "Perf can support %d CPUs. "
"Consider raising MAX_NR_CPUS\n", MAX_NR_CPUS);
if (new_max >= max_entries) {
max_entries = new_max + MAX_NR_CPUS / 2;
tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu));
if (tmp == NULL)
goto out_free_tmp;
tmp_cpus = tmp;
}
while (++prev < cpu)
tmp_cpus[nr_cpus++].cpu = prev;
}
if (nr_cpus == max_entries) {
max_entries += MAX_NR_CPUS;
tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu));
if (tmp == NULL)
goto out_free_tmp;
tmp_cpus = tmp;
}
tmp_cpus[nr_cpus++].cpu = cpu;
if (n == 2 && sep == '-')
prev = cpu;
else
prev = -1;
if (n == 1 || sep == '\n')
break;
}
if (nr_cpus > 0)
cpus = cpu_map__trim_new(nr_cpus, tmp_cpus);
out_free_tmp:
free(tmp_cpus);
return cpus;
}
struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
{
struct perf_cpu_map *cpus = NULL;
@ -238,7 +186,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
p = NULL;
start_cpu = strtoul(cpu_list, &p, 0);
if (start_cpu >= INT_MAX
|| (*p != '\0' && *p != ',' && *p != '-'))
|| (*p != '\0' && *p != ',' && *p != '-' && *p != '\n'))
goto invalid;
if (*p == '-') {
@ -246,7 +194,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
p = NULL;
end_cpu = strtoul(cpu_list, &p, 0);
if (end_cpu >= INT_MAX || (*p != '\0' && *p != ','))
if (end_cpu >= INT_MAX || (*p != '\0' && *p != ',' && *p != '\n'))
goto invalid;
if (end_cpu < start_cpu)
@ -265,7 +213,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
goto invalid;
if (nr_cpus == max_entries) {
max_entries += MAX_NR_CPUS;
max_entries += max(end_cpu - start_cpu + 1, 16UL);
tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu));
if (tmp == NULL)
goto invalid;
@ -279,14 +227,15 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
cpu_list = p;
}
if (nr_cpus > 0)
if (nr_cpus > 0) {
cpus = cpu_map__trim_new(nr_cpus, tmp_cpus);
else if (*cpu_list != '\0') {
} else if (*cpu_list != '\0') {
pr_warning("Unexpected characters at end of cpu list ('%s'), using online CPUs.",
cpu_list);
cpus = perf_cpu_map__new_online_cpus();
} else
} else {
cpus = perf_cpu_map__new_any_cpu();
}
invalid:
free(tmp_cpus);
out:
@ -436,46 +385,49 @@ bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu
}
/*
* Merge two cpumaps
* Merge two cpumaps.
*
* orig either gets freed and replaced with a new map, or reused
* with no reference count change (similar to "realloc")
* other has its reference count increased.
* If 'other' is subset of '*orig', '*orig' keeps itself with no reference count
* change (similar to "realloc").
*
* If '*orig' is subset of 'other', '*orig' reuses 'other' with its reference
* count increased.
*
* Otherwise, '*orig' gets freed and replaced with a new map.
*/
struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
struct perf_cpu_map *other)
int perf_cpu_map__merge(struct perf_cpu_map **orig, struct perf_cpu_map *other)
{
struct perf_cpu *tmp_cpus;
int tmp_len;
int i, j, k;
struct perf_cpu_map *merged;
if (perf_cpu_map__is_subset(orig, other))
return orig;
if (perf_cpu_map__is_subset(other, orig)) {
perf_cpu_map__put(orig);
return perf_cpu_map__get(other);
if (perf_cpu_map__is_subset(*orig, other))
return 0;
if (perf_cpu_map__is_subset(other, *orig)) {
perf_cpu_map__put(*orig);
*orig = perf_cpu_map__get(other);
return 0;
}
tmp_len = __perf_cpu_map__nr(orig) + __perf_cpu_map__nr(other);
tmp_len = __perf_cpu_map__nr(*orig) + __perf_cpu_map__nr(other);
tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu));
if (!tmp_cpus)
return NULL;
return -ENOMEM;
/* Standard merge algorithm from wikipedia */
i = j = k = 0;
while (i < __perf_cpu_map__nr(orig) && j < __perf_cpu_map__nr(other)) {
if (__perf_cpu_map__cpu(orig, i).cpu <= __perf_cpu_map__cpu(other, j).cpu) {
if (__perf_cpu_map__cpu(orig, i).cpu == __perf_cpu_map__cpu(other, j).cpu)
while (i < __perf_cpu_map__nr(*orig) && j < __perf_cpu_map__nr(other)) {
if (__perf_cpu_map__cpu(*orig, i).cpu <= __perf_cpu_map__cpu(other, j).cpu) {
if (__perf_cpu_map__cpu(*orig, i).cpu == __perf_cpu_map__cpu(other, j).cpu)
j++;
tmp_cpus[k++] = __perf_cpu_map__cpu(orig, i++);
tmp_cpus[k++] = __perf_cpu_map__cpu(*orig, i++);
} else
tmp_cpus[k++] = __perf_cpu_map__cpu(other, j++);
}
while (i < __perf_cpu_map__nr(orig))
tmp_cpus[k++] = __perf_cpu_map__cpu(orig, i++);
while (i < __perf_cpu_map__nr(*orig))
tmp_cpus[k++] = __perf_cpu_map__cpu(*orig, i++);
while (j < __perf_cpu_map__nr(other))
tmp_cpus[k++] = __perf_cpu_map__cpu(other, j++);
@ -483,8 +435,9 @@ struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
merged = cpu_map__trim_new(k, tmp_cpus);
free(tmp_cpus);
perf_cpu_map__put(orig);
return merged;
perf_cpu_map__put(*orig);
*orig = merged;
return 0;
}
struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig,
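
With the signature change, callers pass the map by reference and get an error
code back; on success '*orig' is updated in place and 'other' keeps its own
reference. A hedged usage sketch of the new API:

	#include <perf/cpumap.h>

	/* Hedged sketch of the in/out-parameter API. */
	static int demo_merge(void)
	{
		struct perf_cpu_map *all = perf_cpu_map__new("0-3");
		struct perf_cpu_map *extra = perf_cpu_map__new("2-5");
		int err = perf_cpu_map__merge(&all, extra); /* 'all' becomes 0-5 */

		perf_cpu_map__put(extra);
		perf_cpu_map__put(all);
		return err;
	}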

View File

@ -89,7 +89,7 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
evsel->threads = perf_thread_map__get(evlist->threads);
}
evlist->all_cpus = perf_cpu_map__merge(evlist->all_cpus, evsel->cpus);
perf_cpu_map__merge(&evlist->all_cpus, evsel->cpus);
}
static void perf_evlist__propagate_maps(struct perf_evlist *evlist)

View File

@ -21,10 +21,6 @@ DECLARE_RC_STRUCT(perf_cpu_map) {
struct perf_cpu map[];
};
#ifndef MAX_NR_CPUS
#define MAX_NR_CPUS 2048
#endif
struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus);
int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu);
bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu_map *b);

View File

@ -3,7 +3,6 @@
#define __LIBPERF_CPUMAP_H
#include <perf/core.h>
#include <stdio.h>
#include <stdbool.h>
/** A wrapper around a CPU to avoid confusion with the perf_cpu_map's map's indices. */
@ -37,10 +36,9 @@ LIBPERF_API struct perf_cpu_map *perf_cpu_map__new_online_cpus(void);
* perf_cpu_map__new_online_cpus is returned.
*/
LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__read(FILE *file);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
struct perf_cpu_map *other);
LIBPERF_API int perf_cpu_map__merge(struct perf_cpu_map **orig,
struct perf_cpu_map *other);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig,
struct perf_cpu_map *other);
LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map);

View File

@ -6,7 +6,6 @@ LIBPERF_0.0.1 {
perf_cpu_map__get;
perf_cpu_map__put;
perf_cpu_map__new;
perf_cpu_map__read;
perf_cpu_map__nr;
perf_cpu_map__cpu;
perf_cpu_map__has_any_cpu_or_is_empty;

View File

@ -40,7 +40,7 @@ The '$HOME/.perfconfig' file is used to store a per-user configuration.
The file '$(sysconfdir)/perfconfig' can be used to
store a system-wide default configuration.
One an disable reading config files by setting the PERF_CONFIG environment
One can disable reading config files by setting the PERF_CONFIG environment
variable to /dev/null, or provide an alternate config file by setting that
variable.

View File

@ -148,6 +148,17 @@ OPTIONS for 'perf ftrace latency'
--use-nsec::
Use nano-second instead of micro-second as a base unit of the histogram.
--bucket-range=::
Bucket range in ms or ns (according to -n/--use-nsec), default is log2() mode.
--min-latency=::
Minimum latency for the start of the first bucket, in ms or ns (according to
-n/--use-nsec).
--max-latency=::
Maximum latency for the start of the last bucket, in ms or ns (according to
-n/--use-nsec). The setting is ignored if the value results in more than
22 buckets.
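
A hypothetical invocation combining the new options (function name and values
are illustrative): latencies below 100us land in the first bucket, the
100us-2000us span is split into 100us-wide buckets, and anything above
collects in the last bucket:

	$ perf ftrace latency -T mutex_lock --bucket-range=100 --min-latency=100 --max-latency=2000 -- sleep 1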
OPTIONS for 'perf ftrace profile'
---------------------------------

View File

@ -151,7 +151,7 @@ displayed as follows:
There are two ways that instructions-per-cycle (IPC) can be calculated depending
on the recording.
If the 'cyc' config term (see config terms section below) was used, then IPC
If the 'cyc' config term (see <<_config_terms,config terms>> section below) was used, then IPC
and cycle events are calculated using the cycle count from CYC packets, otherwise
MTC packets are used - refer to the 'mtc' config term. When MTC is used, however,
the values are less accurate because the timing is less accurate.
@ -239,7 +239,7 @@ which is the same as
-e intel_pt/tsc=1,noretcomp=0/
Note there are now new config terms - see section 'config terms' further below.
Note there are other config terms - see section <<_config_terms,config terms>> further below.
The config terms are listed in /sys/devices/intel_pt/format. They are bit
fields within the config member of the struct perf_event_attr which is
@ -311,218 +311,271 @@ perf_event_attr is displayed if the -vv option is used e.g.
config terms
~~~~~~~~~~~~
The June 2015 version of Intel 64 and IA-32 Architectures Software Developer
Manuals, Chapter 36 Intel Processor Trace, defined new Intel PT features.
Some of the features are reflected in new config terms. All the config terms are
described below.
tsc Always supported. Produces TSC timestamp packets to provide
timing information. In some cases it is possible to decode
without timing information, for example a per-thread context
that does not overlap executable memory maps.
The default config selects tsc (i.e. tsc=1).
noretcomp Always supported. Disables "return compression" so a TIP packet
is produced when a function returns. Causes more packets to be
produced but might make decoding more reliable.
The default config does not select noretcomp (i.e. noretcomp=0).
psb_period Allows the frequency of PSB packets to be specified.
The PSB packet is a synchronization packet that provides a
starting point for decoding or recovery from errors.
Support for psb_period is indicated by:
/sys/bus/event_source/devices/intel_pt/caps/psb_cyc
which contains "1" if the feature is supported and "0"
otherwise.
Valid values are given by:
/sys/bus/event_source/devices/intel_pt/caps/psb_periods
which contains a hexadecimal value, the bits of which represent
valid values e.g. bit 2 set means value 2 is valid.
The psb_period value is converted to the approximate number of
trace bytes between PSB packets as:
2 ^ (value + 11)
e.g. value 3 means 16KiB between PSBs
If an invalid value is entered, the error message
will give a list of valid values e.g.
$ perf record -e intel_pt/psb_period=15/u uname
Invalid psb_period for intel_pt. Valid values are: 0-5
If MTC packets are selected, the default config selects a value
of 3 (i.e. psb_period=3) or the nearest lower value that is
supported (0 is always supported). Otherwise the default is 0.
If decoding is expected to be reliable and the buffer is large
then a large PSB period can be used.
Because a TSC packet is produced with PSB, the PSB period can
also affect the granularity of timing information in the absence
of MTC or CYC.
mtc Produces MTC timing packets.
MTC packets provide finer grain timestamp information than TSC
packets. MTC packets record time using the hardware crystal
clock (CTC) which is related to TSC packets using a TMA packet.
Support for this feature is indicated by:
/sys/bus/event_source/devices/intel_pt/caps/mtc
which contains "1" if the feature is supported and
"0" otherwise.
The frequency of MTC packets can also be specified - see
mtc_period below.
mtc_period Specifies how frequently MTC packets are produced - see mtc
above for how to determine if MTC packets are supported.
Valid values are given by:
/sys/bus/event_source/devices/intel_pt/caps/mtc_periods
which contains a hexadecimal value, the bits of which represent
valid values e.g. bit 2 set means value 2 is valid.
The mtc_period value is converted to the MTC frequency as:
CTC-frequency / (2 ^ value)
e.g. value 3 means one eighth of CTC-frequency
Where CTC is the hardware crystal clock, the frequency of which
can be related to TSC via values provided in cpuid leaf 0x15.
If an invalid value is entered, the error message
will give a list of valid values e.g.
$ perf record -e intel_pt/mtc_period=15/u uname
Invalid mtc_period for intel_pt. Valid values are: 0,3,6,9
The default value is 3 or the nearest lower value
that is supported (0 is always supported).
cyc Produces CYC timing packets.
CYC packets provide even finer grain timestamp information than
MTC and TSC packets. A CYC packet contains the number of CPU
cycles since the last CYC packet. Unlike MTC and TSC packets,
CYC packets are only sent when another packet is also sent.
Support for this feature is indicated by:
/sys/bus/event_source/devices/intel_pt/caps/psb_cyc
which contains "1" if the feature is supported and
"0" otherwise.
The number of CYC packets produced can be reduced by specifying
a threshold - see cyc_thresh below.
cyc_thresh Specifies how frequently CYC packets are produced - see cyc
above for how to determine if CYC packets are supported.
Valid cyc_thresh values are given by:
/sys/bus/event_source/devices/intel_pt/caps/cycle_thresholds
which contains a hexadecimal value, the bits of which represent
valid values e.g. bit 2 set means value 2 is valid.
The cyc_thresh value represents the minimum number of CPU cycles
that must have passed before a CYC packet can be sent. The
number of CPU cycles is:
2 ^ (value - 1)
e.g. value 4 means 8 CPU cycles must pass before a CYC packet
can be sent. Note a CYC packet is still only sent when another
packet is sent, not at, e.g. every 8 CPU cycles.
If an invalid value is entered, the error message
will give a list of valid values e.g.
$ perf record -e intel_pt/cyc,cyc_thresh=15/u uname
Invalid cyc_thresh for intel_pt. Valid values are: 0-12
CYC packets are not requested by default.
pt Specifies pass-through which enables the 'branch' config term.
The default config selects 'pt' if it is available, so a user will
never need to specify this term.
branch Enable branch tracing. Branch tracing is enabled by default so to
disable branch tracing use 'branch=0'.
The default config selects 'branch' if it is available.
ptw Enable PTWRITE packets which are produced when a ptwrite instruction
is executed.
Support for this feature is indicated by:
/sys/bus/event_source/devices/intel_pt/caps/ptwrite
which contains "1" if the feature is supported and
"0" otherwise.
As an alternative, refer to "Emulated PTWRITE" further below.
fup_on_ptw Enable a FUP packet to follow the PTWRITE packet. The FUP packet
provides the address of the ptwrite instruction. In the absence of
fup_on_ptw, the decoder will use the address of the previous branch
if branch tracing is enabled, otherwise the address will be zero.
Note that fup_on_ptw will work even when branch tracing is disabled.
pwr_evt Enable power events. The power events provide information about
changes to the CPU C-state.
Support for this feature is indicated by:
/sys/bus/event_source/devices/intel_pt/caps/power_event_trace
which contains "1" if the feature is supported and
"0" otherwise.
event Enable Event Trace. The events provide information about asynchronous
events.
Support for this feature is indicated by:
/sys/bus/event_source/devices/intel_pt/caps/event_trace
which contains "1" if the feature is supported and
"0" otherwise.
notnt Disable TNT packets. Without TNT packets, it is not possible to walk
executable code to reconstruct control flow, however FUP, TIP, TIP.PGE
and TIP.PGD packets still indicate asynchronous control flow, and (if
return compression is disabled - see noretcomp) return statements.
The advantage of eliminating TNT packets is reducing the size of the
trace and corresponding tracing overhead.
Support for this feature is indicated by:
/sys/bus/event_source/devices/intel_pt/caps/tnt_disable
which contains "1" if the feature is supported and
"0" otherwise.
Config terms are parameters specified with the -e intel_pt// event option,
for example:
-e intel_pt/cyc/
which selects cycle accurate mode. Each config term can have a value which
defaults to 1, so the above is the same as:
-e intel_pt/cyc=1/
Some terms are set by default, so must be set to 0 to turn them off. For
example, to turn off branch tracing:
-e intel_pt/branch=0/
Multiple config terms are separated by commas, for example:
-e intel_pt/cyc,mtc_period=9/
There are also common config terms, see linkperf:perf-record[1] documentation.
Intel PT config terms are described below.
*tsc*::
Always supported. Produces TSC timestamp packets to provide
timing information. In some cases it is possible to decode
without timing information, for example a per-thread context
that does not overlap executable memory maps.
+
The default config selects tsc (i.e. tsc=1).
*noretcomp*::
Always supported. Disables "return compression" so a TIP packet
is produced when a function returns. Causes more packets to be
produced but might make decoding more reliable.
+
The default config does not select noretcomp (i.e. noretcomp=0).
*psb_period*::
Allows the frequency of PSB packets to be specified.
+
The PSB packet is a synchronization packet that provides a
starting point for decoding or recovery from errors.
+
Support for psb_period is indicated by:
+
/sys/bus/event_source/devices/intel_pt/caps/psb_cyc
+
which contains "1" if the feature is supported and "0"
otherwise.
+
Valid values are given by:
+
/sys/bus/event_source/devices/intel_pt/caps/psb_periods
+
which contains a hexadecimal value, the bits of which represent
valid values e.g. bit 2 set means value 2 is valid.
+
The psb_period value is converted to the approximate number of
trace bytes between PSB packets as:
+
2 ^ (value + 11)
+
e.g. value 3 means 16KiB between PSBs
+
If an invalid value is entered, the error message
will give a list of valid values e.g.
+
$ perf record -e intel_pt/psb_period=15/u uname
Invalid psb_period for intel_pt. Valid values are: 0-5
+
If MTC packets are selected, the default config selects a value
of 3 (i.e. psb_period=3) or the nearest lower value that is
supported (0 is always supported). Otherwise the default is 0.
+
If decoding is expected to be reliable and the buffer is large
then a large PSB period can be used.
+
Because a TSC packet is produced with PSB, the PSB period can
also affect the granularity of timing information in the absence
of MTC or CYC.
*mtc*::
Produces MTC timing packets.
+
MTC packets provide finer grain timestamp information than TSC
packets. MTC packets record time using the hardware crystal
clock (CTC) which is related to TSC packets using a TMA packet.
+
Support for this feature is indicated by:
+
/sys/bus/event_source/devices/intel_pt/caps/mtc
+
which contains "1" if the feature is supported and
"0" otherwise.
+
The frequency of MTC packets can also be specified - see
mtc_period below.
*mtc_period*::
Specifies how frequently MTC packets are produced - see mtc
above for how to determine if MTC packets are supported.
+
Valid values are given by:
+
/sys/bus/event_source/devices/intel_pt/caps/mtc_periods
+
which contains a hexadecimal value, the bits of which represent
valid values e.g. bit 2 set means value 2 is valid.
+
The mtc_period value is converted to the MTC frequency as:
CTC-frequency / (2 ^ value)
+
e.g. value 3 means one eighth of CTC-frequency
+
Where CTC is the hardware crystal clock, the frequency of which
can be related to TSC via values provided in cpuid leaf 0x15.
+
If an invalid value is entered, the error message
will give a list of valid values e.g.
+
$ perf record -e intel_pt/mtc_period=15/u uname
Invalid mtc_period for intel_pt. Valid values are: 0,3,6,9
+
The default value is 3 or the nearest lower value
that is supported (0 is always supported).
*cyc*::
Produces CYC timing packets.
+
CYC packets provide even finer grain timestamp information than
MTC and TSC packets. A CYC packet contains the number of CPU
cycles since the last CYC packet. Unlike MTC and TSC packets,
CYC packets are only sent when another packet is also sent.
+
Support for this feature is indicated by:
+
/sys/bus/event_source/devices/intel_pt/caps/psb_cyc
+
which contains "1" if the feature is supported and
"0" otherwise.
+
The number of CYC packets produced can be reduced by specifying
a threshold - see cyc_thresh below.
*cyc_thresh*::
Specifies how frequently CYC packets are produced - see cyc
above for how to determine if CYC packets are supported.
+
Valid cyc_thresh values are given by:
+
/sys/bus/event_source/devices/intel_pt/caps/cycle_thresholds
+
which contains a hexadecimal value, the bits of which represent
valid values e.g. bit 2 set means value 2 is valid.
+
The cyc_thresh value represents the minimum number of CPU cycles
that must have passed before a CYC packet can be sent. The
number of CPU cycles is:
+
2 ^ (value - 1)
+
e.g. value 4 means 8 CPU cycles must pass before a CYC packet
can be sent. Note a CYC packet is still only sent when another
packet is sent, not at, e.g. every 8 CPU cycles.
+
If an invalid value is entered, the error message
will give a list of valid values e.g.
+
$ perf record -e intel_pt/cyc,cyc_thresh=15/u uname
Invalid cyc_thresh for intel_pt. Valid values are: 0-12
+
CYC packets are not requested by default.
*pt*::
Specifies pass-through which enables the 'branch' config term.
+
The default config selects 'pt' if it is available, so a user will
never need to specify this term.
*branch*::
Enable branch tracing. Branch tracing is enabled by default so to
disable branch tracing use 'branch=0'.
+
The default config selects 'branch' if it is available.
*ptw*::
Enable PTWRITE packets which are produced when a ptwrite instruction
is executed.
+
Support for this feature is indicated by:
+
/sys/bus/event_source/devices/intel_pt/caps/ptwrite
+
which contains "1" if the feature is supported and
"0" otherwise.
+
As an alternative, refer to "Emulated PTWRITE" further below.
*fup_on_ptw*::
Enable a FUP packet to follow the PTWRITE packet. The FUP packet
provides the address of the ptwrite instruction. In the absence of
fup_on_ptw, the decoder will use the address of the previous branch
if branch tracing is enabled, otherwise the address will be zero.
Note that fup_on_ptw will work even when branch tracing is disabled.
*pwr_evt*::
Enable power events. The power events provide information about
changes to the CPU C-state.
+
Support for this feature is indicated by:
+
/sys/bus/event_source/devices/intel_pt/caps/power_event_trace
+
which contains "1" if the feature is supported and
"0" otherwise.
*event*::
Enable Event Trace. The events provide information about asynchronous
events.
+
Support for this feature is indicated by:
+
/sys/bus/event_source/devices/intel_pt/caps/event_trace
+
which contains "1" if the feature is supported and
"0" otherwise.
*notnt*::
Disable TNT packets. Without TNT packets, it is not possible to walk
executable code to reconstruct control flow, however FUP, TIP, TIP.PGE
and TIP.PGD packets still indicate asynchronous control flow, and (if
return compression is disabled - see noretcomp) return statements.
The advantage of eliminating TNT packets is reducing the size of the
trace and corresponding tracing overhead.
+
Support for this feature is indicated by:
+
/sys/bus/event_source/devices/intel_pt/caps/tnt_disable
+
which contains "1" if the feature is supported and
"0" otherwise.
*aux-action=start-paused*::
Start tracing paused, refer to the section <<_pause_or_resume_tracing,Pause or Resume Tracing>>
config terms on other events
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Some Intel PT features work with other events, such as AUX area sampling
and PEBS-via-PT. In those cases, the other events can use the config terms below:
*aux-sample-size*::
Used to set the AUX area sample size, refer to the section
<<_aux_area_sampling_option,AUX area sampling option>>
*aux-output*::
Used to select PEBS-via-PT, refer to the
section <<_pebs_via_intel_pt,PEBS via Intel PT>>
*aux-action*::
Used to pause or resume tracing, refer to the section
<<_pause_or_resume_tracing,Pause or Resume Tracing>>
AUX area sampling option
~~~~~~~~~~~~~~~~~~~~~~~~
@ -596,7 +649,8 @@ The default snapshot size is the auxtrace mmap size. If neither auxtrace mmap s
nor snapshot size is specified, then the default is 4MiB for privileged users
(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users.
If an unprivileged user does not specify mmap pages, the mmap pages will be
reduced as described in the 'new auxtrace mmap size option' section below.
reduced as described in the <<_new_auxtrace_mmap_size_option,new auxtrace mmap size option>>
section below.
The snapshot size is displayed if the option -vv is used e.g.
@ -952,11 +1006,11 @@ transaction start, commit or abort.
Note that "instructions", "cycles", "branches" and "transactions" events
depend on code flow packets which can be disabled by using the config term
"branch=0". Refer to the config terms section above.
"branch=0". Refer to the <<_config_terms,config terms>> section above.
"ptwrite" events record the payload of the ptwrite instruction and whether
"fup_on_ptw" was used. "ptwrite" events depend on PTWRITE packets which are
recorded only if the "ptw" config term was used. Refer to the config terms
recorded only if the "ptw" config term was used. Refer to the <<_config_terms,config terms>>
section above. perf script "synth" field displays "ptwrite" information like
this: "ip: 0 payload: 0x123456789abcdef0" where "ip" is 1 if "fup_on_ptw" was
used.
@ -964,7 +1018,7 @@ used.
"Power" events correspond to power event packets and CBR (core-to-bus ratio)
packets. While CBR packets are always recorded when tracing is enabled, power
event packets are recorded only if the "pwr_evt" config term was used. Refer to
the config terms section above. The power events record information about
the <<_config_terms,config terms>> section above. The power events record information about
C-state changes, whereas CBR is indicative of CPU frequency. perf script
"event,synth" fields display information like this:
@ -1120,7 +1174,7 @@ What *will* be decoded with the (single) q option:
- asynchronous branches such as interrupts
- indirect branches
- function return target address *if* the noretcomp config term (refer
config terms section) was used
<<_config_terms,config terms>> section) was used
- start of (control-flow) tracing
- end of (control-flow) tracing, if it is not out of context
- power events, ptwrite, transaction start and abort
@ -1133,7 +1187,7 @@ Repeating the q option (double-q i.e. qq) results in even faster decoding and ev
less detail. The decoder decodes only extended PSB (PSB+) packets, getting the
instruction pointer if there is a FUP packet within PSB+ (i.e. between PSB and
PSBEND). Note PSB packets occur regularly in the trace based on the psb_period
config term (refer config terms section). There will be a FUP packet if the
config term (refer <<_config_terms,config terms>> section). There will be a FUP packet if the
PSB+ occurs while control flow is being traced.
What will *not* be decoded with the qq option:
@ -1867,6 +1921,108 @@ For pipe mode, the order of events and timestamps can presumably
be messed up.
Pause or Resume Tracing
-----------------------
With newer kernels, it is possible to use other selected events to pause
or resume Intel PT tracing. This is configured by using the "aux-action"
config term:
"aux-action=pause" is used with events that are to pause Intel PT tracing.
"aux-action=resume" is used with events that are to resume Intel PT tracing.
"aux-action=start-paused" is used with the Intel PT event to start in a
paused state.
For example, to trace only the uname system call (sys_newuname) when running the
command line utility uname:
$ perf record --kcore -e intel_pt/aux-action=start-paused/k,syscalls:sys_enter_newuname/aux-action=resume/,syscalls:sys_exit_newuname/aux-action=pause/ uname
Linux
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.043 MB perf.data ]
$ perf script --call-trace
uname 30805 [000] 24001.058782799: name: 0x7ffc9c1865b0
uname 30805 [000] 24001.058784424: psb offs: 0
uname 30805 [000] 24001.058784424: cbr: 39 freq: 3904 MHz (139%)
uname 30805 [000] 24001.058784629: ([kernel.kallsyms]) debug_smp_processor_id
uname 30805 [000] 24001.058784629: ([kernel.kallsyms]) __x64_sys_newuname
uname 30805 [000] 24001.058784629: ([kernel.kallsyms]) down_read
uname 30805 [000] 24001.058784629: ([kernel.kallsyms]) __cond_resched
uname 30805 [000] 24001.058784629: ([kernel.kallsyms]) preempt_count_add
uname 30805 [000] 24001.058784629: ([kernel.kallsyms]) in_lock_functions
uname 30805 [000] 24001.058784629: ([kernel.kallsyms]) preempt_count_sub
uname 30805 [000] 24001.058784629: ([kernel.kallsyms]) up_read
uname 30805 [000] 24001.058784629: ([kernel.kallsyms]) preempt_count_add
uname 30805 [000] 24001.058784838: ([kernel.kallsyms]) in_lock_functions
uname 30805 [000] 24001.058784838: ([kernel.kallsyms]) preempt_count_sub
uname 30805 [000] 24001.058784838: ([kernel.kallsyms]) _copy_to_user
uname 30805 [000] 24001.058784838: ([kernel.kallsyms]) syscall_exit_to_user_mode
uname 30805 [000] 24001.058784838: ([kernel.kallsyms]) syscall_exit_work
uname 30805 [000] 24001.058784838: ([kernel.kallsyms]) perf_syscall_exit
uname 30805 [000] 24001.058784838: ([kernel.kallsyms]) debug_smp_processor_id
uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) perf_trace_buf_alloc
uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) perf_swevent_get_recursion_context
uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) debug_smp_processor_id
uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) debug_smp_processor_id
uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) perf_tp_event
uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) perf_trace_buf_update
uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) tracing_gen_ctx_irq_test
uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) perf_swevent_event
uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) __perf_event_account_interrupt
uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) __this_cpu_preempt_check
uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) perf_event_output_forward
uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) perf_event_aux_pause
uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) ring_buffer_get
uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) __rcu_read_lock
uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) __rcu_read_unlock
uname 30805 [000] 24001.058785254: ([kernel.kallsyms]) pt_event_stop
uname 30805 [000] 24001.058785254: ([kernel.kallsyms]) debug_smp_processor_id
uname 30805 [000] 24001.058785254: ([kernel.kallsyms]) debug_smp_processor_id
uname 30805 [000] 24001.058785254: ([kernel.kallsyms]) native_write_msr
uname 30805 [000] 24001.058785463: ([kernel.kallsyms]) native_write_msr
uname 30805 [000] 24001.058785639: 0x0
The example above uses tracepoints, but any kind of sampled event can be used.
For example:
Tracing between arch_cpu_idle_enter() and arch_cpu_idle_exit() using breakpoint events:
$ sudo cat /proc/kallsyms | sort | grep ' arch_cpu_idle_enter\| arch_cpu_idle_exit'
ffffffffb605bf60 T arch_cpu_idle_enter
ffffffffb614d8a0 W arch_cpu_idle_exit
$ sudo perf record --kcore -a -e intel_pt/aux-action=start-paused/k -e mem:0xffffffffb605bf60:x/aux-action=resume/ -e mem:0xffffffffb614d8a0:x/aux-action=pause/ -- sleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 1.387 MB perf.data ]
Tracing __alloc_pages() using kprobes:
$ sudo perf probe --add '__alloc_pages order'
Added new event: probe:__alloc_pages (on __alloc_pages with order)
$ sudo perf probe --add __alloc_pages%return
Added new event: probe:__alloc_pages__return (on __alloc_pages%return)
$ sudo perf record --kcore -aR -e intel_pt/aux-action=start-paused/k -e probe:__alloc_pages/aux-action=resume/ -e probe:__alloc_pages__return/aux-action=pause/ -- sleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 1.490 MB perf.data ]
Tracing starting at main() using a uprobe event:
$ sudo perf probe -x /usr/bin/uname main
Added new event: probe_uname:main (on main in /usr/bin/uname)
$ sudo perf record -e intel_pt/aux-action=start-paused/u -e probe_uname:main/aux-action=resume/ -- uname
Linux
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.031 MB perf.data ]
Tracing occasionally using cycles events with different periods:
$ perf record --kcore -a -m,64M -e intel_pt/aux-action=start-paused/k -e cycles/aux-action=pause,period=1000000/Pk -e cycles/aux-action=resume,period=10500000/Pk -- firefox
[ perf record: Woken up 19 times to write data ]
[ perf record: Captured and wrote 16.561 MB perf.data ]
EXAMPLE
-------

View File

@ -68,6 +68,10 @@ OPTIONS
like this: name=\'CPU_CLK_UNHALTED.THREAD:cmask=0x1\'.
- 'aux-output': Generate AUX records instead of events. This requires
that an AUX area event is also provided.
- 'aux-action': "pause" or "resume" to pause or resume an AUX
area event (the group leader) when this event occurs.
"start-paused" on an AUX area event itself, will
start in a paused state.
- 'aux-sample-size': Set sample size for AUX area sampling. If the
'--aux-sample' option has been used, set aux-sample-size=0 to disable
AUX area sampling for the event.

View File

@ -43,7 +43,9 @@ endif
# Additional ARCH settings for ppc
ifeq ($(SRCARCH),powerpc)
CFLAGS += -I$(OUTPUT)arch/powerpc/include/generated
LIBUNWIND_LIBS := -lunwind -lunwind-ppc64
ifndef NO_LIBUNWIND
LIBUNWIND_LIBS := -lunwind -lunwind-ppc64
endif
endif
# Additional ARCH settings for x86
@ -53,25 +55,35 @@ ifeq ($(SRCARCH),x86)
ifeq (${IS_64_BIT}, 1)
CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT
ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S
LIBUNWIND_LIBS = -lunwind-x86_64 -lunwind -llzma
ifndef NO_LIBUNWIND
LIBUNWIND_LIBS = -lunwind-x86_64 -lunwind -llzma
endif
$(call detected,CONFIG_X86_64)
else
LIBUNWIND_LIBS = -lunwind-x86 -llzma -lunwind
ifndef NO_LIBUNWIND
LIBUNWIND_LIBS = -lunwind-x86 -llzma -lunwind
endif
endif
endif
ifeq ($(SRCARCH),arm)
LIBUNWIND_LIBS = -lunwind -lunwind-arm
ifndef NO_LIBUNWIND
LIBUNWIND_LIBS = -lunwind -lunwind-arm
endif
endif
ifeq ($(SRCARCH),arm64)
CFLAGS += -I$(OUTPUT)arch/arm64/include/generated
LIBUNWIND_LIBS = -lunwind -lunwind-aarch64
ifndef NO_LIBUNWIND
LIBUNWIND_LIBS = -lunwind -lunwind-aarch64
endif
endif
ifeq ($(SRCARCH),loongarch)
CFLAGS += -I$(OUTPUT)arch/loongarch/include/generated
LIBUNWIND_LIBS = -lunwind -lunwind-loongarch64
ifndef NO_LIBUNWIND
LIBUNWIND_LIBS = -lunwind -lunwind-loongarch64
endif
endif
ifeq ($(ARCH),s390)
@ -80,7 +92,9 @@ endif
ifeq ($(ARCH),mips)
CFLAGS += -I$(OUTPUT)arch/mips/include/generated
LIBUNWIND_LIBS = -lunwind -lunwind-mips
ifndef NO_LIBUNWIND
LIBUNWIND_LIBS = -lunwind -lunwind-mips
endif
endif
ifeq ($(ARCH),riscv)
@ -121,16 +135,18 @@ ifdef LIBUNWIND_DIR
$(foreach libunwind_arch,$(LIBUNWIND_ARCHS),$(call libunwind_arch_set_flags,$(libunwind_arch)))
endif
# Set per-feature check compilation flags
FEATURE_CHECK_CFLAGS-libunwind = $(LIBUNWIND_CFLAGS)
FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS)
FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS)
FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS)
FEATURE_CHECK_LDFLAGS-libunwind-arm += -lunwind -lunwind-arm
FEATURE_CHECK_LDFLAGS-libunwind-aarch64 += -lunwind -lunwind-aarch64
FEATURE_CHECK_LDFLAGS-libunwind-x86 += -lunwind -llzma -lunwind-x86
FEATURE_CHECK_LDFLAGS-libunwind-x86_64 += -lunwind -llzma -lunwind-x86_64
ifndef NO_LIBUNWIND
# Set per-feature check compilation flags
FEATURE_CHECK_CFLAGS-libunwind = $(LIBUNWIND_CFLAGS)
FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS)
FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS)
FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS)
FEATURE_CHECK_LDFLAGS-libunwind-arm += -lunwind -lunwind-arm
FEATURE_CHECK_LDFLAGS-libunwind-aarch64 += -lunwind -lunwind-aarch64
FEATURE_CHECK_LDFLAGS-libunwind-x86 += -lunwind -llzma -lunwind-x86
FEATURE_CHECK_LDFLAGS-libunwind-x86_64 += -lunwind -llzma -lunwind-x86_64
endif
FEATURE_CHECK_LDFLAGS-libcrypto = -lcrypto
@ -155,7 +171,7 @@ ifdef LIBDW_DIR
endif
DWARFLIBS := -ldw
ifeq ($(findstring -static,${LDFLAGS}),-static)
DWARFLIBS += -lelf -lz -llzma -lbz2 -lzstd
DWARFLIBS += -lelf -lz -llzma -lbz2
LIBDW_VERSION := $(shell $(PKG_CONFIG) --modversion libdw).0.0
LIBDW_VERSION_1 := $(word 1, $(subst ., ,$(LIBDW_VERSION)))
@ -550,6 +566,12 @@ ifndef NO_LIBELF
CFLAGS += -DHAVE_ELF_GETSHDRSTRNDX_SUPPORT
endif
ifeq ($(feature-libelf-zstd), 1)
ifdef NO_LIBZSTD
$(error Error: libzstd is required by libelf, please do not set NO_LIBZSTD)
endif
endif
ifndef NO_LIBDEBUGINFOD
$(call feature_check,libdebuginfod)
ifeq ($(feature-libdebuginfod), 1)
@ -734,26 +756,25 @@ ifeq ($(dwarf-post-unwind),1)
$(call detected,CONFIG_DWARF_UNWIND)
endif
ifndef NO_LOCAL_LIBUNWIND
ifeq ($(SRCARCH),$(filter $(SRCARCH),arm arm64))
$(call feature_check,libunwind-debug-frame)
ifneq ($(feature-libunwind-debug-frame), 1)
$(warning No debug_frame support found in libunwind)
ifndef NO_LIBUNWIND
ifndef NO_LOCAL_LIBUNWIND
ifeq ($(SRCARCH),$(filter $(SRCARCH),arm arm64))
$(call feature_check,libunwind-debug-frame)
ifneq ($(feature-libunwind-debug-frame), 1)
$(warning No debug_frame support found in libunwind)
CFLAGS += -DNO_LIBUNWIND_DEBUG_FRAME
endif
else
# non-ARM has no dwarf_find_debug_frame() function:
CFLAGS += -DNO_LIBUNWIND_DEBUG_FRAME
endif
else
# non-ARM has no dwarf_find_debug_frame() function:
CFLAGS += -DNO_LIBUNWIND_DEBUG_FRAME
EXTLIBS += $(LIBUNWIND_LIBS)
LDFLAGS += $(LIBUNWIND_LIBS)
endif
ifeq ($(findstring -static,${LDFLAGS}),-static)
# gcc -static links libgcc_eh which contains a piece of libunwind
LIBUNWIND_LDFLAGS += -Wl,--allow-multiple-definition
endif
EXTLIBS += $(LIBUNWIND_LIBS)
LDFLAGS += $(LIBUNWIND_LIBS)
endif
ifeq ($(findstring -static,${LDFLAGS}),-static)
# gcc -static links libgcc_eh which contains a piece of libunwind
LIBUNWIND_LDFLAGS += -Wl,--allow-multiple-definition
endif
ifndef NO_LIBUNWIND
CFLAGS += -DHAVE_LIBUNWIND_SUPPORT
CFLAGS += $(LIBUNWIND_CFLAGS)
LDFLAGS += $(LIBUNWIND_LDFLAGS)
@ -1172,7 +1193,6 @@ endif
# libtraceevent is a recommended dependency picked up from the system.
ifneq ($(NO_LIBTRACEEVENT),1)
$(call feature_check,libtraceevent)
ifeq ($(feature-libtraceevent), 1)
CFLAGS += -DHAVE_LIBTRACEEVENT $(shell $(PKG_CONFIG) --cflags libtraceevent)
LDFLAGS += $(shell $(PKG_CONFIG) --libs-only-L libtraceevent)
@ -1188,7 +1208,6 @@ ifneq ($(NO_LIBTRACEEVENT),1)
$(error ERROR: libtraceevent is missing. Please install libtraceevent-dev/libtraceevent-devel and/or set LIBTRACEEVENT_DIR or build with NO_LIBTRACEEVENT=1)
endif
$(call feature_check,libtracefs)
ifeq ($(feature-libtracefs), 1)
CFLAGS += $(shell $(PKG_CONFIG) --cflags libtracefs)
LDFLAGS += $(shell $(PKG_CONFIG) --libs-only-L libtracefs)

View File

@ -487,6 +487,9 @@ endif
EXTLIBS := $(call filter-out,$(EXCLUDE_EXTLIBS),$(EXTLIBS))
LIBS = -Wl,--whole-archive $(PERFLIBS) $(EXTRA_PERFLIBS) -Wl,--no-whole-archive -Wl,--start-group $(EXTLIBS) -Wl,--end-group
PERFLIBS_PY := $(call filter-out,$(LIBPERF_BENCH) $(LIBPERF_TEST),$(PERFLIBS))
LIBS_PY = -Wl,--whole-archive $(PERFLIBS_PY) $(EXTRA_PERFLIBS) -Wl,--no-whole-archive -Wl,--start-group $(EXTLIBS) -Wl,--end-group
export INSTALL SHELL_PATH
### Build rules
@ -735,9 +738,9 @@ all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS)
# Create python binding output directory if not already present
$(shell [ -d '$(OUTPUT)python' ] || mkdir -p '$(OUTPUT)python')
$(OUTPUT)python/perf$(PYTHON_EXTENSION_SUFFIX): util/python.c util/setup.py $(PERFLIBS)
$(OUTPUT)python/perf$(PYTHON_EXTENSION_SUFFIX): util/python.c util/setup.py $(PERFLIBS_PY)
$(QUIET_GEN)LDSHARED="$(CC) -pthread -shared" \
CFLAGS='$(CFLAGS)' LDFLAGS='$(LDFLAGS) $(LIBS)' \
CFLAGS='$(CFLAGS)' LDFLAGS='$(LDFLAGS) $(LIBS_PY)' \
$(PYTHON_WORD) util/setup.py \
--quiet build_ext; \
cp $(PYTHON_EXTBUILD_LIB)perf*.so $(OUTPUT)python/

View File

@ -16,6 +16,7 @@
#define PVR_POWER9 0x004E
#define PVR_POWER10 0x0080
#define PVR_POWER11 0x0082
static const struct sample_reg sample_reg_masks[] = {
SMPL_REG(r0, PERF_REG_POWERPC_R0),
@ -207,7 +208,7 @@ uint64_t arch__intr_reg_mask(void)
version = (((mfspr(SPRN_PVR)) >> 16) & 0xFFFF);
if (version == PVR_POWER9)
extended_mask = PERF_REG_PMU_MASK_300;
else if (version == PVR_POWER10)
else if ((version == PVR_POWER10) || (version == PVR_POWER11))
extended_mask = PERF_REG_PMU_MASK_31;
else
return mask;
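
The version check masks the PVR down to its upper 16 bits; a worked sketch
with a hypothetical Power11 PVR value:

	#include <stdint.h>

	/* Hedged sketch: 0x00820201 is an illustrative PVR, not from this commit. */
	static int is_power11(uint32_t pvr)
	{
		uint32_t version = (pvr >> 16) & 0xFFFF; /* 0x00820201 -> 0x0082 */

		return version == 0x0082; /* PVR_POWER11 */
	}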

View File

@ -15,6 +15,6 @@ perf-util-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
perf-util-$(CONFIG_AUXTRACE) += auxtrace.o
perf-util-$(CONFIG_AUXTRACE) += archinsn.o
perf-util-y += archinsn.o
perf-util-$(CONFIG_AUXTRACE) += intel-pt.o
perf-util-$(CONFIG_AUXTRACE) += intel-bts.o

View File

@ -52,7 +52,7 @@ struct bench_dso {
static int nr_dsos;
static struct bench_dso *dsos;
extern int cmd_inject(int argc, const char *argv[]);
extern int main(int argc, const char **argv);
static const struct option options[] = {
OPT_UINTEGER('i', "iterations", &iterations,
@ -294,7 +294,7 @@ static int setup_injection(struct bench_data *data, bool build_id_all)
if (data->pid == 0) {
const char **inject_argv;
int inject_argc = 2;
int inject_argc = 3;
close(data->input_pipe[1]);
close(data->output_pipe[0]);
@ -318,15 +318,16 @@ static int setup_injection(struct bench_data *data, bool build_id_all)
if (inject_argv == NULL)
exit(1);
inject_argv[0] = strdup("inject");
inject_argv[1] = strdup("-b");
inject_argv[0] = strdup("perf");
inject_argv[1] = strdup("inject");
inject_argv[2] = strdup("-b");
if (build_id_all)
inject_argv[2] = strdup("--buildid-all");
inject_argv[3] = strdup("--buildid-all");
/* signal that we're ready to go */
close(ready_pipe[1]);
cmd_inject(inject_argc, inject_argv);
main(inject_argc, inject_argv);
exit(0);
}

View File

@ -7,6 +7,7 @@
* a histogram of results, along various sorting keys.
*/
#include "builtin.h"
#include "perf.h"
#include "util/color.h"
#include <linux/list.h>

View File

@ -6,6 +6,7 @@
* DSOs and symbol information, sort them and produce a diff.
*/
#include "builtin.h"
#include "perf.h"
#include "util/debug.h"
#include "util/event.h"

View File

@ -726,9 +726,11 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace)
return (done && !workload_exec_errno) ? 0 : -1;
}
static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf,
bool use_nsec)
static void make_histogram(struct perf_ftrace *ftrace, int buckets[],
char *buf, size_t len, char *linebuf)
{
int min_latency = ftrace->min_latency;
int max_latency = ftrace->max_latency;
char *p, *q;
char *unit;
double num;
@ -774,15 +776,29 @@ static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf,
if (!unit || strncmp(unit, " us", 3))
goto next;
if (use_nsec)
if (ftrace->use_nsec)
num *= 1000;
i = log2(num);
if (i < 0)
i = 0;
if (i >= NUM_BUCKET)
i = 0;
if (num < min_latency)
goto do_inc;
num -= min_latency;
if (!ftrace->bucket_range) {
i = log2(num);
if (i < 0)
i = 0;
} else {
// num is now relative to min_latency: a sample right at
// the minimum (num == 0) stays in the first bucket.
if (num > 0) // 1st entry: [ 1 unit .. bucket_range units ]
i = num / ftrace->bucket_range + 1;
}
if (i >= NUM_BUCKET || num >= max_latency - min_latency)
i = NUM_BUCKET - 1;
do_inc:
buckets[i]++;
next:
@ -794,8 +810,10 @@ static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf,
strcat(linebuf, p);
}
static void display_histogram(int buckets[], bool use_nsec)
static void display_histogram(struct perf_ftrace *ftrace, int buckets[])
{
int min_latency = ftrace->min_latency;
bool use_nsec = ftrace->use_nsec;
int i;
int total = 0;
int bar_total = 46; /* to fit in 80 column */
@ -814,28 +832,66 @@ static void display_histogram(int buckets[], bool use_nsec)
" DURATION ", "COUNT", bar_total, "GRAPH");
bar_len = buckets[0] * bar_total / total;
printf(" %4d - %-4d %s | %10d | %.*s%*s |\n",
0, 1, use_nsec ? "ns" : "us", buckets[0], bar_len, bar, bar_total - bar_len, "");
printf(" %4d - %4d %s | %10d | %.*s%*s |\n",
0, min_latency, use_nsec ? "ns" : "us",
buckets[0], bar_len, bar, bar_total - bar_len, "");
for (i = 1; i < NUM_BUCKET - 1; i++) {
int start = (1 << (i - 1));
int stop = 1 << i;
unsigned int start, stop;
const char *unit = use_nsec ? "ns" : "us";
if (start >= 1024) {
start >>= 10;
stop >>= 10;
unit = use_nsec ? "us" : "ms";
if (!ftrace->bucket_range) {
start = (1 << (i - 1));
stop = 1 << i;
if (start >= 1024) {
start >>= 10;
stop >>= 10;
unit = use_nsec ? "us" : "ms";
}
} else {
start = (i - 1) * ftrace->bucket_range + min_latency;
stop = i * ftrace->bucket_range + min_latency;
if (start >= ftrace->max_latency)
break;
if (stop > ftrace->max_latency)
stop = ftrace->max_latency;
if (start >= 1000) {
double dstart = start / 1000.0,
dstop = stop / 1000.0;
printf(" %4.2f - %-4.2f", dstart, dstop);
unit = use_nsec ? "us" : "ms";
goto print_bucket_info;
}
}
printf(" %4d - %4d", start, stop);
print_bucket_info:
bar_len = buckets[i] * bar_total / total;
printf(" %4d - %-4d %s | %10d | %.*s%*s |\n",
start, stop, unit, buckets[i], bar_len, bar,
printf(" %s | %10d | %.*s%*s |\n", unit, buckets[i], bar_len, bar,
bar_total - bar_len, "");
}
bar_len = buckets[NUM_BUCKET - 1] * bar_total / total;
printf(" %4d - %-4s %s | %10d | %.*s%*s |\n",
1, "...", use_nsec ? "ms" : " s", buckets[NUM_BUCKET - 1],
if (!ftrace->bucket_range) {
printf(" %4d - %-4s %s", 1, "...", use_nsec ? "ms" : "s ");
} else {
unsigned int upper_outlier = (NUM_BUCKET - 2) * ftrace->bucket_range + min_latency;
if (upper_outlier > ftrace->max_latency)
upper_outlier = ftrace->max_latency;
if (upper_outlier >= 1000) {
double dstart = upper_outlier / 1000.0;
printf(" %4.2f - %-4s %s", dstart, "...", use_nsec ? "us" : "ms");
} else {
printf(" %4d - %4s %s", upper_outlier, "...", use_nsec ? "ns" : "us");
}
}
printf(" | %10d | %.*s%*s |\n", buckets[NUM_BUCKET - 1],
bar_len, bar, bar_total - bar_len, "");
}
@ -951,7 +1007,7 @@ static int __cmd_latency(struct perf_ftrace *ftrace)
if (n < 0)
break;
make_histogram(buckets, buf, n, line, ftrace->use_nsec);
make_histogram(ftrace, buckets, buf, n, line);
}
}
@ -968,12 +1024,12 @@ static int __cmd_latency(struct perf_ftrace *ftrace)
int n = read(trace_fd, buf, sizeof(buf) - 1);
if (n <= 0)
break;
make_histogram(buckets, buf, n, line, ftrace->use_nsec);
make_histogram(ftrace, buckets, buf, n, line);
}
read_func_latency(ftrace, buckets);
display_histogram(buckets, ftrace->use_nsec);
display_histogram(ftrace, buckets);
out:
close(trace_fd);
@ -1558,6 +1614,12 @@ int cmd_ftrace(int argc, const char **argv)
#endif
OPT_BOOLEAN('n', "use-nsec", &ftrace.use_nsec,
"Use nano-second histogram"),
OPT_UINTEGER(0, "bucket-range", &ftrace.bucket_range,
"Bucket range in ms or ns (-n/--use-nsec), default is log2() mode"),
OPT_UINTEGER(0, "min-latency", &ftrace.min_latency,
"Minimum latency (1st bucket). Works only with --bucket-range."),
OPT_UINTEGER(0, "max-latency", &ftrace.max_latency,
"Maximum latency (last bucket). Works only with --bucket-range and total buckets less than 22."),
OPT_PARENT(common_options),
};
const struct option profile_options[] = {
@ -1653,6 +1715,29 @@ int cmd_ftrace(int argc, const char **argv)
ret = -EINVAL;
goto out_delete_filters;
}
if (!ftrace.bucket_range && ftrace.min_latency) {
pr_err("--min-latency works only with --bucket-range\n");
parse_options_usage(ftrace_usage, options,
"min-latency", /*short_opt=*/false);
ret = -EINVAL;
goto out_delete_filters;
}
if (!ftrace.min_latency) {
/* default min latency should be the bucket range */
ftrace.min_latency = ftrace.bucket_range;
}
if (!ftrace.bucket_range && ftrace.max_latency) {
pr_err("--max-latency works only with --bucket-range\n");
parse_options_usage(ftrace_usage, options,
"max-latency", /*short_opt=*/false);
ret = -EINVAL;
goto out_delete_filters;
}
if (!ftrace.max_latency) {
/* default max latency should depend on bucket range and num_buckets */
ftrace.max_latency = (NUM_BUCKET - 2) * ftrace.bucket_range +
ftrace.min_latency;
}
cmd_func = __cmd_latency;
break;
case PERF_FTRACE_PROFILE:
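
A minimal sketch of the linear bucketing added to make_histogram() above,
assuming NUM_BUCKET is 22 (the 22-bucket limit quoted in the docs): samples
below --min-latency stay in bucket 0, samples past --max-latency collect in
the last bucket, and the span between is cut into --bucket-range wide slots.

	/* Hedged sketch mirroring the linear mode above, with NUM_BUCKET == 22.
	 * E.g. range=100, min=100: 70 -> bucket 0; 250 -> (250-100)/100+1 = bucket 2.
	 */
	static int latency_bucket(int num, int min_latency, int max_latency, int bucket_range)
	{
		int i = 0;

		if (num < min_latency)
			return 0;	/* first bucket: below the minimum */
		num -= min_latency;
		if (num > 0)
			i = num / bucket_range + 1;
		if (i >= 22 || num >= max_latency - min_latency)
			i = 22 - 1;	/* last bucket: outliers */
		return i;
	}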

View File

@ -761,6 +761,7 @@ static int parse_gfp_flags(struct evsel *evsel, struct perf_sample *sample,
};
struct trace_seq seq;
char *str, *pos = NULL;
const struct tep_event *tp_format;
if (nr_gfps) {
struct gfp_flag key = {
@ -772,8 +773,9 @@ static int parse_gfp_flags(struct evsel *evsel, struct perf_sample *sample,
}
trace_seq_init(&seq);
tep_print_event(evsel->tp_format->tep,
&seq, &record, "%s", TEP_PRINT_INFO);
tp_format = evsel__tp_format(evsel);
if (tp_format)
tep_print_event(tp_format->tep, &seq, &record, "%s", TEP_PRINT_INFO);
str = strtok_r(seq.buffer, " ", &pos);
while (str) {
@ -2012,13 +2014,13 @@ int cmd_kmem(int argc, const char **argv)
if (kmem_page) {
struct evsel *evsel = evlist__find_tracepoint_by_name(session->evlist, "kmem:mm_page_alloc");
const struct tep_event *tp_format = evsel ? evsel__tp_format(evsel) : NULL;
if (evsel == NULL) {
if (tp_format == NULL) {
pr_err(errmsg, "page", "page");
goto out_delete;
}
kmem_page_size = tep_get_page_size(evsel->tp_format->tep);
kmem_page_size = tep_get_page_size(tp_format->tep);
symbol_conf.use_callchain = true;
}

View File

@ -615,67 +615,6 @@ static const char *get_filename_for_perf_kvm(void)
#if defined(HAVE_KVM_STAT_SUPPORT) && defined(HAVE_LIBTRACEEVENT)
void exit_event_get_key(struct evsel *evsel,
struct perf_sample *sample,
struct event_key *key)
{
key->info = 0;
key->key = evsel__intval(evsel, sample, kvm_exit_reason);
}
bool kvm_exit_event(struct evsel *evsel)
{
return evsel__name_is(evsel, kvm_exit_trace);
}
bool exit_event_begin(struct evsel *evsel,
struct perf_sample *sample, struct event_key *key)
{
if (kvm_exit_event(evsel)) {
exit_event_get_key(evsel, sample, key);
return true;
}
return false;
}
bool kvm_entry_event(struct evsel *evsel)
{
return evsel__name_is(evsel, kvm_entry_trace);
}
bool exit_event_end(struct evsel *evsel,
struct perf_sample *sample __maybe_unused,
struct event_key *key __maybe_unused)
{
return kvm_entry_event(evsel);
}
static const char *get_exit_reason(struct perf_kvm_stat *kvm,
struct exit_reasons_table *tbl,
u64 exit_code)
{
while (tbl->reason != NULL) {
if (tbl->exit_code == exit_code)
return tbl->reason;
tbl++;
}
pr_err("unknown kvm exit code:%lld on %s\n",
(unsigned long long)exit_code, kvm->exit_reasons_isa);
return "UNKNOWN";
}
void exit_event_decode_key(struct perf_kvm_stat *kvm,
struct event_key *key,
char *decode)
{
const char *exit_reason = get_exit_reason(kvm, key->exit_reasons,
key->key);
scnprintf(decode, KVM_EVENT_NAME_LEN, "%s", exit_reason);
}
static bool register_kvm_events_ops(struct perf_kvm_stat *kvm)
{
struct kvm_reg_events_ops *events_ops = kvm_reg_events_ops;

View File

@ -6,6 +6,7 @@
*/
#include "builtin.h"
#include "perf.h"
#include "util/data.h"
#include "util/evlist.h"
@ -1103,7 +1104,8 @@ static char *evsel__softirq_name(struct evsel *evsel, u64 num)
char *name = NULL;
bool found = false;
struct tep_print_flag_sym *sym = NULL;
struct tep_print_arg *args = evsel->tp_format->print_fmt.args;
const struct tep_event *tp_format = evsel__tp_format(evsel);
struct tep_print_arg *args = tp_format ? tp_format->print_fmt.args : NULL;
if ((args == NULL) || (args->next == NULL))
return NULL;
@ -1846,7 +1848,7 @@ static void process_skipped_events(struct perf_kwork *kwork,
}
}
struct kwork_work *perf_kwork_add_work(struct perf_kwork *kwork,
static struct kwork_work *perf_kwork_add_work(struct perf_kwork *kwork,
struct kwork_class *class,
struct kwork_work *key)
{
@ -2344,6 +2346,7 @@ int cmd_kwork(int argc, const char **argv)
.all_runtime = 0,
.all_count = 0,
.nr_skipped_events = { 0 },
.add_work = perf_kwork_add_work,
};
static const char default_report_sort_order[] = "runtime, max, count";
static const char default_latency_sort_order[] = "avg, max, count";

View File

@ -46,15 +46,6 @@
static struct perf_session *session;
static struct target target;
/* based on kernel/lockdep.c */
#define LOCKHASH_BITS 12
#define LOCKHASH_SIZE (1UL << LOCKHASH_BITS)
static struct hlist_head *lockhash_table;
#define __lockhashfn(key) hash_long((unsigned long)key, LOCKHASH_BITS)
#define lockhashentry(key) (lockhash_table + __lockhashfn((key)))
static struct rb_root thread_stats;
static bool combine_locks;
@ -67,24 +58,13 @@ static unsigned long bpf_map_entries = MAX_ENTRIES;
static int max_stack_depth = CONTENTION_STACK_DEPTH;
static int stack_skip = CONTENTION_STACK_SKIP;
static int print_nr_entries = INT_MAX / 2;
static LIST_HEAD(callstack_filters);
static const char *output_name = NULL;
static FILE *lock_output;
struct callstack_filter {
struct list_head list;
char name[];
};
static struct lock_filter filters;
static enum lock_aggr_mode aggr_mode = LOCK_AGGR_ADDR;
static bool needs_callstack(void)
{
return !list_empty(&callstack_filters);
}
static struct thread_stat *thread_stat_find(u32 tid)
{
struct rb_node *node;
@ -477,93 +457,6 @@ static struct lock_stat *pop_from_result(void)
return container_of(node, struct lock_stat, rb);
}
struct lock_stat *lock_stat_find(u64 addr)
{
struct hlist_head *entry = lockhashentry(addr);
struct lock_stat *ret;
hlist_for_each_entry(ret, entry, hash_entry) {
if (ret->addr == addr)
return ret;
}
return NULL;
}
struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags)
{
struct hlist_head *entry = lockhashentry(addr);
struct lock_stat *ret, *new;
hlist_for_each_entry(ret, entry, hash_entry) {
if (ret->addr == addr)
return ret;
}
new = zalloc(sizeof(struct lock_stat));
if (!new)
goto alloc_failed;
new->addr = addr;
new->name = strdup(name);
if (!new->name) {
free(new);
goto alloc_failed;
}
new->flags = flags;
new->wait_time_min = ULLONG_MAX;
hlist_add_head(&new->hash_entry, entry);
return new;
alloc_failed:
pr_err("memory allocation failed\n");
return NULL;
}
bool match_callstack_filter(struct machine *machine, u64 *callstack)
{
struct map *kmap;
struct symbol *sym;
u64 ip;
const char *arch = perf_env__arch(machine->env);
if (list_empty(&callstack_filters))
return true;
for (int i = 0; i < max_stack_depth; i++) {
struct callstack_filter *filter;
/*
* On powerpc, the callchain saved by the kernel always includes
* the first three entries as the NIP (next instruction pointer),
* LR (link register), and the contents of the LR save area in the
* second stack frame. In certain scenarios it's possible to have
* invalid kernel instruction addresses in either LR or the second
* stack frame's LR. In that case, the kernel will store that address
* as zero.
*
* The check below will continue to look into the callstack,
* in case the first or second callstack index entry has a 0
* address for powerpc.
*/
if (!callstack || (!callstack[i] && (strcmp(arch, "powerpc") ||
(i != 1 && i != 2))))
break;
ip = callstack[i];
sym = machine__find_kernel_symbol(machine, ip, &kmap);
if (sym == NULL)
continue;
list_for_each_entry(filter, &callstack_filters, list) {
if (strstr(sym->name, filter->name))
return true;
}
}
return false;
}
struct trace_lock_handler {
/* it's used on CONFIG_LOCKDEP */
int (*acquire_event)(struct evsel *evsel,
@ -1165,7 +1058,7 @@ static int report_lock_contention_begin_event(struct evsel *evsel,
if (callstack == NULL)
return -ENOMEM;
if (!match_callstack_filter(machine, callstack)) {
if (!match_callstack_filter(machine, callstack, max_stack_depth)) {
free(callstack);
return 0;
}
@ -2449,34 +2342,6 @@ static int parse_lock_addr(const struct option *opt __maybe_unused, const char *
return ret;
}
static int parse_call_stack(const struct option *opt __maybe_unused, const char *str,
int unset __maybe_unused)
{
char *s, *tmp, *tok;
int ret = 0;
s = strdup(str);
if (s == NULL)
return -1;
for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
struct callstack_filter *entry;
entry = malloc(sizeof(*entry) + strlen(tok) + 1);
if (entry == NULL) {
pr_err("Memory allocation failure\n");
free(s);
return -1;
}
strcpy(entry->name, tok);
list_add_tail(&entry->list, &callstack_filters);
}
free(s);
return ret;
}
static int parse_output(const struct option *opt __maybe_unused, const char *str,
int unset __maybe_unused)
{

View File

@ -4,6 +4,7 @@
#include <sys/stat.h>
#include <unistd.h>
#include "builtin.h"
#include "perf.h"
#include <subcmd/parse-options.h>
#include "util/auxtrace.h"

View File

@ -860,7 +860,9 @@ static int record__auxtrace_init(struct record *rec)
if (err)
return err;
auxtrace_regroup_aux_output(rec->evlist);
err = auxtrace_parse_aux_action(rec->evlist);
if (err)
return err;
return auxtrace_parse_filters(rec->evlist);
}
@ -1748,10 +1750,8 @@ static void record__init_features(struct record *rec)
if (rec->no_buildid)
perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
#ifdef HAVE_LIBTRACEEVENT
if (!have_tracepoints(&rec->evlist->core.entries))
perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
#endif
if (!rec->opts.branch_stack)
perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

View File

@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include "builtin.h"
#include "perf.h"
#include "perf-sys.h"
#include "util/cpumap.h"

View File

@ -85,15 +85,12 @@ static bool system_wide;
static bool print_flags;
static const char *cpu_list;
static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
static struct perf_stat_config stat_config;
static int max_blocks;
static bool native_arch;
static struct dlfilter *dlfilter;
static int dlargc;
static char **dlargv;
unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH;
enum perf_output_field {
PERF_OUTPUT_COMM = 1ULL << 0,
PERF_OUTPUT_TID = 1ULL << 1,
@ -1589,19 +1586,6 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample,
return len + dlen;
}
__weak void arch_fetch_insn(struct perf_sample *sample __maybe_unused,
struct thread *thread __maybe_unused,
struct machine *machine __maybe_unused)
{
}
void script_fetch_insn(struct perf_sample *sample, struct thread *thread,
struct machine *machine)
{
if (sample->insn_len == 0 && native_arch)
arch_fetch_insn(sample, thread, machine);
}
static int perf_sample__fprintf_insn(struct perf_sample *sample,
struct evsel *evsel,
struct perf_event_attr *attr,
@ -1611,7 +1595,7 @@ static int perf_sample__fprintf_insn(struct perf_sample *sample,
{
int printed = 0;
script_fetch_insn(sample, thread, machine);
script_fetch_insn(sample, thread, machine, native_arch);
if (PRINT_FIELD(INSNLEN))
printed += fprintf(fp, " ilen: %d", sample->insn_len);
@ -1709,87 +1693,6 @@ static int perf_sample__fprintf_bts(struct perf_sample *sample,
return printed;
}
static struct {
u32 flags;
const char *name;
} sample_flags[] = {
{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL, "call"},
{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN, "return"},
{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL, "jcc"},
{PERF_IP_FLAG_BRANCH, "jmp"},
{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_INTERRUPT, "int"},
{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | PERF_IP_FLAG_INTERRUPT, "iret"},
{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_SYSCALLRET, "syscall"},
{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | PERF_IP_FLAG_SYSCALLRET, "sysret"},
{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_ASYNC, "async"},
{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC | PERF_IP_FLAG_INTERRUPT, "hw int"},
{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT, "tx abrt"},
{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_BEGIN, "tr strt"},
{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_END, "tr end"},
{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_VMENTRY, "vmentry"},
{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_VMEXIT, "vmexit"},
{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_BRANCH_MISS, "br miss"},
{0, NULL}
};
static const char *sample_flags_to_name(u32 flags)
{
int i;
for (i = 0; sample_flags[i].name ; i++) {
if (sample_flags[i].flags == flags)
return sample_flags[i].name;
}
return NULL;
}
int perf_sample__sprintf_flags(u32 flags, char *str, size_t sz)
{
u32 xf = PERF_IP_FLAG_IN_TX | PERF_IP_FLAG_INTR_DISABLE |
PERF_IP_FLAG_INTR_TOGGLE;
const char *chars = PERF_IP_FLAG_CHARS;
const size_t n = strlen(PERF_IP_FLAG_CHARS);
const char *name = NULL;
size_t i, pos = 0;
char xs[16] = {0};
if (flags & xf)
snprintf(xs, sizeof(xs), "(%s%s%s)",
flags & PERF_IP_FLAG_IN_TX ? "x" : "",
flags & PERF_IP_FLAG_INTR_DISABLE ? "D" : "",
flags & PERF_IP_FLAG_INTR_TOGGLE ? "t" : "");
name = sample_flags_to_name(flags & ~xf);
if (name)
return snprintf(str, sz, "%-15s%6s", name, xs);
if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
name = sample_flags_to_name(flags & ~(xf | PERF_IP_FLAG_TRACE_BEGIN));
if (name)
return snprintf(str, sz, "tr strt %-7s%6s", name, xs);
}
if (flags & PERF_IP_FLAG_TRACE_END) {
name = sample_flags_to_name(flags & ~(xf | PERF_IP_FLAG_TRACE_END));
if (name)
return snprintf(str, sz, "tr end %-7s%6s", name, xs);
}
for (i = 0; i < n; i++, flags >>= 1) {
if ((flags & 1) && pos < sz)
str[pos++] = chars[i];
}
for (; i < 32; i++, flags >>= 1) {
if ((flags & 1) && pos < sz)
str[pos++] = '?';
}
if (pos < sz)
str[pos] = 0;
return pos;
}
static int perf_sample__fprintf_flags(u32 flags, FILE *fp)
{
char str[SAMPLE_FLAGS_BUF_SIZE];
@ -2289,8 +2192,13 @@ static void process_event(struct perf_script *script,
}
#ifdef HAVE_LIBTRACEEVENT
if (PRINT_FIELD(TRACE) && sample->raw_data) {
event_format__fprintf(evsel->tp_format, sample->cpu,
sample->raw_data, sample->raw_size, fp);
const struct tep_event *tp_format = evsel__tp_format(evsel);
if (tp_format) {
event_format__fprintf(tp_format, sample->cpu,
sample->raw_data, sample->raw_size,
fp);
}
}
#endif
if (attr->type == PERF_TYPE_SYNTH && PRINT_FIELD(SYNTH))
@ -2959,79 +2867,18 @@ static int __cmd_script(struct perf_script *script)
return ret;
}
struct script_spec {
struct list_head node;
struct scripting_ops *ops;
char spec[];
};
static LIST_HEAD(script_specs);
static struct script_spec *script_spec__new(const char *spec,
struct scripting_ops *ops)
static int list_available_languages_cb(struct scripting_ops *ops, const char *spec)
{
struct script_spec *s = malloc(sizeof(*s) + strlen(spec) + 1);
if (s != NULL) {
strcpy(s->spec, spec);
s->ops = ops;
}
return s;
}
static void script_spec__add(struct script_spec *s)
{
list_add_tail(&s->node, &script_specs);
}
static struct script_spec *script_spec__find(const char *spec)
{
struct script_spec *s;
list_for_each_entry(s, &script_specs, node)
if (strcasecmp(s->spec, spec) == 0)
return s;
return NULL;
}
int script_spec_register(const char *spec, struct scripting_ops *ops)
{
struct script_spec *s;
s = script_spec__find(spec);
if (s)
return -1;
s = script_spec__new(spec, ops);
if (!s)
return -1;
else
script_spec__add(s);
fprintf(stderr, " %-42s [%s]\n", spec, ops->name);
return 0;
}
static struct scripting_ops *script_spec__lookup(const char *spec)
{
struct script_spec *s = script_spec__find(spec);
if (!s)
return NULL;
return s->ops;
}
static void list_available_languages(void)
{
struct script_spec *s;
fprintf(stderr, "\n");
fprintf(stderr, "Scripting language extensions (used in "
"perf script -s [spec:]script.[spec]):\n\n");
list_for_each_entry(s, &script_specs, node)
fprintf(stderr, " %-42s [%s]\n", s->spec, s->ops->name);
script_spec__for_each(&list_available_languages_cb);
fprintf(stderr, "\n");
}
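Editor's note: the refactor above replaces direct traversal of the script_specs list with a callback iterator, so the registry itself can move out of builtin-script.c. A toy sketch of that shape (the names and single hard-coded entry are stand-ins, not perf's real registry):

	#include <stdio.h>

	struct scripting_ops { const char *name; };

	typedef int (*script_spec_cb)(struct scripting_ops *ops, const char *spec);

	static struct scripting_ops python_ops = { .name = "Python" };

	static int script_spec__for_each(script_spec_cb cb)
	{
		/* a real registry iterates entries added at registration time */
		return cb(&python_ops, "py");
	}

	static int list_cb(struct scripting_ops *ops, const char *spec)
	{
		fprintf(stderr, "  %-42s [%s]\n", spec, ops->name);
		return 0;
	}

	int main(void)
	{
		script_spec__for_each(list_cb);
		return 0;
	}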
@ -3523,144 +3370,6 @@ static void free_dlarg(void)
free(dlargv);
}
/*
* Some scripts specify the required events in their "xxx-record" file;
* this function checks whether the events in perf.data match those
* mentioned in the "xxx-record".
*
* Fixme: all existing "xxx-record" files use the simple "-e event" format,
* which is covered well now. New parsing code should be added to
* cover future complex formats such as event groups.
*/
static int check_ev_match(char *dir_name, char *scriptname,
struct perf_session *session)
{
char filename[MAXPATHLEN], evname[128];
char line[BUFSIZ], *p;
struct evsel *pos;
int match, len;
FILE *fp;
scnprintf(filename, MAXPATHLEN, "%s/bin/%s-record", dir_name, scriptname);
fp = fopen(filename, "r");
if (!fp)
return -1;
while (fgets(line, sizeof(line), fp)) {
p = skip_spaces(line);
if (*p == '#')
continue;
while (strlen(p)) {
p = strstr(p, "-e");
if (!p)
break;
p += 2;
p = skip_spaces(p);
len = strcspn(p, " \t");
if (!len)
break;
snprintf(evname, len + 1, "%s", p);
match = 0;
evlist__for_each_entry(session->evlist, pos) {
if (evsel__name_is(pos, evname)) {
match = 1;
break;
}
}
if (!match) {
fclose(fp);
return -1;
}
}
}
fclose(fp);
return 0;
}
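Editor's note: the "xxx-record" files parsed here are one-line shell wrappers ending in "$@"; a hypothetical example of a line this parser accepts:

	perf record -e kmem:mm_page_alloc -e kmem:mm_page_free $@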
/*
* Return -1 if none are found, otherwise the number of scripts found.
*
* Currently the only user of this function is the script browser, which
* will list all statically runnable scripts, select one, execute it and
* show the output in a perf browser.
*/
int find_scripts(char **scripts_array, char **scripts_path_array, int num,
int pathlen)
{
struct dirent *script_dirent, *lang_dirent;
char scripts_path[MAXPATHLEN], lang_path[MAXPATHLEN];
DIR *scripts_dir, *lang_dir;
struct perf_session *session;
struct perf_data data = {
.path = input_name,
.mode = PERF_DATA_MODE_READ,
};
char *temp;
int i = 0;
session = perf_session__new(&data, NULL);
if (IS_ERR(session))
return PTR_ERR(session);
snprintf(scripts_path, MAXPATHLEN, "%s/scripts", get_argv_exec_path());
scripts_dir = opendir(scripts_path);
if (!scripts_dir) {
perf_session__delete(session);
return -1;
}
for_each_lang(scripts_path, scripts_dir, lang_dirent) {
scnprintf(lang_path, MAXPATHLEN, "%s/%s", scripts_path,
lang_dirent->d_name);
#ifndef HAVE_LIBPERL_SUPPORT
if (strstr(lang_path, "perl"))
continue;
#endif
#ifndef HAVE_LIBPYTHON_SUPPORT
if (strstr(lang_path, "python"))
continue;
#endif
lang_dir = opendir(lang_path);
if (!lang_dir)
continue;
for_each_script(lang_path, lang_dir, script_dirent) {
/* Skip those real time scripts: xxxtop.p[yl] */
if (strstr(script_dirent->d_name, "top."))
continue;
if (i >= num)
break;
snprintf(scripts_path_array[i], pathlen, "%s/%s",
lang_path,
script_dirent->d_name);
temp = strchr(script_dirent->d_name, '.');
snprintf(scripts_array[i],
(temp - script_dirent->d_name) + 1,
"%s", script_dirent->d_name);
if (check_ev_match(lang_path,
scripts_array[i], session))
continue;
i++;
}
closedir(lang_dir);
}
closedir(scripts_dir);
perf_session__delete(session);
return i;
}
static char *get_script_path(const char *script_root, const char *suffix)
{
struct dirent *script_dirent, *lang_dirent;

View File

@ -112,8 +112,6 @@ static struct target target = {
.uid = UINT_MAX,
};
#define METRIC_ONLY_LEN 20
static volatile sig_atomic_t child_pid = -1;
static int detailed_run = 0;
static bool transaction_run;
@ -151,21 +149,6 @@ static struct perf_stat perf_stat;
static volatile sig_atomic_t done = 0;
static struct perf_stat_config stat_config = {
.aggr_mode = AGGR_GLOBAL,
.aggr_level = MAX_CACHE_LVL + 1,
.scale = true,
.unit_width = 4, /* strlen("unit") */
.run_count = 1,
.metric_only_len = METRIC_ONLY_LEN,
.walltime_nsecs_stats = &walltime_nsecs_stats,
.ru_stats = &ru_stats,
.big_num = true,
.ctl_fd = -1,
.ctl_fd_ack = -1,
.iostat_run = false,
};
/* Options set from the command line. */
struct opt_aggr_mode {
bool node, socket, die, cluster, cache, core, thread, no_aggr;
@ -1071,16 +1054,6 @@ static void sig_atexit(void)
kill(getpid(), signr);
}
void perf_stat__set_big_num(int set)
{
stat_config.big_num = (set != 0);
}
void perf_stat__set_no_csv_summary(int set)
{
stat_config.no_csv_summary = (set != 0);
}
static int stat__set_big_num(const struct option *opt __maybe_unused,
const char *s __maybe_unused, int unset)
{

View File

@ -389,7 +389,12 @@ static struct syscall_arg_fmt *evsel__syscall_arg_fmt(struct evsel *evsel)
}
if (et->fmt == NULL) {
et->fmt = calloc(evsel->tp_format->format.nr_fields, sizeof(struct syscall_arg_fmt));
const struct tep_event *tp_format = evsel__tp_format(evsel);
if (tp_format == NULL)
goto out_delete;
et->fmt = calloc(tp_format->format.nr_fields, sizeof(struct syscall_arg_fmt));
if (et->fmt == NULL)
goto out_delete;
}
@ -1108,7 +1113,6 @@ static bool syscall_arg__strtoul_btf_type(char *bf __maybe_unused, size_t size _
.strtoul = STUL_STRARRAY_FLAGS, \
.parm = &strarray__##array, }
#include "trace/beauty/arch_errno_names.c"
#include "trace/beauty/eventfd.c"
#include "trace/beauty/futex_op.c"
#include "trace/beauty/futex_val3.c"
@ -2154,8 +2158,12 @@ static int evsel__init_tp_arg_scnprintf(struct evsel *evsel, bool *use_btf)
struct syscall_arg_fmt *fmt = evsel__syscall_arg_fmt(evsel);
if (fmt != NULL) {
syscall_arg_fmt__init_array(fmt, evsel->tp_format->format.fields, use_btf);
return 0;
const struct tep_event *tp_format = evsel__tp_format(evsel);
if (tp_format) {
syscall_arg_fmt__init_array(fmt, tp_format->format.fields, use_btf);
return 0;
}
}
return -ENOMEM;
@ -3027,7 +3035,8 @@ static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel,
{
char bf[2048];
size_t size = sizeof(bf);
struct tep_format_field *field = evsel->tp_format->format.fields;
const struct tep_event *tp_format = evsel__tp_format(evsel);
struct tep_format_field *field = tp_format ? tp_format->format.fields : NULL;
struct syscall_arg_fmt *arg = __evsel__syscall_arg_fmt(evsel);
size_t printed = 0, btf_printed;
unsigned long val;
@ -3145,11 +3154,13 @@ static int trace__event_handler(struct trace *trace, struct evsel *evsel,
if (evsel__is_bpf_output(evsel)) {
bpf_output__fprintf(trace, sample);
} else if (evsel->tp_format) {
if (strncmp(evsel->tp_format->name, "sys_enter_", 10) ||
trace__fprintf_sys_enter(trace, evsel, sample)) {
} else {
const struct tep_event *tp_format = evsel__tp_format(evsel);
if (tp_format && (strncmp(tp_format->name, "sys_enter_", 10) ||
trace__fprintf_sys_enter(trace, evsel, sample))) {
if (trace->libtraceevent_print) {
event_format__fprintf(evsel->tp_format, sample->cpu,
event_format__fprintf(tp_format, sample->cpu,
sample->raw_data, sample->raw_size,
trace->output);
} else {
@ -4077,17 +4088,23 @@ static int ordered_events__deliver_event(struct ordered_events *oe,
static struct syscall_arg_fmt *evsel__find_syscall_arg_fmt_by_name(struct evsel *evsel, char *arg,
char **type)
{
struct tep_format_field *field;
struct syscall_arg_fmt *fmt = __evsel__syscall_arg_fmt(evsel);
const struct tep_event *tp_format;
if (evsel->tp_format == NULL || fmt == NULL)
if (!fmt)
return NULL;
for (field = evsel->tp_format->format.fields; field; field = field->next, ++fmt)
tp_format = evsel__tp_format(evsel);
if (!tp_format)
return NULL;
for (const struct tep_format_field *field = tp_format->format.fields; field;
field = field->next, ++fmt) {
if (strcmp(field->name, arg) == 0) {
*type = field->type;
return fmt;
}
}
return NULL;
}
@ -4843,13 +4860,18 @@ static void evsel__set_syscall_arg_fmt(struct evsel *evsel, const char *name)
const struct syscall_fmt *scfmt = syscall_fmt__find(name);
if (scfmt) {
int skip = 0;
const struct tep_event *tp_format = evsel__tp_format(evsel);
if (strcmp(evsel->tp_format->format.fields->name, "__syscall_nr") == 0 ||
strcmp(evsel->tp_format->format.fields->name, "nr") == 0)
++skip;
if (tp_format) {
int skip = 0;
memcpy(fmt + skip, scfmt->arg, (evsel->tp_format->format.nr_fields - skip) * sizeof(*fmt));
if (strcmp(tp_format->format.fields->name, "__syscall_nr") == 0 ||
strcmp(tp_format->format.fields->name, "nr") == 0)
++skip;
memcpy(fmt + skip, scfmt->arg,
(tp_format->format.nr_fields - skip) * sizeof(*fmt));
}
}
}
}
@ -4859,10 +4881,16 @@ static int evlist__set_syscall_tp_fields(struct evlist *evlist, bool *use_btf)
struct evsel *evsel;
evlist__for_each_entry(evlist, evsel) {
if (evsel->priv || !evsel->tp_format)
const struct tep_event *tp_format;
if (evsel->priv)
continue;
if (strcmp(evsel->tp_format->system, "syscalls")) {
tp_format = evsel__tp_format(evsel);
if (!tp_format)
continue;
if (strcmp(tp_format->system, "syscalls")) {
evsel__init_tp_arg_scnprintf(evsel, use_btf);
continue;
}
@ -4870,20 +4898,24 @@ static int evlist__set_syscall_tp_fields(struct evlist *evlist, bool *use_btf)
if (evsel__init_syscall_tp(evsel))
return -1;
if (!strncmp(evsel->tp_format->name, "sys_enter_", 10)) {
if (!strncmp(tp_format->name, "sys_enter_", 10)) {
struct syscall_tp *sc = __evsel__syscall_tp(evsel);
if (__tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64)))
return -1;
evsel__set_syscall_arg_fmt(evsel, evsel->tp_format->name + sizeof("sys_enter_") - 1);
} else if (!strncmp(evsel->tp_format->name, "sys_exit_", 9)) {
evsel__set_syscall_arg_fmt(evsel,
tp_format->name + sizeof("sys_enter_") - 1);
} else if (!strncmp(tp_format->name, "sys_exit_", 9)) {
struct syscall_tp *sc = __evsel__syscall_tp(evsel);
if (__tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap))
if (__tp_field__init_uint(&sc->ret, sizeof(u64),
sc->id.offset + sizeof(u64),
evsel->needs_swap))
return -1;
evsel__set_syscall_arg_fmt(evsel, evsel->tp_format->name + sizeof("sys_exit_") - 1);
evsel__set_syscall_arg_fmt(evsel,
tp_format->name + sizeof("sys_exit_") - 1);
}
}

View File

@ -2,10 +2,6 @@
#ifndef BUILTIN_H
#define BUILTIN_H
#include <stddef.h>
#include <linux/compiler.h>
#include <tools/config.h>
struct feature_status {
const char *name;
const char *macro;
@ -56,6 +52,4 @@ int cmd_ftrace(int argc, const char **argv);
int cmd_daemon(int argc, const char **argv);
int cmd_kwork(int argc, const char **argv);
int find_scripts(char **scripts_array, char **scripts_path_array, int num,
int pathlen);
#endif

View File

@ -3,7 +3,7 @@
#define _PERF_PERF_H
#ifndef MAX_NR_CPUS
#define MAX_NR_CPUS 2048
#define MAX_NR_CPUS 4096
#endif
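Editor's note: the jump from 2048 to 4096 doubles every MAX_NR_CPUS-sized structure, e.g. the DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS) earlier in this diff goes from 256 to 512 bytes on a 64-bit host. A quick check of that arithmetic (a sketch, not perf code):

	#include <stdio.h>

	#define BITS_PER_LONG		(8 * sizeof(unsigned long))
	#define BITMAP_LONGS(bits)	(((bits) + BITS_PER_LONG - 1) / BITS_PER_LONG)

	int main(void)
	{
		printf("2048 CPUs -> %zu bitmap bytes\n",
		       BITMAP_LONGS(2048) * sizeof(unsigned long));
		printf("4096 CPUs -> %zu bitmap bytes\n",
		       BITMAP_LONGS(4096) * sizeof(unsigned long));
		return 0;
	}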
enum perf_affinity {

View File

@ -5,7 +5,7 @@
},
{
"ArchStdEvent": "EXC_RETURN",
"PublicDescription": "Counts any architecturally executed exception return instructions. Eg: AArch64: ERET"
"PublicDescription": "Counts any architecturally executed exception return instructions. For example: AArch64: ERET"
},
{
"ArchStdEvent": "EXC_UNDEF",

View File

@ -5,6 +5,6 @@
},
{
"ArchStdEvent": "CNT_CYCLES",
"PublicDescription": "Counts constant frequency cycles"
"PublicDescription": "Increments at a constant frequency equal to the rate of increment of the System Counter, CNTPCT_EL0."
}
]

View File

@ -1,11 +1,11 @@
[
{
"ArchStdEvent": "L1D_CACHE_REFILL",
"PublicDescription": "Counts level 1 data cache refills caused by speculatively executed load or store operations that missed in the level 1 data cache. This event only counts one event per cache line. This event does not count cache line allocations from preload instructions or from hardware cache prefetching."
"PublicDescription": "Counts level 1 data cache refills caused by speculatively executed load or store operations that missed in the level 1 data cache. This event only counts one event per cache line."
},
{
"ArchStdEvent": "L1D_CACHE",
"PublicDescription": "Counts level 1 data cache accesses from any load/store operations. Atomic operations that resolve in the CPUs caches (near atomic operations) count as both a write access and read access. Each access to a cache line is counted including the multiple accesses caused by single instructions such as LDM or STM. Each access to other level 1 data or unified memory structures, for example refill buffers, write buffers, and write-back buffers, are also counted."
"PublicDescription": "Counts level 1 data cache accesses from any load/store operations. Atomic operations that resolve in the CPUs caches (near atomic operations) counts as both a write access and read access. Each access to a cache line is counted including the multiple accesses caused by single instructions such as LDM or STM. Each access to other level 1 data or unified memory structures, for example refill buffers, write buffers, and write-back buffers, are also counted."
},
{
"ArchStdEvent": "L1D_CACHE_WB",
@ -17,7 +17,7 @@
},
{
"ArchStdEvent": "L1D_CACHE_RD",
"PublicDescription": "Counts level 1 data cache accesses from any load operation. Atomic load operations that resolve in the CPUs caches count as both a write access and read access."
"PublicDescription": "Counts level 1 data cache accesses from any load operation. Atomic load operations that resolve in the CPUs caches counts as both a write access and read access."
},
{
"ArchStdEvent": "L1D_CACHE_WR",

View File

@ -1,11 +1,11 @@
[
{
"ArchStdEvent": "L2D_CACHE",
"PublicDescription": "Counts level 2 cache accesses. level 2 cache is a unified cache for data and instruction accesses. Accesses are for misses in the first level caches or translation resolutions due to accesses. This event also counts write back of dirty data from level 1 data cache to the L2 cache."
"PublicDescription": "Counts accesses to the level 2 cache due to data accesses. Level 2 cache is a unified cache for data and instruction accesses. Accesses are for misses in the first level data cache or translation resolutions due to accesses. This event also counts write back of dirty data from level 1 data cache to the L2 cache."
},
{
"ArchStdEvent": "L2D_CACHE_REFILL",
"PublicDescription": "Counts cache line refills into the level 2 cache. level 2 cache is a unified cache for data and instruction accesses. Accesses are for misses in the level 1 caches or translation resolutions due to accesses."
"PublicDescription": "Counts cache line refills into the level 2 cache. Level 2 cache is a unified cache for data and instruction accesses. Accesses are for misses in the level 1 data cache or translation resolutions due to accesses."
},
{
"ArchStdEvent": "L2D_CACHE_WB",
@ -13,23 +13,23 @@
},
{
"ArchStdEvent": "L2D_CACHE_ALLOCATE",
"PublicDescription": "TBD"
"PublicDescription": "Counts level 2 cache line allocates that do not fetch data from outside the level 2 data or unified cache."
},
{
"ArchStdEvent": "L2D_CACHE_RD",
"PublicDescription": "Counts level 2 cache accesses due to memory read operations. level 2 cache is a unified cache for data and instruction accesses, accesses are for misses in the level 1 caches or translation resolutions due to accesses."
"PublicDescription": "Counts level 2 data cache accesses due to memory read operations. Level 2 cache is a unified cache for data and instruction accesses, accesses are for misses in the level 1 data cache or translation resolutions due to accesses."
},
{
"ArchStdEvent": "L2D_CACHE_WR",
"PublicDescription": "Counts level 2 cache accesses due to memory write operations. level 2 cache is a unified cache for data and instruction accesses, accesses are for misses in the level 1 caches or translation resolutions due to accesses."
"PublicDescription": "Counts level 2 cache accesses due to memory write operations. Level 2 cache is a unified cache for data and instruction accesses, accesses are for misses in the level 1 data cache or translation resolutions due to accesses."
},
{
"ArchStdEvent": "L2D_CACHE_REFILL_RD",
"PublicDescription": "Counts refills for memory accesses due to memory read operation counted by L2D_CACHE_RD. level 2 cache is a unified cache for data and instruction accesses, accesses are for misses in the level 1 caches or translation resolutions due to accesses."
"PublicDescription": "Counts refills for memory accesses due to memory read operation counted by L2D_CACHE_RD. Level 2 cache is a unified cache for data and instruction accesses, accesses are for misses in the level 1 data cache or translation resolutions due to accesses."
},
{
"ArchStdEvent": "L2D_CACHE_REFILL_WR",
"PublicDescription": "Counts refills for memory accesses due to memory write operation counted by L2D_CACHE_WR. level 2 cache is a unified cache for data and instruction accesses, accesses are for misses in the level 1 caches or translation resolutions due to accesses."
"PublicDescription": "Counts refills for memory accesses due to memory write operation counted by L2D_CACHE_WR. Level 2 cache is a unified cache for data and instruction accesses, accesses are for misses in the level 1 data cache or translation resolutions due to accesses."
},
{
"ArchStdEvent": "L2D_CACHE_WB_VICTIM",

View File

@ -9,11 +9,11 @@
},
{
"ArchStdEvent": "L3D_CACHE",
"PublicDescription": "Counts level 3 cache accesses. level 3 cache is a unified cache for data and instruction accesses. Accesses are for misses in the lower level caches or translation resolutions due to accesses."
"PublicDescription": "Counts level 3 cache accesses. Level 3 cache is a unified cache for data and instruction accesses. Accesses are for misses in the lower level caches or translation resolutions due to accesses."
},
{
"ArchStdEvent": "L3D_CACHE_RD",
"PublicDescription": "TBD"
"PublicDescription": "Counts level 3 cache accesses caused by any memory read operation. Level 3 cache is a unified cache for data and instruction accesses. Accesses are for misses in the lower level caches or translation resolutions due to accesses."
},
{
"ArchStdEvent": "L3D_CACHE_LMISS_RD",

View File

@ -1,10 +1,10 @@
[
{
"ArchStdEvent": "LL_CACHE_RD",
"PublicDescription": "Counts read transactions that were returned from outside the core cluster. This event counts when the system register CPUECTLR.EXTLLC bit is set. This event counts read transactions returned from outside the core if those transactions are either hit in the system level cache or missed in the SLC and are returned from any other external sources."
"PublicDescription": "Counts read transactions that were returned from outside the core cluster. This event counts for external last level cache when the system register CPUECTLR.EXTLLC bit is set, otherwise it counts for the L3 cache. This event counts read transactions returned from outside the core if those transactions are either hit in the system level cache or missed in the SLC and are returned from any other external sources."
},
{
"ArchStdEvent": "LL_CACHE_MISS_RD",
"PublicDescription": "Counts read transactions that were returned from outside the core cluster but missed in the system level cache. This event counts when the system register CPUECTLR.EXTLLC bit is set. This event counts read transactions returned from outside the core if those transactions are missed in the System level Cache. The data source of the transaction is indicated by a field in the CHI transaction returning to the CPU. This event does not count reads caused by cache maintenance operations."
"PublicDescription": "Counts read transactions that were returned from outside the core cluster but missed in the system level cache. This event counts for external last level cache when the system register CPUECTLR.EXTLLC bit is set, otherwise it counts for L3 cache. This event counts read transactions returned from outside the core if those transactions are missed in the System level Cache. The data source of the transaction is indicated by a field in the CHI transaction returning to the CPU. This event does not count reads caused by cache maintenance operations."
}
]

View File

@ -33,7 +33,7 @@
},
{
"ArchStdEvent": "MEM_ACCESS_CHECKED",
"PublicDescription": "Counts the number of memory read and write accesses in a cycle that are tag checked by the Memory Tagging Extension (MTE)."
"PublicDescription": "Counts the number of memory read and write accesses counted by MEM_ACCESS that are tag checked by the Memory Tagging Extension (MTE). This event is implemented as the sum of MEM_ACCESS_CHECKED_RD and MEM_ACCESS_CHECKED_WR"
},
{
"ArchStdEvent": "MEM_ACCESS_CHECKED_RD",

View File

@ -5,7 +5,7 @@
},
{
"MetricName": "backend_stalled_cycles",
"MetricExpr": "((STALL_BACKEND / CPU_CYCLES) * 100)",
"MetricExpr": "STALL_BACKEND / CPU_CYCLES * 100",
"BriefDescription": "This metric is the percentage of cycles that were stalled due to resource constraints in the backend unit of the processor.",
"MetricGroup": "Cycle_Accounting",
"ScaleUnit": "1percent of cycles"
@ -16,45 +16,45 @@
},
{
"MetricName": "branch_misprediction_ratio",
"MetricExpr": "(BR_MIS_PRED_RETIRED / BR_RETIRED)",
"MetricExpr": "BR_MIS_PRED_RETIRED / BR_RETIRED",
"BriefDescription": "This metric measures the ratio of branches mispredicted to the total number of branches architecturally executed. This gives an indication of the effectiveness of the branch prediction unit.",
"MetricGroup": "Miss_Ratio;Branch_Effectiveness",
"ScaleUnit": "1per branch"
"ScaleUnit": "100percent of branches"
},
{
"MetricName": "branch_mpki",
"MetricExpr": "((BR_MIS_PRED_RETIRED / INST_RETIRED) * 1000)",
"MetricExpr": "BR_MIS_PRED_RETIRED / INST_RETIRED * 1000",
"BriefDescription": "This metric measures the number of branch mispredictions per thousand instructions executed.",
"MetricGroup": "MPKI;Branch_Effectiveness",
"ScaleUnit": "1MPKI"
},
{
"MetricName": "branch_percentage",
"MetricExpr": "(((BR_IMMED_SPEC + BR_INDIRECT_SPEC) / INST_SPEC) * 100)",
"MetricExpr": "(BR_IMMED_SPEC + BR_INDIRECT_SPEC) / INST_SPEC * 100",
"BriefDescription": "This metric measures branch operations as a percentage of operations speculatively executed.",
"MetricGroup": "Operation_Mix",
"ScaleUnit": "1percent of operations"
},
{
"MetricName": "crypto_percentage",
"MetricExpr": "((CRYPTO_SPEC / INST_SPEC) * 100)",
"MetricExpr": "CRYPTO_SPEC / INST_SPEC * 100",
"BriefDescription": "This metric measures crypto operations as a percentage of operations speculatively executed.",
"MetricGroup": "Operation_Mix",
"ScaleUnit": "1percent of operations"
},
{
"MetricName": "dtlb_mpki",
"MetricExpr": "((DTLB_WALK / INST_RETIRED) * 1000)",
"MetricExpr": "DTLB_WALK / INST_RETIRED * 1000",
"BriefDescription": "This metric measures the number of data TLB Walks per thousand instructions executed.",
"MetricGroup": "MPKI;DTLB_Effectiveness",
"ScaleUnit": "1MPKI"
},
{
"MetricName": "dtlb_walk_ratio",
"MetricExpr": "(DTLB_WALK / L1D_TLB)",
"MetricExpr": "DTLB_WALK / L1D_TLB",
"BriefDescription": "This metric measures the ratio of data TLB Walks to the total number of data TLB accesses. This gives an indication of the effectiveness of the data TLB accesses.",
"MetricGroup": "Miss_Ratio;DTLB_Effectiveness",
"ScaleUnit": "1per TLB access"
"ScaleUnit": "100percent of TLB accesses"
},
{
"ArchStdEvent": "frontend_bound",
@ -62,147 +62,147 @@
},
{
"MetricName": "frontend_stalled_cycles",
"MetricExpr": "((STALL_FRONTEND / CPU_CYCLES) * 100)",
"MetricExpr": "STALL_FRONTEND / CPU_CYCLES * 100",
"BriefDescription": "This metric is the percentage of cycles that were stalled due to resource constraints in the frontend unit of the processor.",
"MetricGroup": "Cycle_Accounting",
"ScaleUnit": "1percent of cycles"
},
{
"MetricName": "integer_dp_percentage",
"MetricExpr": "((DP_SPEC / INST_SPEC) * 100)",
"MetricExpr": "DP_SPEC / INST_SPEC * 100",
"BriefDescription": "This metric measures scalar integer operations as a percentage of operations speculatively executed.",
"MetricGroup": "Operation_Mix",
"ScaleUnit": "1percent of operations"
},
{
"MetricName": "ipc",
"MetricExpr": "(INST_RETIRED / CPU_CYCLES)",
"MetricExpr": "INST_RETIRED / CPU_CYCLES",
"BriefDescription": "This metric measures the number of instructions retired per cycle.",
"MetricGroup": "General",
"ScaleUnit": "1per cycle"
},
{
"MetricName": "itlb_mpki",
"MetricExpr": "((ITLB_WALK / INST_RETIRED) * 1000)",
"MetricExpr": "ITLB_WALK / INST_RETIRED * 1000",
"BriefDescription": "This metric measures the number of instruction TLB Walks per thousand instructions executed.",
"MetricGroup": "MPKI;ITLB_Effectiveness",
"ScaleUnit": "1MPKI"
},
{
"MetricName": "itlb_walk_ratio",
"MetricExpr": "(ITLB_WALK / L1I_TLB)",
"MetricExpr": "ITLB_WALK / L1I_TLB",
"BriefDescription": "This metric measures the ratio of instruction TLB Walks to the total number of instruction TLB accesses. This gives an indication of the effectiveness of the instruction TLB accesses.",
"MetricGroup": "Miss_Ratio;ITLB_Effectiveness",
"ScaleUnit": "1per TLB access"
"ScaleUnit": "100percent of TLB accesses"
},
{
"MetricName": "l1d_cache_miss_ratio",
"MetricExpr": "(L1D_CACHE_REFILL / L1D_CACHE)",
"MetricExpr": "L1D_CACHE_REFILL / L1D_CACHE",
"BriefDescription": "This metric measures the ratio of level 1 data cache accesses missed to the total number of level 1 data cache accesses. This gives an indication of the effectiveness of the level 1 data cache.",
"MetricGroup": "Miss_Ratio;L1D_Cache_Effectiveness",
"ScaleUnit": "1per cache access"
"ScaleUnit": "100percent of cache accesses"
},
{
"MetricName": "l1d_cache_mpki",
"MetricExpr": "((L1D_CACHE_REFILL / INST_RETIRED) * 1000)",
"MetricExpr": "L1D_CACHE_REFILL / INST_RETIRED * 1000",
"BriefDescription": "This metric measures the number of level 1 data cache accesses missed per thousand instructions executed.",
"MetricGroup": "MPKI;L1D_Cache_Effectiveness",
"ScaleUnit": "1MPKI"
},
{
"MetricName": "l1d_tlb_miss_ratio",
"MetricExpr": "(L1D_TLB_REFILL / L1D_TLB)",
"MetricExpr": "L1D_TLB_REFILL / L1D_TLB",
"BriefDescription": "This metric measures the ratio of level 1 data TLB accesses missed to the total number of level 1 data TLB accesses. This gives an indication of the effectiveness of the level 1 data TLB.",
"MetricGroup": "Miss_Ratio;DTLB_Effectiveness",
"ScaleUnit": "1per TLB access"
"ScaleUnit": "100percent of TLB accesses"
},
{
"MetricName": "l1d_tlb_mpki",
"MetricExpr": "((L1D_TLB_REFILL / INST_RETIRED) * 1000)",
"BriefDescription": "This metric measures the number of level 1 instruction TLB accesses missed per thousand instructions executed.",
"MetricExpr": "L1D_TLB_REFILL / INST_RETIRED * 1000",
"BriefDescription": "This metric measures the number of level 1 data TLB accesses missed per thousand instructions executed.",
"MetricGroup": "MPKI;DTLB_Effectiveness",
"ScaleUnit": "1MPKI"
},
{
"MetricName": "l1i_cache_miss_ratio",
"MetricExpr": "(L1I_CACHE_REFILL / L1I_CACHE)",
"MetricExpr": "L1I_CACHE_REFILL / L1I_CACHE",
"BriefDescription": "This metric measures the ratio of level 1 instruction cache accesses missed to the total number of level 1 instruction cache accesses. This gives an indication of the effectiveness of the level 1 instruction cache.",
"MetricGroup": "Miss_Ratio;L1I_Cache_Effectiveness",
"ScaleUnit": "1per cache access"
"ScaleUnit": "100percent of cache accesses"
},
{
"MetricName": "l1i_cache_mpki",
"MetricExpr": "((L1I_CACHE_REFILL / INST_RETIRED) * 1000)",
"MetricExpr": "L1I_CACHE_REFILL / INST_RETIRED * 1000",
"BriefDescription": "This metric measures the number of level 1 instruction cache accesses missed per thousand instructions executed.",
"MetricGroup": "MPKI;L1I_Cache_Effectiveness",
"ScaleUnit": "1MPKI"
},
{
"MetricName": "l1i_tlb_miss_ratio",
"MetricExpr": "(L1I_TLB_REFILL / L1I_TLB)",
"MetricExpr": "L1I_TLB_REFILL / L1I_TLB",
"BriefDescription": "This metric measures the ratio of level 1 instruction TLB accesses missed to the total number of level 1 instruction TLB accesses. This gives an indication of the effectiveness of the level 1 instruction TLB.",
"MetricGroup": "Miss_Ratio;ITLB_Effectiveness",
"ScaleUnit": "1per TLB access"
"ScaleUnit": "100percent of TLB accesses"
},
{
"MetricName": "l1i_tlb_mpki",
"MetricExpr": "((L1I_TLB_REFILL / INST_RETIRED) * 1000)",
"MetricExpr": "L1I_TLB_REFILL / INST_RETIRED * 1000",
"BriefDescription": "This metric measures the number of level 1 instruction TLB accesses missed per thousand instructions executed.",
"MetricGroup": "MPKI;ITLB_Effectiveness",
"ScaleUnit": "1MPKI"
},
{
"MetricName": "l2_cache_miss_ratio",
"MetricExpr": "(L2D_CACHE_REFILL / L2D_CACHE)",
"MetricExpr": "L2D_CACHE_REFILL / L2D_CACHE",
"BriefDescription": "This metric measures the ratio of level 2 cache accesses missed to the total number of level 2 cache accesses. This gives an indication of the effectiveness of the level 2 cache, which is a unified cache that stores both data and instruction. Note that cache accesses in this cache are either data memory access or instruction fetch as this is a unified cache.",
"MetricGroup": "Miss_Ratio;L2_Cache_Effectiveness",
"ScaleUnit": "1per cache access"
"ScaleUnit": "100percent of cache accesses"
},
{
"MetricName": "l2_cache_mpki",
"MetricExpr": "((L2D_CACHE_REFILL / INST_RETIRED) * 1000)",
"MetricExpr": "L2D_CACHE_REFILL / INST_RETIRED * 1000",
"BriefDescription": "This metric measures the number of level 2 unified cache accesses missed per thousand instructions executed. Note that cache accesses in this cache are either data memory access or instruction fetch as this is a unified cache.",
"MetricGroup": "MPKI;L2_Cache_Effectiveness",
"ScaleUnit": "1MPKI"
},
{
"MetricName": "l2_tlb_miss_ratio",
"MetricExpr": "(L2D_TLB_REFILL / L2D_TLB)",
"MetricExpr": "L2D_TLB_REFILL / L2D_TLB",
"BriefDescription": "This metric measures the ratio of level 2 unified TLB accesses missed to the total number of level 2 unified TLB accesses. This gives an indication of the effectiveness of the level 2 TLB.",
"MetricGroup": "Miss_Ratio;ITLB_Effectiveness;DTLB_Effectiveness",
"ScaleUnit": "1per TLB access"
"ScaleUnit": "100percent of TLB accesses"
},
{
"MetricName": "l2_tlb_mpki",
"MetricExpr": "((L2D_TLB_REFILL / INST_RETIRED) * 1000)",
"MetricExpr": "L2D_TLB_REFILL / INST_RETIRED * 1000",
"BriefDescription": "This metric measures the number of level 2 unified TLB accesses missed per thousand instructions executed.",
"MetricGroup": "MPKI;ITLB_Effectiveness;DTLB_Effectiveness",
"ScaleUnit": "1MPKI"
},
{
"MetricName": "ll_cache_read_hit_ratio",
"MetricExpr": "((LL_CACHE_RD - LL_CACHE_MISS_RD) / LL_CACHE_RD)",
"MetricExpr": "(LL_CACHE_RD - LL_CACHE_MISS_RD) / LL_CACHE_RD",
"BriefDescription": "This metric measures the ratio of last level cache read accesses hit in the cache to the total number of last level cache accesses. This gives an indication of the effectiveness of the last level cache for read traffic. Note that cache accesses in this cache are either data memory access or instruction fetch as this is a system level cache.",
"MetricGroup": "LL_Cache_Effectiveness",
"ScaleUnit": "1per cache access"
"ScaleUnit": "100percent of cache accesses"
},
{
"MetricName": "ll_cache_read_miss_ratio",
"MetricExpr": "(LL_CACHE_MISS_RD / LL_CACHE_RD)",
"MetricExpr": "LL_CACHE_MISS_RD / LL_CACHE_RD",
"BriefDescription": "This metric measures the ratio of last level cache read accesses missed to the total number of last level cache accesses. This gives an indication of the effectiveness of the last level cache for read traffic. Note that cache accesses in this cache are either data memory access or instruction fetch as this is a system level cache.",
"MetricGroup": "Miss_Ratio;LL_Cache_Effectiveness",
"ScaleUnit": "1per cache access"
"ScaleUnit": "100percent of cache accesses"
},
{
"MetricName": "ll_cache_read_mpki",
"MetricExpr": "((LL_CACHE_MISS_RD / INST_RETIRED) * 1000)",
"MetricExpr": "LL_CACHE_MISS_RD / INST_RETIRED * 1000",
"BriefDescription": "This metric measures the number of last level cache read accesses missed per thousand instructions executed.",
"MetricGroup": "MPKI;LL_Cache_Effectiveness",
"ScaleUnit": "1MPKI"
},
{
"MetricName": "load_percentage",
"MetricExpr": "((LD_SPEC / INST_SPEC) * 100)",
"MetricExpr": "LD_SPEC / INST_SPEC * 100",
"BriefDescription": "This metric measures load operations as a percentage of operations speculatively executed.",
"MetricGroup": "Operation_Mix",
"ScaleUnit": "1percent of operations"
@ -213,21 +213,21 @@
},
{
"MetricName": "scalar_fp_percentage",
"MetricExpr": "((VFP_SPEC / INST_SPEC) * 100)",
"MetricExpr": "VFP_SPEC / INST_SPEC * 100",
"BriefDescription": "This metric measures scalar floating point operations as a percentage of operations speculatively executed.",
"MetricGroup": "Operation_Mix",
"ScaleUnit": "1percent of operations"
},
{
"MetricName": "simd_percentage",
"MetricExpr": "((ASE_SPEC / INST_SPEC) * 100)",
"MetricExpr": "ASE_SPEC / INST_SPEC * 100",
"BriefDescription": "This metric measures advanced SIMD operations as a percentage of total operations speculatively executed.",
"MetricGroup": "Operation_Mix",
"ScaleUnit": "1percent of operations"
},
{
"MetricName": "store_percentage",
"MetricExpr": "((ST_SPEC / INST_SPEC) * 100)",
"MetricExpr": "ST_SPEC / INST_SPEC * 100",
"BriefDescription": "This metric measures store operations as a percentage of operations speculatively executed.",
"MetricGroup": "Operation_Mix",
"ScaleUnit": "1percent of operations"
@ -300,5 +300,12 @@
"MetricGroup": "Operation_Mix",
"MetricName": "branch_indirect_spec_rate",
"ScaleUnit": "100%"
},
{
"MetricName": "sve_all_percentage",
"MetricExpr": "SVE_INST_SPEC / INST_SPEC * 100",
"BriefDescription": "This metric measures scalable vector operations, including loads and stores, as a percentage of operations speculatively executed.",
"MetricGroup": "Operation_Mix",
"ScaleUnit": "1percent of operations"
}
]
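Editor's note on the ScaleUnit changes above: in perf's JSON metric format the leading number in ScaleUnit scales the computed value before display, so moving branch_misprediction_ratio and the TLB/cache miss ratios from "1per ..." to "100percent of ..." turns a raw ratio of 0.005 into a displayed 0.5 percent. A tiny worked example with hypothetical counter values:

	#include <stdio.h>

	int main(void)
	{
		double br_mis_pred_retired = 5, br_retired = 1000;	/* hypothetical counts */
		double ratio = br_mis_pred_retired / br_retired;	/* the MetricExpr */

		printf("old display: %.3f per branch\n", ratio * 1);
		printf("new display: %.1f percent of branches\n", ratio * 100);
		return 0;
	}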

View File

@ -9,7 +9,7 @@
},
{
"ArchStdEvent": "CID_WRITE_RETIRED",
"PublicDescription": "Counts architecturally executed writes to the CONTEXTIDR register, which usually contain the kernel PID and can be output with hardware trace."
"PublicDescription": "Counts architecturally executed writes to the CONTEXTIDR_EL1 register, which usually contain the kernel PID and can be output with hardware trace."
},
{
"ArchStdEvent": "TTBR_WRITE_RETIRED",
@ -17,7 +17,7 @@
},
{
"ArchStdEvent": "BR_RETIRED",
"PublicDescription": "Counts architecturally executed branches, whether the branch is taken or not. Instructions that explicitly write to the PC are also counted."
"PublicDescription": "Counts architecturally executed branches, whether the branch is taken or not. Instructions that explicitly write to the PC are also counted. Note that exception generating instructions, exception return instructions and context synchronization instructions are not counted."
},
{
"ArchStdEvent": "BR_MIS_PRED_RETIRED",

View File

@ -5,7 +5,7 @@
},
{
"ArchStdEvent": "BR_PRED",
"PublicDescription": "Counts branches speculatively executed and were predicted right."
"PublicDescription": "Counts all speculatively executed branches."
},
{
"ArchStdEvent": "INST_SPEC",
@ -29,7 +29,7 @@
},
{
"ArchStdEvent": "LDREX_SPEC",
"PublicDescription": "Counts Load-Exclusive operations that have been speculatively executed. Eg: LDREX, LDX"
"PublicDescription": "Counts Load-Exclusive operations that have been speculatively executed. For example: LDREX, LDX"
},
{
"ArchStdEvent": "STREX_PASS_SPEC",
@ -73,15 +73,15 @@
},
{
"ArchStdEvent": "BR_IMMED_SPEC",
"PublicDescription": "Counts immediate branch operations which are speculatively executed."
"PublicDescription": "Counts direct branch operations which are speculatively executed."
},
{
"ArchStdEvent": "BR_RETURN_SPEC",
"PublicDescription": "Counts procedure return operations (RET) which are speculatively executed."
"PublicDescription": "Counts procedure return operations (RET, RETAA and RETAB) which are speculatively executed."
},
{
"ArchStdEvent": "BR_INDIRECT_SPEC",
"PublicDescription": "Counts indirect branch operations including procedure returns, which are speculatively executed. This includes operations that force a software change of the PC, other than exception-generating operations. Eg: BR Xn, RET"
"PublicDescription": "Counts indirect branch operations including procedure returns, which are speculatively executed. This includes operations that force a software change of the PC, other than exception-generating operations and direct branch instructions. Some examples of the instructions counted by this event include BR Xn, RET, etc..."
},
{
"ArchStdEvent": "ISB_SPEC",
@ -97,11 +97,11 @@
},
{
"ArchStdEvent": "RC_LD_SPEC",
"PublicDescription": "Counts any load acquire operations that are speculatively executed. Eg: LDAR, LDARH, LDARB"
"PublicDescription": "Counts any load acquire operations that are speculatively executed. For example: LDAR, LDARH, LDARB"
},
{
"ArchStdEvent": "RC_ST_SPEC",
"PublicDescription": "Counts any store release operations that are speculatively executed. Eg: STLR, STLRH, STLRB'"
"PublicDescription": "Counts any store release operations that are speculatively executed. For example: STLR, STLRH, STLRB"
},
{
"ArchStdEvent": "ASE_INST_SPEC",

View File

@ -1,7 +1,7 @@
[
{
"ArchStdEvent": "STALL_FRONTEND",
"PublicDescription": "Counts cycles when frontend could not send any micro-operations to the rename stage because of frontend resource stalls caused by fetch memory latency or branch prediction flow stalls. All the frontend slots were empty during the cycle when this event counts."
"PublicDescription": "Counts cycles when frontend could not send any micro-operations to the rename stage because of frontend resource stalls caused by fetch memory latency or branch prediction flow stalls. STALL_FRONTEND_SLOTS counts SLOTS during the cycle when this event counts."
},
{
"ArchStdEvent": "STALL_BACKEND",
@ -9,11 +9,11 @@
},
{
"ArchStdEvent": "STALL",
"PublicDescription": "Counts cycles when no operations are sent to the rename unit from the frontend or from the rename unit to the backend for any reason (either frontend or backend stall)."
"PublicDescription": "Counts cycles when no operations are sent to the rename unit from the frontend or from the rename unit to the backend for any reason (either frontend or backend stall). This event is the sum of STALL_FRONTEND and STALL_BACKEND"
},
{
"ArchStdEvent": "STALL_SLOT_BACKEND",
"PublicDescription": "Counts slots per cycle in which no operations are sent from the rename unit to the backend due to backend resource constraints."
"PublicDescription": "Counts slots per cycle in which no operations are sent from the rename unit to the backend due to backend resource constraints. STALL_BACKEND counts during the cycle when STALL_SLOT_BACKEND counts at least 1."
},
{
"ArchStdEvent": "STALL_SLOT_FRONTEND",
@ -21,7 +21,7 @@
},
{
"ArchStdEvent": "STALL_SLOT",
"PublicDescription": "Counts slots per cycle in which no operations are sent to the rename unit from the frontend or from the rename unit to the backend for any reason (either frontend or backend stall)."
"PublicDescription": "Counts slots per cycle in which no operations are sent to the rename unit from the frontend or from the rename unit to the backend for any reason (either frontend or backend stall). STALL_SLOT is the sum of STALL_SLOT_FRONTEND and STALL_SLOT_BACKEND."
},
{
"ArchStdEvent": "STALL_BACKEND_MEM",

View File

@ -25,11 +25,11 @@
},
{
"ArchStdEvent": "DTLB_WALK",
"PublicDescription": "Counts data memory translation table walks caused by a miss in the L2 TLB driven by a memory access. Note that partial translations that also cause a table walk are counted. This event does not count table walks caused by TLB maintenance operations."
"PublicDescription": "Counts number of demand data translation table walks caused by a miss in the L2 TLB and performing at least one memory access. Translation table walks are counted even if the translation ended up taking a translation fault for reasons different than EPD, E0PD and NFD. Note that partial translations that cause a translation table walk are also counted. Also note that this event counts walks triggered by software preloads, but not walks triggered by hardware prefetchers, and that this event does not count walks triggered by TLB maintenance operations."
},
{
"ArchStdEvent": "ITLB_WALK",
"PublicDescription": "Counts instruction memory translation table walks caused by a miss in the L2 TLB driven by a memory access. Partial translations that also cause a table walk are counted. This event does not count table walks caused by TLB maintenance operations."
"PublicDescription": "Counts number of instruction translation table walks caused by a miss in the L2 TLB and performing at least one memory access. Translation table walks are counted even if the translation ended up taking a translation fault for reasons different than EPD, E0PD and NFD. Note that partial translations that cause a translation table walk are also counted. Also note that this event does not count walks triggered by TLB maintenance operations."
},
{
"ArchStdEvent": "L1D_TLB_REFILL_RD",

View File

@ -430,8 +430,11 @@ class JsonEvent:
def to_c_string(self, metric: bool) -> str:
"""Representation of the event as a C struct initializer."""
def fix_comment(s: str) -> str:
return s.replace('*/', r'\*\/')
s = self.build_c_string(metric)
return f'{{ { _bcs.offsets[s] } }}, /* {s} */\n'
return f'{{ { _bcs.offsets[s] } }}, /* {fix_comment(s)} */\n'
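Editor's note: this one-line jevents fix matters because event descriptions are embedded in generated C comments, and a "*/" inside a description would close the comment early. A C-side illustration of the failure mode and the escaped form fix_comment() now emits (the description string is made up):

	#include <stdio.h>

	int main(void)
	{
		/* An event description containing the two characters '*' and '/'
		 * would close the generated C comment early; fix_comment() emits
		 * them escaped instead. */
		const char *desc = "count a */ b";	/* raw description */

		printf("broken:  /* %s */\n", desc);	/* invalid inside a real C comment */
		printf("escaped: /* count a \\*\\/ b */\n");	/* what jevents now writes */
		return 0;
	}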
@lru_cache(maxsize=None)

View File

@ -24,16 +24,6 @@
#include "../../../util/srcline.h"
#include "../../../util/srccode.h"
#if PY_MAJOR_VERSION < 3
#define _PyCapsule_GetPointer(arg1, arg2) \
PyCObject_AsVoidPtr(arg1)
#define _PyBytes_FromStringAndSize(arg1, arg2) \
PyString_FromStringAndSize((arg1), (arg2))
#define _PyUnicode_AsUTF8(arg) \
PyString_AsString(arg)
PyMODINIT_FUNC initperf_trace_context(void);
#else
#define _PyCapsule_GetPointer(arg1, arg2) \
PyCapsule_GetPointer((arg1), (arg2))
#define _PyBytes_FromStringAndSize(arg1, arg2) \
@ -42,7 +32,6 @@ PyMODINIT_FUNC initperf_trace_context(void);
PyUnicode_AsUTF8(arg)
PyMODINIT_FUNC PyInit_perf_trace_context(void);
#endif
static struct scripting_context *get_args(PyObject *args, const char *name, PyObject **arg2)
{
@ -104,7 +93,7 @@ static PyObject *perf_sample_insn(PyObject *obj, PyObject *args)
if (c->sample->ip && !c->sample->insn_len && thread__maps(c->al->thread)) {
struct machine *machine = maps__machine(thread__maps(c->al->thread));
script_fetch_insn(c->sample, c->al->thread, machine);
script_fetch_insn(c->sample, c->al->thread, machine, /*native_arch=*/true);
}
if (!c->sample->insn_len)
Py_RETURN_NONE; /* N.B. This is a return statement */
@ -213,12 +202,6 @@ static PyMethodDef ContextMethods[] = {
{ NULL, NULL, 0, NULL}
};
#if PY_MAJOR_VERSION < 3
PyMODINIT_FUNC initperf_trace_context(void)
{
(void) Py_InitModule("perf_trace_context", ContextMethods);
}
#else
PyMODINIT_FUNC PyInit_perf_trace_context(void)
{
static struct PyModuleDef moduledef = {
@ -240,4 +223,3 @@ PyMODINIT_FUNC PyInit_perf_trace_context(void)
return mod;
}
#endif

View File

@ -3,98 +3,125 @@
#
# Copyright (c) 2018, Intel Corporation.
from __future__ import division
from __future__ import print_function
import os
import sys
import struct
import re
import bisect
import collections
from dataclasses import dataclass
from typing import (Dict, Optional)
sys.path.append(os.environ['PERF_EXEC_PATH'] + \
'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
#physical address ranges for System RAM
system_ram = []
#physical address ranges for Persistent Memory
pmem = []
#file object for proc iomem
f = None
#Count for each type of memory
load_mem_type_cnt = collections.Counter()
#perf event name
event_name = None
@dataclass(frozen=True)
class IomemEntry:
"""Read from a line in /proc/iomem"""
begin: int
end: int
indent: int
label: str
# Physical memory layout from /proc/iomem. Key is the indent and then
# a list of ranges.
iomem: Dict[int, list[IomemEntry]] = collections.defaultdict(list)
# Child nodes from the iomem parent.
children: Dict[IomemEntry, set[IomemEntry]] = collections.defaultdict(set)
# Maximum indent seen before an entry in the iomem file.
max_indent: int = 0
# Count for each range of memory.
load_mem_type_cnt: Dict[IomemEntry, int] = collections.Counter()
# Perf event name set from the first sample in the data.
event_name: Optional[str] = None
def parse_iomem():
global f
f = open('/proc/iomem', 'r')
for i, j in enumerate(f):
m = re.split('-|:',j,2)
if m[2].strip() == 'System RAM':
system_ram.append(int(m[0], 16))
system_ram.append(int(m[1], 16))
if m[2].strip() == 'Persistent Memory':
pmem.append(int(m[0], 16))
pmem.append(int(m[1], 16))
"""Populate iomem from /proc/iomem file"""
global iomem
global max_indent
global children
with open('/proc/iomem', 'r', encoding='ascii') as f:
for line in f:
indent = 0
while line[indent] == ' ':
indent += 1
if indent > max_indent:
max_indent = indent
m = re.split('-|:', line, 2)
begin = int(m[0], 16)
end = int(m[1], 16)
label = m[2].strip()
entry = IomemEntry(begin, end, indent, label)
# Before adding entry, search for a parent node using its begin.
if indent > 0:
parent = find_memory_type(begin)
assert parent, f"Given indent expected a parent for {label}"
children[parent].add(entry)
iomem[indent].append(entry)
def find_memory_type(phys_addr) -> Optional[IomemEntry]:
"""Search iomem for the range containing phys_addr with the maximum indent"""
for i in range(max_indent, -1, -1):
if i not in iomem:
continue
position = bisect.bisect_right(iomem[i], phys_addr,
key=lambda entry: entry.begin)
if position == 0:
continue
iomem_entry = iomem[i][position-1]
if iomem_entry.begin <= phys_addr <= iomem_entry.end:
return iomem_entry
print(f"Didn't find {phys_addr}")
return None
def print_memory_type():
print("Event: %s" % (event_name))
print("%-40s %10s %10s\n" % ("Memory type", "count", "percentage"), end='')
print("%-40s %10s %10s\n" % ("----------------------------------------",
"-----------", "-----------"),
end='');
total = sum(load_mem_type_cnt.values())
for mem_type, count in sorted(load_mem_type_cnt.most_common(), \
key = lambda kv: (kv[1], kv[0]), reverse = True):
print("%-40s %10d %10.1f%%\n" %
(mem_type, count, 100 * count / total),
end='')
print(f"Event: {event_name}")
print(f"{'Memory type':<40} {'count':>10} {'percentage':>10}")
print(f"{'-' * 40:<40} {'-' * 10:>10} {'-' * 10:>10}")
total = sum(load_mem_type_cnt.values())
# Add count from children into the parent.
for i in range(max_indent, -1, -1):
if i not in iomem:
continue
for entry in iomem[i]:
global children
for child in children[entry]:
if load_mem_type_cnt[child] > 0:
load_mem_type_cnt[entry] += load_mem_type_cnt[child]
def print_entries(entries):
"""Print counts from parents down to their children"""
global children
for entry in sorted(entries,
key = lambda entry: load_mem_type_cnt[entry],
reverse = True):
count = load_mem_type_cnt[entry]
if count > 0:
mem_type = ' ' * entry.indent + f"{entry.begin:x}-{entry.end:x} : {entry.label}"
percent = 100 * count / total
print(f"{mem_type:<40} {count:>10} {percent:>10.1f}")
print_entries(children[entry])
print_entries(iomem[0])
def trace_begin():
parse_iomem()
parse_iomem()
def trace_end():
print_memory_type()
f.close()
def is_system_ram(phys_addr):
#/proc/iomem is sorted
position = bisect.bisect(system_ram, phys_addr)
if position % 2 == 0:
return False
return True
def is_persistent_mem(phys_addr):
position = bisect.bisect(pmem, phys_addr)
if position % 2 == 0:
return False
return True
def find_memory_type(phys_addr):
if phys_addr == 0:
return "N/A"
if is_system_ram(phys_addr):
return "System RAM"
if is_persistent_mem(phys_addr):
return "Persistent Memory"
#slow path, search all
f.seek(0, 0)
for j in f:
m = re.split('-|:',j,2)
if int(m[0], 16) <= phys_addr <= int(m[1], 16):
return m[2]
return "N/A"
print_memory_type()
def process_event(param_dict):
name = param_dict["ev_name"]
sample = param_dict["sample"]
phys_addr = sample["phys_addr"]
if "sample" not in param_dict:
return
global event_name
if event_name == None:
event_name = name
load_mem_type_cnt[find_memory_type(phys_addr)] += 1
sample = param_dict["sample"]
if "phys_addr" not in sample:
return
phys_addr = sample["phys_addr"]
entry = find_memory_type(phys_addr)
if entry:
load_mem_type_cnt[entry] += 1
global event_name
if event_name is None:
event_name = param_dict["ev_name"]

View File

@ -5,10 +5,10 @@ perf-test-y += tests-scripts.o
perf-test-y += parse-events.o
perf-test-y += dso-data.o
perf-test-y += vmlinux-kallsyms.o
perf-test-$(CONFIG_LIBTRACEEVENT) += openat-syscall.o
perf-test-$(CONFIG_LIBTRACEEVENT) += openat-syscall-all-cpus.o
perf-test-y += openat-syscall.o
perf-test-y += openat-syscall-all-cpus.o
perf-test-$(CONFIG_LIBTRACEEVENT) += openat-syscall-tp-fields.o
perf-test-$(CONFIG_LIBTRACEEVENT) += mmap-basic.o
perf-test-y += mmap-basic.o
perf-test-y += perf-record.o
perf-test-y += evsel-roundtrip-name.o
perf-test-$(CONFIG_LIBTRACEEVENT) += evsel-tp-sched.o

View File

@ -60,11 +60,9 @@ static struct test_suite *arch_tests[] = {
static struct test_suite *generic_tests[] = {
&suite__vmlinux_matches_kallsyms,
#ifdef HAVE_LIBTRACEEVENT
&suite__openat_syscall_event,
&suite__openat_syscall_event_on_all_cpus,
&suite__basic_mmap,
#endif
&suite__mem,
&suite__parse_events,
&suite__expr,

View File

@ -156,21 +156,54 @@ static int test__cpu_map_print(struct test_suite *test __maybe_unused, int subte
return 0;
}
static int test__cpu_map_merge(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
static int __test__cpu_map_merge(const char *lhs, const char *rhs, int nr, const char *expected)
{
struct perf_cpu_map *a = perf_cpu_map__new("4,2,1");
struct perf_cpu_map *b = perf_cpu_map__new("4,5,7");
struct perf_cpu_map *c = perf_cpu_map__merge(a, b);
struct perf_cpu_map *a = perf_cpu_map__new(lhs);
struct perf_cpu_map *b = perf_cpu_map__new(rhs);
char buf[100];
TEST_ASSERT_VAL("failed to merge map: bad nr", perf_cpu_map__nr(c) == 5);
cpu_map__snprint(c, buf, sizeof(buf));
TEST_ASSERT_VAL("failed to merge map: bad result", !strcmp(buf, "1-2,4-5,7"));
perf_cpu_map__merge(&a, b);
TEST_ASSERT_VAL("failed to merge map: bad nr", perf_cpu_map__nr(a) == nr);
cpu_map__snprint(a, buf, sizeof(buf));
TEST_ASSERT_VAL("failed to merge map: bad result", !strcmp(buf, expected));
perf_cpu_map__put(b);
perf_cpu_map__put(c);
/*
* If 'b' is a superset of 'a', 'a' ends up pointing to the same map as
* 'b'. In this case the owner 'b' has released its reference above, but
* 'a' still holds one, so the reference count should be 1.
*/
TEST_ASSERT_VAL("unexpected refcnt: bad result",
refcount_read(perf_cpu_map__refcnt(a)) == 1);
perf_cpu_map__put(a);
return 0;
}
static int test__cpu_map_merge(struct test_suite *test __maybe_unused,
int subtest __maybe_unused)
{
int ret;
ret = __test__cpu_map_merge("4,2,1", "4,5,7", 5, "1-2,4-5,7");
if (ret)
return ret;
ret = __test__cpu_map_merge("1-8", "6-9", 9, "1-9");
if (ret)
return ret;
ret = __test__cpu_map_merge("1-8,12-20", "6-9,15", 18, "1-9,12-20");
if (ret)
return ret;
ret = __test__cpu_map_merge("4,2,1", "1", 3, "1-2,4");
if (ret)
return ret;
ret = __test__cpu_map_merge("1", "4,2,1", 3, "1-2,4");
if (ret)
return ret;
ret = __test__cpu_map_merge("1", "1", 1, "1");
return ret;
}
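
The API change exercised above is subtle: perf_cpu_map__merge() no longer returns a third map, it updates its first argument in place and drops the old reference. A minimal sketch of the new calling convention, reusing only calls that appear in the test above (headers and error handling omitted):

	struct perf_cpu_map *a = perf_cpu_map__new("4,2,1");
	struct perf_cpu_map *b = perf_cpu_map__new("4,5,7");
	char buf[100];

	/* 'a' is replaced by the merged map; the old 'a' reference is dropped. */
	perf_cpu_map__merge(&a, b);
	cpu_map__snprint(a, buf, sizeof(buf));	/* buf == "1-2,4-5,7" */

	perf_cpu_map__put(b);	/* the caller still owns its 'b' reference */
	perf_cpu_map__put(a);	/* and the merged result */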
static int __test__cpu_map_intersect(const char *lhs, const char *rhs, int nr, const char *expected)
{
struct perf_cpu_map *a = perf_cpu_map__new(lhs);
@ -233,9 +266,8 @@ static int test__cpu_map_equal(struct test_suite *test __maybe_unused, int subte
}
/* Maps equal made maps. */
tmp = perf_cpu_map__merge(perf_cpu_map__get(one), two);
TEST_ASSERT_VAL("pair", perf_cpu_map__equal(pair, tmp));
perf_cpu_map__put(tmp);
perf_cpu_map__merge(&two, one);
TEST_ASSERT_VAL("pair", perf_cpu_map__equal(pair, two));
tmp = perf_cpu_map__intersect(pair, one);
TEST_ASSERT_VAL("one", perf_cpu_map__equal(one, tmp));

View File

@ -54,8 +54,6 @@ static bool test_perf_config(const struct perf_evsel *evsel, __u64 expected_conf
return (evsel->attr.config & PERF_HW_EVENT_MASK) == expected_config;
}
#ifdef HAVE_LIBTRACEEVENT
#if defined(__s390x__)
/* Return true if kvm module is available and loaded. Test this
* and return success when trace point kvm_s390_create_vm
@ -112,7 +110,6 @@ static int test__checkevent_tracepoint_multi(struct evlist *evlist)
}
return TEST_OK;
}
#endif /* HAVE_LIBTRACEEVENT */
static int test__checkevent_raw(struct evlist *evlist)
{
@ -311,7 +308,6 @@ static int test__checkevent_breakpoint_rw(struct evlist *evlist)
return TEST_OK;
}
#ifdef HAVE_LIBTRACEEVENT
static int test__checkevent_tracepoint_modifier(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
@ -340,7 +336,6 @@ test__checkevent_tracepoint_multi_modifier(struct evlist *evlist)
return test__checkevent_tracepoint_multi(evlist);
}
#endif /* HAVE_LIBTRACEEVENT */
static int test__checkevent_raw_modifier(struct evlist *evlist)
{
@ -629,7 +624,6 @@ static int test__checkevent_pmu(struct evlist *evlist)
return TEST_OK;
}
#ifdef HAVE_LIBTRACEEVENT
static int test__checkevent_list(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
@ -671,7 +665,6 @@ static int test__checkevent_list(struct evlist *evlist)
return TEST_OK;
}
#endif
static int test__checkevent_pmu_name(struct evlist *evlist)
{
@ -971,7 +964,6 @@ static int test__group2(struct evlist *evlist)
return TEST_OK;
}
#ifdef HAVE_LIBTRACEEVENT
static int test__group3(struct evlist *evlist __maybe_unused)
{
struct evsel *evsel, *group1_leader = NULL, *group2_leader = NULL;
@ -1078,7 +1070,6 @@ static int test__group3(struct evlist *evlist __maybe_unused)
}
return TEST_OK;
}
#endif
static int test__group4(struct evlist *evlist __maybe_unused)
{
@ -1813,7 +1804,6 @@ static int test__term_equal_legacy(struct evlist *evlist)
return TEST_OK;
}
#ifdef HAVE_LIBTRACEEVENT
static int count_tracepoints(void)
{
struct dirent *events_ent;
@ -1867,7 +1857,6 @@ static int test__all_tracepoints(struct evlist *evlist)
return test__checkevent_tracepoint_multi(evlist);
}
#endif /* HAVE_LIBTRACEVENT */
struct evlist_test {
const char *name;
@ -1876,7 +1865,6 @@ struct evlist_test {
};
static const struct evlist_test test__events[] = {
#ifdef HAVE_LIBTRACEEVENT
{
.name = "syscalls:sys_enter_openat",
.check = test__checkevent_tracepoint,
@ -1887,7 +1875,6 @@ static const struct evlist_test test__events[] = {
.check = test__checkevent_tracepoint_multi,
/* 1 */
},
#endif
{
.name = "r1a",
.check = test__checkevent_raw,
@ -1938,7 +1925,6 @@ static const struct evlist_test test__events[] = {
.check = test__checkevent_breakpoint_w,
/* 1 */
},
#ifdef HAVE_LIBTRACEEVENT
{
.name = "syscalls:sys_enter_openat:k",
.check = test__checkevent_tracepoint_modifier,
@ -1949,7 +1935,6 @@ static const struct evlist_test test__events[] = {
.check = test__checkevent_tracepoint_multi_modifier,
/* 3 */
},
#endif
{
.name = "r1a:kp",
.check = test__checkevent_raw_modifier,
@ -1995,13 +1980,11 @@ static const struct evlist_test test__events[] = {
.check = test__checkevent_breakpoint_w_modifier,
/* 2 */
},
#ifdef HAVE_LIBTRACEEVENT
{
.name = "r1,syscalls:sys_enter_openat:k,1:1:hp",
.check = test__checkevent_list,
/* 3 */
},
#endif
{
.name = "instructions:G",
.check = test__checkevent_exclude_host_modifier,
@ -2032,13 +2015,11 @@ static const struct evlist_test test__events[] = {
.check = test__group2,
/* 9 */
},
#ifdef HAVE_LIBTRACEEVENT
{
.name = "group1{syscalls:sys_enter_openat:H,cycles:kppp},group2{cycles,1:3}:G,instructions:u",
.check = test__group3,
/* 0 */
},
#endif
{
.name = "{cycles:u,instructions:kp}:p",
.check = test__group4,
@ -2049,13 +2030,11 @@ static const struct evlist_test test__events[] = {
.check = test__group5,
/* 2 */
},
#ifdef HAVE_LIBTRACEEVENT
{
.name = "*:*",
.check = test__all_tracepoints,
/* 3 */
},
#endif
{
.name = "{cycles,cache-misses:G}:H",
.check = test__group_gh1,
@ -2111,7 +2090,7 @@ static const struct evlist_test test__events[] = {
.check = test__checkevent_breakpoint_len_rw_modifier,
/* 4 */
},
#if defined(__s390x__) && defined(HAVE_LIBTRACEEVENT)
#if defined(__s390x__)
{
.name = "kvm-s390:kvm_s390_create_vm",
.check = test__checkevent_tracepoint,
@ -2265,13 +2244,11 @@ static const struct evlist_test test__events[] = {
.check = test__checkevent_breakpoint_2_events,
/* 3 */
},
#ifdef HAVE_LIBTRACEEVENT
{
.name = "9p:9p_client_req",
.check = test__checkevent_tracepoint,
/* 4 */
},
#endif
};
static const struct evlist_test test__events_pmu[] = {

View File

@ -22,6 +22,9 @@ if ! check_kprobes_available; then
exit 0
fi
# Check for presence of DWARF
$CMD_PERF check feature -q dwarf
[ $? -ne 0 ] && HINT_FAIL="Some of the tests need DWARF to run"
### missing argument
@ -75,5 +78,5 @@ done
# print overall results
print_overall_results "$TEST_RESULT"
print_overall_results "$TEST_RESULT" $HINT_FAIL
exit $?

View File

@ -23,6 +23,9 @@ if ! check_kprobes_available; then
exit 0
fi
# Check for presence of DWARF
$CMD_PERF check feature -q dwarf
[ $? -ne 0 ] && HINT_FAIL="Some of the tests need DWARF to run"
### acceptable --line descriptions
@ -51,5 +54,5 @@ done
# print overall results
print_overall_results "$TEST_RESULT"
print_overall_results "$TEST_RESULT" $HINT_FAIL
exit $?

View File

@ -46,10 +46,13 @@ print_results()
print_overall_results()
{
RETVAL="$1"; shift
TASK_COMMENT="$*"
test -n "$TASK_COMMENT" && TASK_COMMENT=":: $TASK_COMMENT"
if [ $RETVAL -eq 0 ]; then
_echo "$MALLPASS## [ PASS ] ##$MEND $TEST_NAME :: $THIS_TEST_NAME SUMMARY"
else
_echo "$MALLFAIL## [ FAIL ] ##$MEND $TEST_NAME :: $THIS_TEST_NAME SUMMARY :: $RETVAL failures found"
_echo "$MALLFAIL## [ FAIL ] ##$MEND $TEST_NAME :: $THIS_TEST_NAME SUMMARY :: $RETVAL failures found $TASK_COMMENT"
fi
return $RETVAL
}

View File

@ -13,7 +13,7 @@ stat_output=$(mktemp /tmp/__perf_test.stat_output.std.XXXXX)
event_name=(cpu-clock task-clock context-switches cpu-migrations page-faults stalled-cycles-frontend stalled-cycles-backend cycles instructions branches branch-misses)
event_metric=("CPUs utilized" "CPUs utilized" "/sec" "/sec" "/sec" "frontend cycles idle" "backend cycles idle" "GHz" "insn per cycle" "/sec" "of all branches")
skip_metric=("stalled cycles per insn" "tma_" "retiring" "frontend_bound" "bad_speculation" "backend_bound")
skip_metric=("stalled cycles per insn" "tma_" "retiring" "frontend_bound" "bad_speculation" "backend_bound" "TopdownL1" "percent of slots")
cleanup() {
rm -f "${stat_output}"

View File

@ -644,6 +644,33 @@ test_pipe()
return 0
}
test_pause_resume()
{
echo "--- Test with pause / resume ---"
if ! perf_record_no_decode -o "${perfdatafile}" -e intel_pt/aux-action=start-paused/u uname ; then
echo "SKIP: pause / resume is not supported"
return 2
fi
if ! perf_record_no_bpf -o "${perfdatafile}" \
-e intel_pt/aux-action=start-paused/u \
-e instructions/period=50000,aux-action=resume,name=Resume/u \
-e instructions/period=100000,aux-action=pause,name=Pause/u uname ; then
echo "perf record with pause / resume failed"
return 1
fi
if ! perf script -i "${perfdatafile}" --itrace=b -Fperiod,event | \
awk 'BEGIN {paused=1;branches=0}
/Resume/ {paused=0}
/branches/ {if (paused) exit 1;branches=1}
/Pause/ {paused=1}
END {if (!branches) exit 1}' ; then
echo "perf record with pause / resume failed"
return 1
fi
echo OK
return 0
}
count_result()
{
if [ "$1" -eq 2 ] ; then
@ -672,6 +699,7 @@ test_power_event || ret=$? ; count_result $ret ; ret=0
test_no_tnt || ret=$? ; count_result $ret ; ret=0
test_event_trace || ret=$? ; count_result $ret ; ret=0
test_pipe || ret=$? ; count_result $ret ; ret=0
test_pause_resume || ret=$? ; count_result $ret ; ret=0
cleanup

View File

@ -56,6 +56,7 @@ static struct perf_event_attr make_event_attr(void)
#ifdef HAVE_BPF_SKEL
#include <bpf/btf.h>
#include <util/btf.h>
static struct btf *btf;
@ -73,21 +74,6 @@ static void btf__exit(void)
btf = NULL;
}
static const struct btf_member *__btf_type__find_member_by_name(int type_id, const char *member_name)
{
const struct btf_type *t = btf__type_by_id(btf, type_id);
const struct btf_member *m;
int i;
for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
const char *current_member_name = btf__name_by_offset(btf, m->name_off);
if (!strcmp(current_member_name, member_name))
return m;
}
return NULL;
}
static bool attr_has_sigtrap(void)
{
int id;
@ -101,7 +87,7 @@ static bool attr_has_sigtrap(void)
if (id < 0)
return false;
return __btf_type__find_member_by_name(id, "sigtrap") != NULL;
return __btf_type__find_member_by_name(btf, id, "sigtrap") != NULL;
}
static bool kernel_with_sleepable_spinlocks(void)
@ -119,7 +105,7 @@ static bool kernel_with_sleepable_spinlocks(void)
return false;
// Only RT has a "lock" member for "struct spinlock"
member = __btf_type__find_member_by_name(id, "lock");
member = __btf_type__find_member_by_name(btf, id, "lock");
if (member == NULL)
return false;

View File

@ -27,7 +27,7 @@ static int process_stat_config_event(const struct perf_tool *tool __maybe_unused
struct machine *machine __maybe_unused)
{
struct perf_record_stat_config *config = &event->stat_config;
struct perf_stat_config stat_config = {};
struct perf_stat_config test_stat_config = {};
#define HAS(term, val) \
has_term(config, PERF_STAT_CONFIG_TERM__##term, val)
@ -39,25 +39,27 @@ static int process_stat_config_event(const struct perf_tool *tool __maybe_unused
#undef HAS
perf_event__read_stat_config(&stat_config, config);
perf_event__read_stat_config(&test_stat_config, config);
TEST_ASSERT_VAL("wrong aggr_mode", stat_config.aggr_mode == AGGR_CORE);
TEST_ASSERT_VAL("wrong scale", stat_config.scale == 1);
TEST_ASSERT_VAL("wrong interval", stat_config.interval == 1);
TEST_ASSERT_VAL("wrong aggr_mode", test_stat_config.aggr_mode == AGGR_CORE);
TEST_ASSERT_VAL("wrong scale", test_stat_config.scale == 1);
TEST_ASSERT_VAL("wrong interval", test_stat_config.interval == 1);
return 0;
}
static int test__synthesize_stat_config(struct test_suite *test __maybe_unused,
int subtest __maybe_unused)
{
struct perf_stat_config stat_config = {
struct perf_stat_config test_stat_config = {
.aggr_mode = AGGR_CORE,
.scale = 1,
.interval = 1,
};
TEST_ASSERT_VAL("failed to synthesize stat_config",
!perf_event__synthesize_stat_config(NULL, &stat_config, process_stat_config_event, NULL));
!perf_event__synthesize_stat_config(NULL, &test_stat_config,
process_stat_config_event,
NULL));
return 0;
}

View File

@ -583,4 +583,4 @@ static int test__switch_tracking(struct test_suite *test __maybe_unused, int sub
goto out;
}
DEFINE_SUITE("Track with sched_switch", switch_tracking);
DEFINE_SUITE_EXCLUSIVE("Track with sched_switch", switch_tracking);

View File

@ -174,7 +174,7 @@ static void append_script(int dir_fd, const char *name, char *desc,
char filename[PATH_MAX], link[128];
struct test_suite *test_suite, **result_tmp;
struct test_case *tests;
size_t len;
ssize_t len;
char *exclusive;
snprintf(link, sizeof(link), "/proc/%d/fd/%d", getpid(), dir_fd);

View File

@ -81,6 +81,16 @@ struct test_suite {
.test_cases = tests__##_name, \
}
#define DEFINE_SUITE_EXCLUSIVE(description, _name) \
struct test_case tests__##_name[] = { \
TEST_CASE_EXCLUSIVE(description, _name),\
{ .name = NULL, } \
}; \
struct test_suite suite__##_name = { \
.desc = description, \
.test_cases = tests__##_name, \
}
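
For reference, DEFINE_SUITE_EXCLUSIVE("Track with sched_switch", switch_tracking) used above expands to roughly the following, assuming TEST_CASE_EXCLUSIVE mirrors TEST_CASE (stringified name, description, run callback) and additionally sets an .exclusive flag -- the flag name is inferred, not shown in this hunk:

	struct test_case tests__switch_tracking[] = {
		{
			.name = "switch_tracking",
			.desc = "Track with sched_switch",
			.run_case = test__switch_tracking,
			.exclusive = true,	/* assumed field set by TEST_CASE_EXCLUSIVE */
		},
		{ .name = NULL, }
	};

	struct test_suite suite__switch_tracking = {
		.desc = "Track with sched_switch",
		.test_cases = tests__switch_tracking,
	};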
/* Tests */
DECLARE_SUITE(vmlinux_matches_kallsyms);
DECLARE_SUITE(openat_syscall_event);

View File

@ -10,7 +10,7 @@
* 'perf test' workload) we just add the required types and defines here instead
* of including linux/landlock, that isn't available in older systems.
*
* We are not interested in the the result of the syscall, just in intercepting
* We are not interested in the result of the syscall, just in intercepting
* its arguments.
*/

View File

@ -57,7 +57,8 @@ create_arch_errno_table_func()
archlist="$1"
default="$2"
printf 'arch_syscalls__strerrno_t *arch_syscalls__strerrno_function(const char *arch)\n'
printf 'static arch_syscalls__strerrno_t *\n'
printf 'arch_syscalls__strerrno_function(const char *arch)\n'
printf '{\n'
for arch in $archlist; do
arch_str=$(arch_string "$arch")

View File

@ -1,16 +1,18 @@
// SPDX-License-Identifier: GPL-2.0
#include "../../builtin.h"
#include "../../perf.h"
#include "../../util/util.h" // perf_exe()
#include "../util.h"
#include "../../util/evlist.h"
#include "../../util/hist.h"
#include "../../util/debug.h"
#include "../../util/session.h"
#include "../../util/symbol.h"
#include "../browser.h"
#include "../libslang.h"
#include "config.h"
#include <linux/err.h>
#include <linux/string.h>
#include <linux/zalloc.h>
#include <subcmd/exec-cmd.h>
#include <stdlib.h>
#define SCRIPT_NAMELEN 128
@ -77,6 +79,177 @@ static int scripts_config(const char *var, const char *value, void *data)
return 0;
}
/*
* Some scripts specify the events they require in their "xxx-record" file;
* this function checks whether the events in perf.data match those
* mentioned in the "xxx-record".
*
* FIXME: all existing "xxx-record" files use the simple "-e event" format,
* which is covered here. New parsing code should be added for future,
* more complex formats such as event groups.
*/
static int check_ev_match(int dir_fd, const char *scriptname, struct perf_session *session)
{
char line[BUFSIZ];
FILE *fp;
{
char filename[FILENAME_MAX + 5];
int fd;
scnprintf(filename, sizeof(filename), "bin/%s-record", scriptname);
fd = openat(dir_fd, filename, O_RDONLY);
if (fd == -1)
return -1;
fp = fdopen(fd, "r");
if (!fp)
return -1;
}
while (fgets(line, sizeof(line), fp)) {
char *p = skip_spaces(line);
if (*p == '#')
continue;
while (strlen(p)) {
int match, len;
struct evsel *pos;
char evname[128];
p = strstr(p, "-e");
if (!p)
break;
p += 2;
p = skip_spaces(p);
len = strcspn(p, " \t");
if (!len)
break;
snprintf(evname, len + 1, "%s", p);
match = 0;
evlist__for_each_entry(session->evlist, pos) {
if (evsel__name_is(pos, evname)) {
match = 1;
break;
}
}
if (!match) {
fclose(fp);
return -1;
}
}
}
fclose(fp);
return 0;
}
/*
* Return -1 if none are found, otherwise the number of scripts found.
*
* Currently the only user of this function is the script browser, which
* will list all statically runnable scripts, select one, execute it and
* show the output in a perf browser.
*/
static int find_scripts(char **scripts_array, char **scripts_path_array, int num,
int pathlen)
{
struct dirent *script_dirent, *lang_dirent;
int scripts_dir_fd, lang_dir_fd;
DIR *scripts_dir, *lang_dir;
struct perf_session *session;
struct perf_data data = {
.path = input_name,
.mode = PERF_DATA_MODE_READ,
};
char *temp;
int i = 0;
const char *exec_path = get_argv_exec_path();
session = perf_session__new(&data, NULL);
if (IS_ERR(session))
return PTR_ERR(session);
{
char scripts_path[PATH_MAX];
snprintf(scripts_path, sizeof(scripts_path), "%s/scripts", exec_path);
scripts_dir_fd = open(scripts_path, O_DIRECTORY);
if (scripts_dir_fd == -1) {
pr_err("Failed to open directory '%s'", scripts_path);
perf_session__delete(session);
return -1;
}
}
scripts_dir = fdopendir(scripts_dir_fd);
if (!scripts_dir) {
close(scripts_dir_fd);
perf_session__delete(session);
return -1;
}
while ((lang_dirent = readdir(scripts_dir)) != NULL) {
if (lang_dirent->d_type != DT_DIR &&
(lang_dirent->d_type == DT_UNKNOWN &&
!is_directory_at(scripts_dir_fd, lang_dirent->d_name)))
continue;
if (!strcmp(lang_dirent->d_name, ".") || !strcmp(lang_dirent->d_name, ".."))
continue;
#ifndef HAVE_LIBPERL_SUPPORT
if (strstr(lang_dirent->d_name, "perl"))
continue;
#endif
#ifndef HAVE_LIBPYTHON_SUPPORT
if (strstr(lang_dirent->d_name, "python"))
continue;
#endif
lang_dir_fd = openat(scripts_dir_fd, lang_dirent->d_name, O_DIRECTORY);
if (lang_dir_fd == -1)
continue;
lang_dir = fdopendir(lang_dir_fd);
if (!lang_dir) {
close(lang_dir_fd);
continue;
}
while ((script_dirent = readdir(lang_dir)) != NULL) {
if (script_dirent->d_type == DT_DIR)
continue;
if (script_dirent->d_type == DT_UNKNOWN &&
is_directory_at(lang_dir_fd, script_dirent->d_name))
continue;
/* Skip the real-time scripts: xxxtop.p[yl] */
if (strstr(script_dirent->d_name, "top."))
continue;
if (i >= num)
break;
scnprintf(scripts_path_array[i], pathlen, "%s/scripts/%s/%s",
exec_path,
lang_dirent->d_name,
script_dirent->d_name);
temp = strchr(script_dirent->d_name, '.');
snprintf(scripts_array[i],
(temp - script_dirent->d_name) + 1,
"%s", script_dirent->d_name);
if (check_ev_match(lang_dir_fd, scripts_array[i], session))
continue;
i++;
}
closedir(lang_dir);
}
closedir(scripts_dir);
perf_session__delete(session);
return i;
}
/*
* When success, will copy the full path of the selected script
* into the buffer pointed by script_name, and return 0.

View File

@ -86,7 +86,7 @@ perf-util-y += pmu-bison.o
perf-util-y += hwmon_pmu.o
perf-util-y += tool_pmu.o
perf-util-y += svghelper.o
perf-util-$(CONFIG_LIBTRACEEVENT) += trace-event-info.o
perf-util-y += trace-event-info.o
perf-util-y += trace-event-scripting.o
perf-util-$(CONFIG_LIBTRACEEVENT) += trace-event.o
perf-util-$(CONFIG_LIBTRACEEVENT) += trace-event-parse.o
@ -121,8 +121,10 @@ perf-util-y += spark.o
perf-util-y += topdown.o
perf-util-y += iostat.o
perf-util-y += stream.o
perf-util-y += kvm-stat.o
perf-util-y += lock-contention.o
perf-util-$(CONFIG_AUXTRACE) += auxtrace.o
perf-util-$(CONFIG_AUXTRACE) += intel-pt-decoder/
perf-util-y += intel-pt-decoder/
perf-util-$(CONFIG_AUXTRACE) += intel-pt.o
perf-util-$(CONFIG_AUXTRACE) += intel-bts.o
perf-util-$(CONFIG_AUXTRACE) += arm-spe.o
@ -168,6 +170,7 @@ perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf_off_cpu.o
perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-filter.o
perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-filter-flex.o
perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-filter-bison.o
perf-util-$(CONFIG_PERF_BPF_SKEL) += btf.o
ifeq ($(CONFIG_LIBTRACEEVENT),y)
perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf_lock_contention.o

View File

@ -441,6 +441,7 @@ enum symbol_disassemble_errno {
SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP,
SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE,
SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF,
SYMBOL_ANNOTATE_ERRNO__COULDNT_DETERMINE_FILE_TYPE,
__SYMBOL_ANNOTATE_ERRNO__END,
};

View File

@ -67,6 +67,15 @@ enum arm_spe_common_data_source {
ARM_SPE_COMMON_DS_DRAM = 0xe,
};
enum arm_spe_ampereone_data_source {
ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE = 0x0,
ARM_SPE_AMPEREONE_SLC = 0x3,
ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE = 0x5,
ARM_SPE_AMPEREONE_DDR = 0x7,
ARM_SPE_AMPEREONE_L1D = 0x8,
ARM_SPE_AMPEREONE_L2D = 0x9,
};
struct arm_spe_record {
enum arm_spe_sample_type type;
int err;

View File

@ -103,6 +103,18 @@ struct arm_spe_queue {
u32 flags;
};
struct data_source_handle {
const struct midr_range *midr_ranges;
void (*ds_synth)(const struct arm_spe_record *record,
union perf_mem_data_src *data_src);
};
#define DS(range, func) \
{ \
.midr_ranges = range, \
.ds_synth = arm_spe__synth_##func, \
}
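
DS() is purely mechanical; for example, DS(common_ds_encoding_cpus, data_source_common) in the handle table further down expands to:

	{
		.midr_ranges = common_ds_encoding_cpus,
		.ds_synth = arm_spe__synth_data_source_common,
	}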
static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
unsigned char *buf, size_t len)
{
@ -443,6 +455,11 @@ static const struct midr_range common_ds_encoding_cpus[] = {
{},
};
static const struct midr_range ampereone_ds_encoding_cpus[] = {
MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
{},
};
static void arm_spe__sample_flags(struct arm_spe_queue *speq)
{
const struct arm_spe_record *record = &speq->decoder->record;
@ -532,6 +549,49 @@ static void arm_spe__synth_data_source_common(const struct arm_spe_record *recor
}
}
/*
* The source field is IMPDEF. Here we convert the encoding used on
* AmpereOne cores to the common (Neoverse, Cortex) encoding to avoid
* duplicating the decoding code.
*/
static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *record,
union perf_mem_data_src *data_src)
{
struct arm_spe_record common_record;
switch (record->source) {
case ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE:
common_record.source = ARM_SPE_COMMON_DS_PEER_CORE;
break;
case ARM_SPE_AMPEREONE_SLC:
common_record.source = ARM_SPE_COMMON_DS_SYS_CACHE;
break;
case ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE:
common_record.source = ARM_SPE_COMMON_DS_REMOTE;
break;
case ARM_SPE_AMPEREONE_DDR:
common_record.source = ARM_SPE_COMMON_DS_DRAM;
break;
case ARM_SPE_AMPEREONE_L1D:
common_record.source = ARM_SPE_COMMON_DS_L1D;
break;
case ARM_SPE_AMPEREONE_L2D:
common_record.source = ARM_SPE_COMMON_DS_L2;
break;
default:
pr_warning_once("AmpereOne: Unknown data source (0x%x)\n",
record->source);
return;
}
common_record.op = record->op;
arm_spe__synth_data_source_common(&common_record, data_src);
}
static const struct data_source_handle data_source_handles[] = {
DS(common_ds_encoding_cpus, data_source_common),
DS(ampereone_ds_encoding_cpus, data_source_ampereone),
};
static void arm_spe__synth_memory_level(const struct arm_spe_record *record,
union perf_mem_data_src *data_src)
{
@ -555,12 +615,14 @@ static void arm_spe__synth_memory_level(const struct arm_spe_record *record,
data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
}
static bool arm_spe__is_common_ds_encoding(struct arm_spe_queue *speq)
static bool arm_spe__synth_ds(struct arm_spe_queue *speq,
const struct arm_spe_record *record,
union perf_mem_data_src *data_src)
{
struct arm_spe *spe = speq->spe;
bool is_in_cpu_list;
u64 *metadata = NULL;
u64 midr = 0;
u64 midr;
unsigned int i;
/* Metadata version 1 assumes all CPUs are the same (old behavior) */
if (spe->metadata_ver == 1) {
@ -592,18 +654,20 @@ static bool arm_spe__is_common_ds_encoding(struct arm_spe_queue *speq)
midr = metadata[ARM_SPE_CPU_MIDR];
}
is_in_cpu_list = is_midr_in_range_list(midr, common_ds_encoding_cpus);
if (is_in_cpu_list)
return true;
else
return false;
for (i = 0; i < ARRAY_SIZE(data_source_handles); i++) {
if (is_midr_in_range_list(midr, data_source_handles[i].midr_ranges)) {
data_source_handles[i].ds_synth(record, data_src);
return true;
}
}
return false;
}
static u64 arm_spe__synth_data_source(struct arm_spe_queue *speq,
const struct arm_spe_record *record)
{
union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA };
bool is_common = arm_spe__is_common_ds_encoding(speq);
if (record->op & ARM_SPE_OP_LD)
data_src.mem_op = PERF_MEM_OP_LOAD;
@ -612,9 +676,7 @@ static u64 arm_spe__synth_data_source(struct arm_spe_queue *speq,
else
return 0;
if (is_common)
arm_spe__synth_data_source_common(record, &data_src);
else
if (!arm_spe__synth_ds(speq, record, &data_src))
arm_spe__synth_memory_level(record, &data_src);
if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {

View File

@ -810,19 +810,76 @@ int auxtrace_parse_sample_options(struct auxtrace_record *itr,
return auxtrace_validate_aux_sample_size(evlist, opts);
}
void auxtrace_regroup_aux_output(struct evlist *evlist)
static struct aux_action_opt {
const char *str;
u32 aux_action;
bool aux_event_opt;
} aux_action_opts[] = {
{"start-paused", BIT(0), true},
{"pause", BIT(1), false},
{"resume", BIT(2), false},
{.str = NULL},
};
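
The three BIT() values above land in attr.aux_action (see the evsel->core.attr.aux_action assignment below). Paraphrased from the kernel side of this series, they correspond to a bitfield union in the uapi struct perf_event_attr -- field names assumed from that series, not confirmed by this hunk:

	union {
		__u32	aux_action;
		struct {
			__u32	aux_start_paused : 1;	/* BIT(0): start AUX area tracing paused */
			__u32	aux_pause        : 1;	/* BIT(1): on overflow, pause AUX area tracing */
			__u32	aux_resume       : 1;	/* BIT(2): on overflow, resume AUX area tracing */
			__u32	__reserved       : 29;
		};
	};

So 'start-paused' is only meaningful on the AUX area event itself, while 'pause' and 'resume' are set on the events that trigger the transition -- exactly the constraint enforced by the aux_event_opt checks below.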
static const struct aux_action_opt *auxtrace_parse_aux_action_str(const char *str)
{
const struct aux_action_opt *opt;
if (!str)
return NULL;
for (opt = aux_action_opts; opt->str; opt++)
if (!strcmp(str, opt->str))
return opt;
return NULL;
}
int auxtrace_parse_aux_action(struct evlist *evlist)
{
struct evsel *evsel, *aux_evsel = NULL;
struct evsel_config_term *term;
struct evsel *aux_evsel = NULL;
struct evsel *evsel;
evlist__for_each_entry(evlist, evsel) {
if (evsel__is_aux_event(evsel))
bool is_aux_event = evsel__is_aux_event(evsel);
const struct aux_action_opt *opt;
if (is_aux_event)
aux_evsel = evsel;
term = evsel__get_config_term(evsel, AUX_OUTPUT);
term = evsel__get_config_term(evsel, AUX_ACTION);
if (!term) {
if (evsel__get_config_term(evsel, AUX_OUTPUT))
goto regroup;
continue;
}
opt = auxtrace_parse_aux_action_str(term->val.str);
if (!opt) {
pr_err("Bad aux-action '%s'\n", term->val.str);
return -EINVAL;
}
if (opt->aux_event_opt && !is_aux_event) {
pr_err("aux-action '%s' can only be used with AUX area event\n",
term->val.str);
return -EINVAL;
}
if (!opt->aux_event_opt && is_aux_event) {
pr_err("aux-action '%s' cannot be used for AUX area event itself\n",
term->val.str);
return -EINVAL;
}
evsel->core.attr.aux_action = opt->aux_action;
regroup:
/* If possible, group with the AUX event */
if (term && aux_evsel)
if (aux_evsel)
evlist__regroup(evlist, aux_evsel, evsel);
if (!evsel__is_aux_event(evsel__leader(evsel))) {
pr_err("Events with aux-action must have AUX area event group leader\n");
return -EINVAL;
}
}
return 0;
}
struct auxtrace_record *__weak

View File

@ -578,7 +578,7 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr,
int auxtrace_parse_sample_options(struct auxtrace_record *itr,
struct evlist *evlist,
struct record_opts *opts, const char *str);
void auxtrace_regroup_aux_output(struct evlist *evlist);
int auxtrace_parse_aux_action(struct evlist *evlist);
int auxtrace_record__options(struct auxtrace_record *itr,
struct evlist *evlist,
struct record_opts *opts);
@ -799,8 +799,10 @@ int auxtrace_parse_sample_options(struct auxtrace_record *itr __maybe_unused,
}
static inline
void auxtrace_regroup_aux_output(struct evlist *evlist __maybe_unused)
int auxtrace_parse_aux_action(struct evlist *evlist __maybe_unused)
{
pr_err("AUX area tracing not supported\n");
return -EINVAL;
}
static inline

View File

@ -289,7 +289,10 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session,
}
info_node->info_linear = info_linear;
perf_env__insert_bpf_prog_info(env, info_node);
if (!perf_env__insert_bpf_prog_info(env, info_node)) {
free(info_linear);
free(info_node);
}
info_linear = NULL;
/*
@ -480,7 +483,10 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id)
info_node = malloc(sizeof(struct bpf_prog_info_node));
if (info_node) {
info_node->info_linear = info_linear;
perf_env__insert_bpf_prog_info(env, info_node);
if (!perf_env__insert_bpf_prog_info(env, info_node)) {
free(info_linear);
free(info_node);
}
} else
free(info_linear);

View File

@ -36,6 +36,9 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
return -1;
}
skel->rodata->bucket_range = ftrace->bucket_range;
skel->rodata->min_latency = ftrace->min_latency;
/* don't need to set cpu filter for system-wide mode */
if (ftrace->target.cpu_list) {
ncpus = perf_cpu_map__nr(ftrace->evlist->core.user_requested_cpus);

View File

@ -285,7 +285,7 @@ static int add_work(struct perf_kwork *kwork,
(bpf_trace->get_work_name(key, &tmp.name)))
return -1;
work = perf_kwork_add_work(kwork, tmp.class, &tmp);
work = kwork->add_work(kwork, tmp.class, &tmp);
if (work == NULL)
return -1;

View File

@ -255,7 +255,7 @@ static int add_work(struct perf_kwork *kwork, struct work_key *key,
bpf_trace = kwork_class_bpf_supported_list[type];
tmp.class = bpf_trace->class;
work = perf_kwork_add_work(kwork, tmp.class, &tmp);
work = kwork->add_work(kwork, tmp.class, &tmp);
if (!work)
return -1;

View File

@ -458,7 +458,7 @@ int lock_contention_read(struct lock_contention *con)
if (con->save_callstack) {
bpf_map_lookup_elem(stack, &key.stack_id, stack_trace);
if (!match_callstack_filter(machine, stack_trace)) {
if (!match_callstack_filter(machine, stack_trace, con->max_stack)) {
con->nr_filtered += data.count;
goto next;
}

View File

@ -41,6 +41,9 @@ int enabled = 0;
const volatile int has_cpu = 0;
const volatile int has_task = 0;
const volatile int use_nsec = 0;
const volatile unsigned int bucket_range;
const volatile unsigned int min_latency;
const volatile unsigned int max_latency;
SEC("kprobe/func")
int BPF_PROG(func_begin)
@ -92,7 +95,7 @@ int BPF_PROG(func_end)
start = bpf_map_lookup_elem(&functime, &tid);
if (start) {
__s64 delta = bpf_ktime_get_ns() - *start;
__u32 key;
__u32 key = 0;
__u64 *hist;
bpf_map_delete_elem(&functime, &tid);
@ -100,12 +103,34 @@ int BPF_PROG(func_end)
if (delta < 0)
return 0;
if (bucket_range != 0) {
delta /= cmp_base;
if (min_latency > 0) {
if (delta > min_latency)
delta -= min_latency;
else
goto do_lookup;
}
// Bucket 0: less than 1 unit (ms or ns), or at or below
// the min latency desired.
if (delta > 0) { // 1st entry: [ 1 unit .. bucket_range units )
// clang 12 doesn't like s64 / u32 division
key = (__u64)delta / bucket_range + 1;
if (key >= NUM_BUCKET ||
delta >= max_latency - min_latency)
key = NUM_BUCKET - 1;
}
goto do_lookup;
}
// calculate index using delta
for (key = 0; key < (NUM_BUCKET - 1); key++) {
if (delta < (cmp_base << key))
break;
}
do_lookup:
hist = bpf_map_lookup_elem(&latency, &key);
if (!hist)
return 0;
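
The linear-bucket path above is easier to see with numbers. Below is a small user-space sketch of the same key computation, applied after the delta /= cmp_base unit conversion; the bucket_range, min_latency and max_latency values are made up for illustration, and NUM_BUCKET mirrors the BPF side:

	#include <stdio.h>

	#define NUM_BUCKET 22	/* mirrors the BPF side */

	/* Same key computation as the bucket_range path in func_end() above;
	 * delta is assumed already converted to units (the BPF code divides
	 * by cmp_base first). */
	static unsigned int bucket_key(long long delta, unsigned int bucket_range,
				       unsigned int min_latency, unsigned int max_latency)
	{
		unsigned int key = 0;

		if (min_latency > 0) {
			if (delta > min_latency)
				delta -= min_latency;
			else
				return 0;	/* bucket 0: at or below the minimum */
		}
		if (delta > 0) {
			key = (unsigned long long)delta / bucket_range + 1;
			if (key >= NUM_BUCKET || delta >= max_latency - min_latency)
				key = NUM_BUCKET - 1;	/* overflow bucket */
		}
		return key;
	}

	int main(void)
	{
		/* illustrative values: 10-unit buckets between 100 and 300 */
		printf("%u\n", bucket_key(95, 10, 100, 300));	/* 0: below the minimum */
		printf("%u\n", bucket_key(134, 10, 100, 300));	/* 4: latency in [130,140) */
		printf("%u\n", bucket_key(1000, 10, 100, 300));	/* 21: overflow bucket */
		return 0;
	}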

View File

@ -18,7 +18,9 @@ enum kwork_class_type {
};
#define MAX_ENTRIES 102400
#define MAX_NR_CPUS 2048
#ifndef MAX_NR_CPUS
#define MAX_NR_CPUS 4096
#endif
#define PF_KTHREAD 0x00200000
#define MAX_COMMAND_LEN 16

tools/perf/util/btf.c Normal file
View File

@ -0,0 +1,27 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Arnaldo Carvalho de Melo <acme@redhat.com>
*
* Copyright (C) 2024, Red Hat, Inc
*/
#include <bpf/btf.h>
#include <util/btf.h>
#include <string.h>
const struct btf_member *__btf_type__find_member_by_name(struct btf *btf,
int type_id, const char *member_name)
{
const struct btf_type *t = btf__type_by_id(btf, type_id);
const struct btf_member *m;
int i;
for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
const char *current_member_name = btf__name_by_offset(btf, m->name_off);
if (!strcmp(current_member_name, member_name))
return m;
}
return NULL;
}
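
A short usage sketch for the new helper, mirroring how attr_has_sigtrap() uses it in the hunk above; btf__load_vmlinux_btf() and btf__find_by_name_kind() are standard libbpf calls, and error handling is trimmed:

	#include <bpf/btf.h>
	#include <util/btf.h>
	#include <stdbool.h>

	static bool perf_event_attr_has_sigtrap(void)
	{
		struct btf *btf = btf__load_vmlinux_btf();
		bool ret = false;
		int id;

		if (!btf)
			return false;
		/* Find "struct perf_event_attr" in vmlinux BTF, then check the member. */
		id = btf__find_by_name_kind(btf, "perf_event_attr", BTF_KIND_STRUCT);
		if (id >= 0)
			ret = __btf_type__find_member_by_name(btf, id, "sigtrap") != NULL;
		btf__free(btf);
		return ret;
	}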

tools/perf/util/btf.h Normal file
View File

@ -0,0 +1,10 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __PERF_UTIL_BTF
#define __PERF_UTIL_BTF 1
struct btf;
struct btf_member;
const struct btf_member *__btf_type__find_member_by_name(struct btf *btf,
int type_id, const char *member_name);
#endif // __PERF_UTIL_BTF

View File

@ -473,7 +473,7 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str,
leader = NULL;
evlist__for_each_entry(orig_list, pos) {
evsel = evsel__clone(pos);
evsel = evsel__clone(/*dest=*/NULL, pos);
if (evsel == NULL)
goto out_err;

View File

@ -13,6 +13,7 @@
#include <sys/param.h>
#include "cache.h"
#include "callchain.h"
#include "header.h"
#include <subcmd/exec-cmd.h>
#include "util/event.h" /* proc_map_timeout */
#include "util/hist.h" /* perf_hist_config */
@ -34,6 +35,22 @@
#define DEBUG_CACHE_DIR ".debug"
#define METRIC_ONLY_LEN 20
struct perf_stat_config stat_config = {
.aggr_mode = AGGR_GLOBAL,
.aggr_level = MAX_CACHE_LVL + 1,
.scale = true,
.unit_width = 4, /* strlen("unit") */
.run_count = 1,
.metric_only_len = METRIC_ONLY_LEN,
.walltime_nsecs_stats = &walltime_nsecs_stats,
.ru_stats = &ru_stats,
.big_num = true,
.ctl_fd = -1,
.ctl_fd_ack = -1,
.iostat_run = false,
};
char buildid_dir[MAXPATHLEN]; /* root dir for buildid, binary cache */
@ -455,6 +472,16 @@ static int perf_ui_config(const char *var, const char *value)
return 0;
}
void perf_stat__set_big_num(int set)
{
stat_config.big_num = (set != 0);
}
static void perf_stat__set_no_csv_summary(int set)
{
stat_config.no_csv_summary = (set != 0);
}
static int perf_stat_config(const char *var, const char *value)
{
if (!strcmp(var, "stat.big-num"))

View File

@ -426,8 +426,9 @@ static int add_tracepoint_values(struct ctf_writer *cw,
struct evsel *evsel,
struct perf_sample *sample)
{
struct tep_format_field *common_fields = evsel->tp_format->format.common_fields;
struct tep_format_field *fields = evsel->tp_format->format.fields;
const struct tep_event *tp_format = evsel__tp_format(evsel);
struct tep_format_field *common_fields = tp_format->format.common_fields;
struct tep_format_field *fields = tp_format->format.fields;
int ret;
ret = add_tracepoint_fields_values(cw, event_class, event,
@ -1064,8 +1065,9 @@ static int add_tracepoint_types(struct ctf_writer *cw,
struct evsel *evsel,
struct bt_ctf_event_class *class)
{
struct tep_format_field *common_fields = evsel->tp_format->format.common_fields;
struct tep_format_field *fields = evsel->tp_format->format.fields;
const struct tep_event *tp_format = evsel__tp_format(evsel);
struct tep_format_field *common_fields = tp_format ? tp_format->format.common_fields : NULL;
struct tep_format_field *fields = tp_format ? tp_format->format.fields : NULL;
int ret;
ret = add_tracepoint_fields_types(cw, common_fields, class);

View File

@ -230,12 +230,12 @@ static int process_sample_event(const struct perf_tool *tool,
#ifdef HAVE_LIBTRACEEVENT
if (sample->raw_data) {
int i;
struct tep_format_field **fields;
struct tep_event *tp_format = evsel__tp_format(evsel);
struct tep_format_field **fields = tp_format ? tep_event_fields(tp_format) : NULL;
fields = tep_event_fields(evsel->tp_format);
if (fields) {
i = 0;
int i = 0;
while (fields[i]) {
struct trace_seq s;

View File

@ -1245,6 +1245,9 @@ int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, s
scnprintf(buf, buflen, "The %s BPF file has no BTF section, compile with -g or use pahole -J.",
dso__long_name(dso));
break;
case SYMBOL_ANNOTATE_ERRNO__COULDNT_DETERMINE_FILE_TYPE:
scnprintf(buf, buflen, "Couldn't determine the file %s type.", dso__long_name(dso));
break;
default:
scnprintf(buf, buflen, "Internal error: Invalid %d error code\n", errnum);
break;
@ -2289,7 +2292,7 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
} else if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_IMAGE) {
return symbol__disassemble_bpf_image(sym, args);
} else if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) {
return -1;
return SYMBOL_ANNOTATE_ERRNO__COULDNT_DETERMINE_FILE_TYPE;
} else if (dso__is_kcore(dso)) {
kce.addr = map__rip_2objdump(map, sym->start);
kce.kcore_filename = symfs_filename;

View File

@ -234,7 +234,8 @@ static const __u8 *dlfilter__insn(void *ctx, __u32 *len)
struct machine *machine = maps__machine(thread__maps(al->thread));
if (machine)
script_fetch_insn(d->sample, al->thread, machine);
script_fetch_insn(d->sample, al->thread, machine,
/*native_arch=*/true);
}
}

View File

@ -24,15 +24,19 @@ struct perf_env perf_env;
#include "bpf-utils.h"
#include <bpf/libbpf.h>
void perf_env__insert_bpf_prog_info(struct perf_env *env,
bool perf_env__insert_bpf_prog_info(struct perf_env *env,
struct bpf_prog_info_node *info_node)
{
bool ret;
down_write(&env->bpf_progs.lock);
__perf_env__insert_bpf_prog_info(env, info_node);
ret = __perf_env__insert_bpf_prog_info(env, info_node);
up_write(&env->bpf_progs.lock);
return ret;
}
void __perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node)
bool __perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node)
{
__u32 prog_id = info_node->info_linear->info.id;
struct bpf_prog_info_node *node;
@ -50,13 +54,14 @@ void __perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info
p = &(*p)->rb_right;
} else {
pr_debug("duplicated bpf prog info %u\n", prog_id);
return;
return false;
}
}
rb_link_node(&info_node->rb_node, parent, p);
rb_insert_color(&info_node->rb_node, &env->bpf_progs.infos);
env->bpf_progs.infos_cnt++;
return true;
}
struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env,
@ -326,10 +331,13 @@ int perf_env__read_cpu_topology_map(struct perf_env *env)
for (idx = 0; idx < nr_cpus; ++idx) {
struct perf_cpu cpu = { .cpu = idx };
int core_id = cpu__get_core_id(cpu);
int socket_id = cpu__get_socket_id(cpu);
int die_id = cpu__get_die_id(cpu);
env->cpu[idx].core_id = cpu__get_core_id(cpu);
env->cpu[idx].socket_id = cpu__get_socket_id(cpu);
env->cpu[idx].die_id = cpu__get_die_id(cpu);
env->cpu[idx].core_id = core_id >= 0 ? core_id : -1;
env->cpu[idx].socket_id = socket_id >= 0 ? socket_id : -1;
env->cpu[idx].die_id = die_id >= 0 ? die_id : -1;
}
env->nr_cpus_avail = nr_cpus;
@ -472,6 +480,10 @@ const char *perf_env__arch(struct perf_env *env)
return normalize_arch(arch_name);
}
#if defined(HAVE_SYSCALL_TABLE_SUPPORT) && defined(HAVE_LIBTRACEEVENT)
#include "trace/beauty/arch_errno_names.c"
#endif
const char *perf_env__arch_strerrno(struct perf_env *env __maybe_unused, int err __maybe_unused)
{
#if defined(HAVE_SYSCALL_TABLE_SUPPORT) && defined(HAVE_LIBTRACEEVENT)

View File

@ -56,8 +56,6 @@ struct pmu_caps {
typedef const char *(arch_syscalls__strerrno_t)(int err);
arch_syscalls__strerrno_t *arch_syscalls__strerrno_function(const char *arch);
struct perf_env {
char *hostname;
char *os_release;
@ -176,9 +174,9 @@ const char *perf_env__raw_arch(struct perf_env *env);
int perf_env__nr_cpus_avail(struct perf_env *env);
void perf_env__init(struct perf_env *env);
void __perf_env__insert_bpf_prog_info(struct perf_env *env,
bool __perf_env__insert_bpf_prog_info(struct perf_env *env,
struct bpf_prog_info_node *info_node);
void perf_env__insert_bpf_prog_info(struct perf_env *env,
bool perf_env__insert_bpf_prog_info(struct perf_env *env,
struct bpf_prog_info_node *info_node);
struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env,
__u32 prog_id);

View File

@ -454,7 +454,7 @@ static int evsel__copy_config_terms(struct evsel *dst, struct evsel *src)
* The assumption is that @orig is not configured nor opened yet.
* So we only care about the attributes that can be set while it's parsed.
*/
struct evsel *evsel__clone(struct evsel *orig)
struct evsel *evsel__clone(struct evsel *dest, struct evsel *orig)
{
struct evsel *evsel;
@ -467,7 +467,11 @@ struct evsel *evsel__clone(struct evsel *orig)
if (orig->bpf_obj)
return NULL;
evsel = evsel__new(&orig->core.attr);
if (dest)
evsel = dest;
else
evsel = evsel__new(&orig->core.attr);
if (evsel == NULL)
return NULL;
@ -512,11 +516,12 @@ struct evsel *evsel__clone(struct evsel *orig)
evsel->core.leader = orig->core.leader;
evsel->max_events = orig->max_events;
free((char *)evsel->unit);
evsel->unit = strdup(orig->unit);
if (evsel->unit == NULL)
goto out_err;
zfree(&evsel->unit);
if (orig->unit) {
evsel->unit = strdup(orig->unit);
if (evsel->unit == NULL)
goto out_err;
}
evsel->scale = orig->scale;
evsel->snapshot = orig->snapshot;
evsel->per_pkg = orig->per_pkg;
@ -544,53 +549,101 @@ struct evsel *evsel__clone(struct evsel *orig)
return NULL;
}
static int trace_event__id(const char *sys, const char *name)
{
char *tp_dir = get_events_file(sys);
char path[PATH_MAX];
int id, err;
if (!tp_dir)
return -1;
scnprintf(path, PATH_MAX, "%s/%s/id", tp_dir, name);
put_events_file(tp_dir);
err = filename__read_int(path, &id);
if (err)
return err;
return id;
}
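
trace_event__id() gives evsel__newtp_idx() the tracepoint config value without needing libtraceevent. A usage sketch -- the tracefs path shown is illustrative, since get_events_file() resolves the actual mount point:

	/* Reads the integer in /sys/kernel/tracing/events/syscalls/sys_enter_openat/id;
	 * the returned id becomes attr.config for the PERF_TYPE_TRACEPOINT event. */
	int id = trace_event__id("syscalls", "sys_enter_openat");

	if (id < 0)
		return id;	/* tracefs not mounted, or the tracepoint does not exist */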
/*
* Returns pointer with encoded error via <linux/err.h> interface.
*/
#ifdef HAVE_LIBTRACEEVENT
struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx, bool format)
{
struct perf_event_attr attr = {
.type = PERF_TYPE_TRACEPOINT,
.sample_type = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD),
};
struct evsel *evsel = zalloc(perf_evsel__object.size);
int err = -ENOMEM;
int err = -ENOMEM, id = -1;
if (evsel == NULL) {
if (evsel == NULL)
goto out_err;
} else {
struct perf_event_attr attr = {
.type = PERF_TYPE_TRACEPOINT,
.sample_type = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD),
};
if (asprintf(&evsel->name, "%s:%s", sys, name) < 0)
if (asprintf(&evsel->name, "%s:%s", sys, name) < 0)
goto out_free;
#ifdef HAVE_LIBTRACEEVENT
evsel->tp_sys = strdup(sys);
if (!evsel->tp_sys)
goto out_free;
evsel->tp_name = strdup(name);
if (!evsel->tp_name)
goto out_free;
#endif
event_attr_init(&attr);
if (format) {
id = trace_event__id(sys, name);
if (id < 0) {
err = id;
goto out_free;
event_attr_init(&attr);
if (format) {
evsel->tp_format = trace_event__tp_format(sys, name);
if (IS_ERR(evsel->tp_format)) {
err = PTR_ERR(evsel->tp_format);
goto out_free;
}
attr.config = evsel->tp_format->id;
} else {
attr.config = (__u64) -1;
}
attr.sample_period = 1;
evsel__init(evsel, &attr, idx);
}
attr.config = (__u64)id;
attr.sample_period = 1;
evsel__init(evsel, &attr, idx);
return evsel;
out_free:
zfree(&evsel->name);
#ifdef HAVE_LIBTRACEEVENT
zfree(&evsel->tp_sys);
zfree(&evsel->tp_name);
#endif
free(evsel);
out_err:
return ERR_PTR(err);
}
#ifdef HAVE_LIBTRACEEVENT
struct tep_event *evsel__tp_format(struct evsel *evsel)
{
struct tep_event *tp_format = evsel->tp_format;
if (tp_format)
return tp_format;
if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
return NULL;
tp_format = trace_event__tp_format(evsel->tp_sys, evsel->tp_name);
if (IS_ERR(tp_format)) {
int err = -PTR_ERR(tp_format);
pr_err("Error getting tracepoint format '%s' '%s'(%d)\n",
evsel__name(evsel), strerror(err), err);
return NULL;
}
evsel->tp_format = tp_format;
return evsel->tp_format;
}
#endif
const char *const evsel__hw_names[PERF_COUNT_HW_MAX] = {
@ -1103,6 +1156,9 @@ static void evsel__apply_config_terms(struct evsel *evsel,
case EVSEL__CONFIG_TERM_AUX_OUTPUT:
attr->aux_output = term->val.aux_output ? 1 : 0;
break;
case EVSEL__CONFIG_TERM_AUX_ACTION:
/* Already applied by auxtrace */
break;
case EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE:
/* Already applied by auxtrace */
break;
@ -1587,6 +1643,10 @@ void evsel__exit(struct evsel *evsel)
perf_thread_map__put(evsel->core.threads);
zfree(&evsel->group_name);
zfree(&evsel->name);
#ifdef HAVE_LIBTRACEEVENT
zfree(&evsel->tp_sys);
zfree(&evsel->tp_name);
#endif
zfree(&evsel->filter);
zfree(&evsel->group_pmu_name);
zfree(&evsel->unit);
@ -2090,16 +2150,17 @@ int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
return err;
}
static bool has_attr_feature(struct perf_event_attr *attr, unsigned long flags)
static bool __has_attr_feature(struct perf_event_attr *attr,
struct perf_cpu cpu, unsigned long flags)
{
int fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, /*cpu=*/-1,
int fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, cpu.cpu,
/*group_fd=*/-1, flags);
close(fd);
if (fd < 0) {
attr->exclude_kernel = 1;
fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, /*cpu=*/-1,
fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, cpu.cpu,
/*group_fd=*/-1, flags);
close(fd);
}
@ -2107,7 +2168,7 @@ static bool has_attr_feature(struct perf_event_attr *attr, unsigned long flags)
if (fd < 0) {
attr->exclude_hv = 1;
fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, /*cpu=*/-1,
fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, cpu.cpu,
/*group_fd=*/-1, flags);
close(fd);
}
@ -2115,7 +2176,7 @@ static bool has_attr_feature(struct perf_event_attr *attr, unsigned long flags)
if (fd < 0) {
attr->exclude_guest = 1;
fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, /*cpu=*/-1,
fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, cpu.cpu,
/*group_fd=*/-1, flags);
close(fd);
}
@ -2127,6 +2188,13 @@ static bool has_attr_feature(struct perf_event_attr *attr, unsigned long flags)
return fd >= 0;
}
static bool has_attr_feature(struct perf_event_attr *attr, unsigned long flags)
{
struct perf_cpu cpu = {.cpu = -1};
return __has_attr_feature(attr, cpu, flags);
}
static void evsel__detect_missing_pmu_features(struct evsel *evsel)
{
struct perf_event_attr attr = {
@ -2215,7 +2283,65 @@ static void evsel__detect_missing_brstack_features(struct evsel *evsel)
errno = old_errno;
}
static bool evsel__detect_missing_features(struct evsel *evsel)
static bool evsel__probe_aux_action(struct evsel *evsel, struct perf_cpu cpu)
{
struct perf_event_attr attr = evsel->core.attr;
int old_errno = errno;
attr.disabled = 1;
attr.aux_start_paused = 1;
if (__has_attr_feature(&attr, cpu, /*flags=*/0)) {
errno = old_errno;
return true;
}
/*
* EOPNOTSUPP means the kernel supports the feature but the PMU does
* not, so keep that distinction if possible.
*/
if (errno != EOPNOTSUPP)
errno = old_errno;
return false;
}
static void evsel__detect_missing_aux_action_feature(struct evsel *evsel, struct perf_cpu cpu)
{
static bool detection_done;
struct evsel *leader;
/*
* Don't bother probing aux_action if it is not being used or has been
* probed before.
*/
if (!evsel->core.attr.aux_action || detection_done)
return;
detection_done = true;
/*
* The leader is an AUX area event. If it has failed, assume the feature
* is not supported.
*/
leader = evsel__leader(evsel);
if (evsel == leader) {
perf_missing_features.aux_action = true;
return;
}
/*
* AUX area event with aux_action must have been opened successfully
* already, so feature is supported.
*/
if (leader->core.attr.aux_action)
return;
if (!evsel__probe_aux_action(leader, cpu))
perf_missing_features.aux_action = true;
}
static bool evsel__detect_missing_features(struct evsel *evsel, struct perf_cpu cpu)
{
static bool detection_done = false;
struct perf_event_attr attr = {
@ -2225,6 +2351,8 @@ static bool evsel__detect_missing_features(struct evsel *evsel)
};
int old_errno;
evsel__detect_missing_aux_action_feature(evsel, cpu);
evsel__detect_missing_pmu_features(evsel);
if (evsel__has_br_stack(evsel))
@ -2439,6 +2567,7 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
int idx, thread, nthreads;
int pid = -1, err, old_errno;
enum rlimit_action set_rlimit = NO_CHANGE;
struct perf_cpu cpu;
if (evsel__is_retire_lat(evsel))
return tpebs_start(evsel->evlist);
@ -2476,6 +2605,7 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
}
for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) {
cpu = perf_cpu_map__cpu(cpus, idx);
for (thread = 0; thread < nthreads; thread++) {
int fd, group_fd;
@ -2496,10 +2626,9 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
/* Debug message used by test scripts */
pr_debug2_peo("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx",
pid, perf_cpu_map__cpu(cpus, idx).cpu, group_fd, evsel->open_flags);
pid, cpu.cpu, group_fd, evsel->open_flags);
fd = sys_perf_event_open(&evsel->core.attr, pid,
perf_cpu_map__cpu(cpus, idx).cpu,
fd = sys_perf_event_open(&evsel->core.attr, pid, cpu.cpu,
group_fd, evsel->open_flags);
FD(evsel, idx, thread) = fd;
@ -2515,8 +2644,7 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
bpf_counter__install_pe(evsel, idx, fd);
if (unlikely(test_attr__enabled())) {
test_attr__open(&evsel->core.attr, pid,
perf_cpu_map__cpu(cpus, idx),
test_attr__open(&evsel->core.attr, pid, cpu,
fd, group_fd, evsel->open_flags);
}
@ -2571,7 +2699,7 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
if (err == -EMFILE && rlimit__increase_nofile(&set_rlimit))
goto retry_open;
if (err == -EINVAL && evsel__detect_missing_features(evsel))
if (err == -EINVAL && evsel__detect_missing_features(evsel, cpu))
goto fallback_missing_features;
if (evsel__precise_ip_fallback(evsel))
@ -3218,12 +3346,16 @@ u16 evsel__id_hdr_size(const struct evsel *evsel)
#ifdef HAVE_LIBTRACEEVENT
struct tep_format_field *evsel__field(struct evsel *evsel, const char *name)
{
return tep_find_field(evsel->tp_format, name);
struct tep_event *tp_format = evsel__tp_format(evsel);
return tp_format ? tep_find_field(tp_format, name) : NULL;
}
struct tep_format_field *evsel__common_field(struct evsel *evsel, const char *name)
{
return tep_find_common_field(evsel->tp_format, name);
struct tep_event *tp_format = evsel__tp_format(evsel);
return tp_format ? tep_find_common_field(tp_format, name) : NULL;
}
void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char *name)
@ -3526,6 +3658,10 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
return scnprintf(msg, size,
"%s: PMU Hardware doesn't support 'aux_output' feature",
evsel__name(evsel));
if (evsel->core.attr.aux_action)
return scnprintf(msg, size,
"%s: PMU Hardware doesn't support 'aux_action' feature",
evsel__name(evsel));
if (evsel->core.attr.sample_period != 0)
return scnprintf(msg, size,
"%s: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat'",
@ -3556,6 +3692,8 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
return scnprintf(msg, size, "clockid feature not supported.");
if (perf_missing_features.clockid_wrong)
return scnprintf(msg, size, "wrong clockid (%d).", clockid);
if (perf_missing_features.aux_action)
return scnprintf(msg, size, "The 'aux_action' feature is not supported, update the kernel.");
if (perf_missing_features.aux_output)
return scnprintf(msg, size, "The 'aux_output' feature is not supported, update the kernel.");
if (!target__has_cpu(target))

View File

@ -59,6 +59,8 @@ struct evsel {
char *group_name;
const char *group_pmu_name;
#ifdef HAVE_LIBTRACEEVENT
char *tp_sys;
char *tp_name;
struct tep_event *tp_format;
#endif
char *filter;
@ -205,6 +207,7 @@ struct perf_missing_features {
bool weight_struct;
bool read_lost;
bool branch_counters;
bool aux_action;
bool inherit_sample_read;
};
@ -241,26 +244,23 @@ static inline struct evsel *evsel__new(struct perf_event_attr *attr)
return evsel__new_idx(attr, 0);
}
struct evsel *evsel__clone(struct evsel *orig);
struct evsel *evsel__clone(struct evsel *dest, struct evsel *orig);
int copy_config_terms(struct list_head *dst, struct list_head *src);
void free_config_terms(struct list_head *config_terms);
#ifdef HAVE_LIBTRACEEVENT
struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx, bool format);
/*
* Returns pointer with encoded error via <linux/err.h> interface.
*/
struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx, bool format);
static inline struct evsel *evsel__newtp(const char *sys, const char *name)
{
return evsel__newtp_idx(sys, name, 0, true);
}
#endif
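The comment above documents that evsel__newtp_idx() returns its error encoded in the pointer, <linux/err.h>-style. A freestanding sketch of that encoding convention (the real ERR_PTR()/IS_ERR() live in tools/include/linux/err.h; the _sketch-suffixed versions here are local stand-ins):

#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR_sketch(long error)
{
	return (void *)error; /* -errno stored in the pointer value */
}

static inline long IS_ERR_sketch(const void *ptr)
{
	/* errors occupy the top MAX_ERRNO values of the address space */
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

int main(void)
{
	void *evsel = ERR_PTR_sketch(-ENOMEM); /* pretend allocation failed */

	if (IS_ERR_sketch(evsel))
		printf("error: %ld\n", -(long)evsel); /* prints: error: 12 */
	return 0;
}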
#ifdef HAVE_LIBTRACEEVENT
struct tep_event *event_format__new(const char *sys, const char *name);
struct tep_event *evsel__tp_format(struct evsel *evsel);
#endif
void evsel__init(struct evsel *evsel, struct perf_event_attr *attr, int idx);
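The new tp_sys/tp_name members paired with the evsel__tp_format() accessor suggest that the tracepoint identity is recorded cheaply at creation time and the tep_event is resolved only on first use. A hypothetical sketch of that lazy-resolution idea, under the assumption that the lookup can fail (all names here are invented for illustration):

#include <stdio.h>
#include <stdlib.h>

struct tp_format_sketch { char desc[64]; };

struct tp_evsel_sketch {
	const char *sys, *name;
	struct tp_format_sketch *fmt; /* NULL until first lookup */
};

/* Stand-in for parsing tracefs .../format; may fail and return NULL. */
static struct tp_format_sketch *format_new(const char *sys, const char *name)
{
	struct tp_format_sketch *f = malloc(sizeof(*f));

	if (f)
		snprintf(f->desc, sizeof(f->desc), "%s:%s", sys, name);
	return f;
}

static struct tp_format_sketch *tp_evsel__format(struct tp_evsel_sketch *e)
{
	if (!e->fmt) /* resolve on first access */
		e->fmt = format_new(e->sys, e->name);
	return e->fmt; /* may still be NULL on failure */
}

int main(void)
{
	struct tp_evsel_sketch e = { .sys = "sched", .name = "sched_switch" };
	struct tp_format_sketch *f = tp_evsel__format(&e);

	printf("%s\n", f ? f->desc : "(unresolved)");
	free(e.fmt);
	return 0;
}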

View File

@ -25,6 +25,7 @@ enum evsel_term_type {
EVSEL__CONFIG_TERM_BRANCH,
EVSEL__CONFIG_TERM_PERCORE,
EVSEL__CONFIG_TERM_AUX_OUTPUT,
EVSEL__CONFIG_TERM_AUX_ACTION,
EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE,
EVSEL__CONFIG_TERM_CFG_CHG,
};

View File

@ -81,13 +81,15 @@ int evsel__fprintf(struct evsel *evsel, struct perf_attr_details *details, FILE
#ifdef HAVE_LIBTRACEEVENT
if (details->trace_fields) {
struct tep_format_field *field;
const struct tep_event *tp_format;
if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT) {
printed += comma_fprintf(fp, &first, " (not a tracepoint)");
goto out;
}
field = evsel->tp_format->format.fields;
tp_format = evsel__tp_format(evsel);
field = tp_format ? tp_format->format.fields : NULL;
if (field == NULL) {
printed += comma_fprintf(fp, &first, " (no trace field)");
goto out;
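The comma_fprintf(fp, &first, ...) calls above use the usual "separator only between items" idiom. A self-contained sketch of such a helper; the signature is an assumption, only the idiom is taken from the hunk:

#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>

static int comma_fprintf_sketch(FILE *fp, bool *first, const char *fmt, ...)
{
	va_list ap;
	int printed = 0;

	if (!*first) /* emit the separator for every item but the first */
		printed += fprintf(fp, ",");
	*first = false;

	va_start(ap, fmt);
	printed += vfprintf(fp, fmt, ap);
	va_end(ap);
	return printed;
}

int main(void)
{
	bool first = true;
	const char *fields[] = { "common_pid", "prev_comm", "next_comm" };

	for (unsigned int i = 0; i < sizeof(fields) / sizeof(fields[0]); i++)
		comma_fprintf_sketch(stdout, &first, " %s", fields[i]);
	putchar('\n'); /* prints: " common_pid, prev_comm, next_comm" */
	return 0;
}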

View File

@ -285,7 +285,7 @@ struct expr_parse_ctx *expr__ctx_new(void)
{
struct expr_parse_ctx *ctx;
ctx = malloc(sizeof(struct expr_parse_ctx));
ctx = calloc(1, sizeof(struct expr_parse_ctx));
if (!ctx)
return NULL;
@ -294,9 +294,6 @@ struct expr_parse_ctx *expr__ctx_new(void)
free(ctx);
return NULL;
}
ctx->sctx.user_requested_cpu_list = NULL;
ctx->sctx.runtime = 0;
ctx->sctx.system_wide = false;
return ctx;
}
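Switching malloc() to calloc() is what lets the second hunk drop the three explicit NULL/0/false assignments: the whole struct starts zeroed, and any member added later is covered automatically. A tiny illustration with a stand-in struct (sctx_sketch is hypothetical):

#include <stdlib.h>
#include <stdbool.h>

struct sctx_sketch {
	char *user_requested_cpu_list;
	int runtime;
	bool system_wide;
};

int main(void)
{
	/* One calloc() replaces malloc() plus three explicit assignments. */
	struct sctx_sketch *ctx = calloc(1, sizeof(*ctx));

	if (!ctx)
		return 1;
	/* ctx->user_requested_cpu_list == NULL, runtime == 0,
	 * system_wide == false, with no per-field code to keep in sync. */
	free(ctx);
	return 0;
}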

View File

@ -20,6 +20,9 @@ struct perf_ftrace {
unsigned long percpu_buffer_size;
bool inherit;
bool use_nsec;
unsigned int bucket_range;
unsigned int min_latency;
unsigned int max_latency;
int graph_depth;
int func_stack_trace;
int func_irq_info;
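The new bucket_range/min_latency/max_latency members point at a configurable latency histogram. The following is only a guess at how such bucketing could work; NUM_BUCKET, bucket_idx() and the clamping behaviour are all assumptions, not the upstream algorithm:

#include <stdio.h>

#define NUM_BUCKET 22 /* assumed count; first/last catch out-of-range samples */

static unsigned int bucket_idx(unsigned int delta, /* one latency sample */
			       unsigned int min_latency,
			       unsigned int bucket_range)
{
	unsigned int idx;

	if (delta < min_latency)
		return 0; /* underflow bucket */
	idx = 1 + (delta - min_latency) / bucket_range;
	if (idx >= NUM_BUCKET)
		idx = NUM_BUCKET - 1; /* overflow bucket */
	return idx;
}

int main(void)
{
	unsigned int hist[NUM_BUCKET] = { 0 };
	unsigned int samples[] = { 3, 15, 40, 900 };

	for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		hist[bucket_idx(samples[i], 10, 20)]++;

	for (unsigned int i = 0; i < NUM_BUCKET; i++)
		if (hist[i])
			printf("bucket %u: %u\n", i, hist[i]);
	return 0;
}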

View File

@ -3158,7 +3158,10 @@ static int process_bpf_prog_info(struct feat_fd *ff, void *data __maybe_unused)
/* after reading from file, translate offset to address */
bpil_offs_to_addr(info_linear);
info_node->info_linear = info_linear;
__perf_env__insert_bpf_prog_info(env, info_node);
if (!__perf_env__insert_bpf_prog_info(env, info_node)) {
free(info_linear);
free(info_node);
}
}
up_write(&env->bpf_progs.lock);
@ -3205,7 +3208,8 @@ static int process_bpf_btf(struct feat_fd *ff, void *data __maybe_unused)
if (__do_read(ff, node->data, data_size))
goto out;
__perf_env__insert_btf(env, node);
if (!__perf_env__insert_btf(env, node))
free(node);
node = NULL;
}
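Both hunks above make the __perf_env__insert_*() helpers report whether the node actually went into the tree, so that on a duplicate the caller frees the memory it still owns instead of leaking it. A toy sketch of that ownership contract, using a single-slot stand-in for the rb-tree (node_sketch and table_insert are invented names):

#include <stdbool.h>
#include <stdlib.h>

struct node_sketch { int key; };

static struct node_sketch *slot; /* toy stand-in for the env rb-tree */

/* Returns false when an equal node exists; caller keeps ownership then. */
static bool table_insert(struct node_sketch *n)
{
	if (slot && slot->key == n->key)
		return false;
	slot = n;
	return true;
}

int main(void)
{
	struct node_sketch *a = malloc(sizeof(*a));
	struct node_sketch *b = malloc(sizeof(*b));

	if (!a || !b) {
		free(a);
		free(b);
		return 1;
	}
	a->key = b->key = 42;
	table_insert(a);      /* the table takes ownership of a */
	if (!table_insert(b)) /* duplicate: b was NOT inserted ... */
		free(b);      /* ... so the caller must free it, as above */
	free(slot);
	return 0;
}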

View File

@ -7,16 +7,24 @@ $(OUTPUT)util/intel-pt-decoder/inat-tables.c: $(inat_tables_script) $(inat_table
$(call rule_mkdir)
@$(call echo-cmd,gen)$(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ || rm -f $@
# Busybox's diff doesn't have -I, avoid warning in the case
ifeq ($(SRCARCH),x86)
perf-util-y += inat.o insn.o
else
perf-util-$(CONFIG_AUXTRACE) += inat.o insn.o
endif
$(OUTPUT)util/intel-pt-decoder/intel-pt-insn-decoder.o: util/intel-pt-decoder/intel-pt-insn-decoder.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c
$(OUTPUT)util/intel-pt-decoder/inat.o: $(srctree)/tools/arch/x86/lib/inat.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c
$(call rule_mkdir)
$(call if_changed_dep,cc_o_c)
CFLAGS_intel-pt-insn-decoder.o += -I$(OUTPUT)util/intel-pt-decoder
CFLAGS_inat.o += -I$(OUTPUT)util/intel-pt-decoder
$(OUTPUT)util/intel-pt-decoder/insn.o: $(srctree)/tools/arch/x86/lib/insn.c
$(call rule_mkdir)
$(call if_changed_dep,cc_o_c)
ifeq ($(CC_NO_CLANG), 1)
CFLAGS_intel-pt-insn-decoder.o += -Wno-override-init
CFLAGS_insn.o += -Wno-override-init
endif
CFLAGS_intel-pt-insn-decoder.o += -Wno-packed
CFLAGS_insn.o += -Wno-packed

View File

@ -11,9 +11,6 @@
#include <byteswap.h>
#include "../../../arch/x86/include/asm/insn.h"
#include "../../../arch/x86/lib/inat.c"
#include "../../../arch/x86/lib/insn.c"
#include "event.h"
#include "intel-pt-insn-decoder.h"
