mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-01 02:33:57 +00:00
perf tools changes for v6.3:
- 'perf lock contention' improvements: - Add -o/--lock-owner option: $ sudo ./perf lock contention -abo -- ./perf bench sched pipe # Running 'sched/pipe' benchmark: # Executed 1000000 pipe operations between two processes Total time: 4.766 [sec] 4.766540 usecs/op 209795 ops/sec contended total wait max wait avg wait pid owner 403 565.32 us 26.81 us 1.40 us -1 Unknown 4 27.99 us 8.57 us 7.00 us 1583145 sched-pipe 1 8.25 us 8.25 us 8.25 us 1583144 sched-pipe 1 2.03 us 2.03 us 2.03 us 5068 chrome The owner is unknown in most cases. Filtering only for the mutex locks, it will more likely get the owners. - -S/--callstack-filter is to limit display entries having the given string in the callstack $ sudo ./perf lock contention -abv -S net sleep 1 ... contended total wait max wait avg wait type caller 5 70.20 us 16.13 us 14.04 us spinlock __dev_queue_xmit+0xb6d 0xffffffffa5dd1c60 _raw_spin_lock+0x30 0xffffffffa5b8f6ed __dev_queue_xmit+0xb6d 0xffffffffa5cd8267 ip6_finish_output2+0x2c7 0xffffffffa5cdac14 ip6_finish_output+0x1d4 0xffffffffa5cdb477 ip6_xmit+0x457 0xffffffffa5d1fd17 inet6_csk_xmit+0xd7 0xffffffffa5c5f4aa __tcp_transmit_skb+0x54a 0xffffffffa5c6467d tcp_keepalive_timer+0x2fd Please note that to have the -b option (BPF) working above one has to build with BUILD_BPF_SKEL=1. - Add more 'perf test' entries to test these new features. - Add Ian Rogers to MAINTAINERS as a perf tools reviewer. - Add support for retire latency feature (pipeline stall of a instruction compared to the previous one, in cycles) present on some Intel processors. - Add 'perf c2c' report option to show false sharing with adjacent cachelines, to be used in machines with cacheline prefetching, where accesses to a cacheline brings the next one too. - Skip 'perf test bpf' when the required kernel-debuginfo package isn't installed. 
perf script: - Add 'cgroup' field for 'perf script' output: $ perf record --all-cgroups -- true $ perf script -F comm,pid,cgroup true 337112 /user.slice/user-657345.slice/user@657345.service/... true 337112 /user.slice/user-657345.slice/user@657345.service/... true 337112 /user.slice/user-657345.slice/user@657345.service/... true 337112 /user.slice/user-657345.slice/user@657345.service/... - Add support for showing branch speculation information in 'perf script' and in the 'perf report' raw dump (-D). perf record: - Fix 'perf record' segfault with --overwrite and --max-size. Intel PT: - Add support for synthesizing "cycle" events from Intel PT traces as we support "instruction" events when Intel PT CYC packets are available. This enables much more accurate profiles than when using the regular 'perf record -e cycles' (the default) when the workload lasts for very short periods (<10ms). - .plt symbol handling improvements, better handling IBT (in the past MPX) done in the context of decoding Intel PT processor traces, IFUNC symbols on x86_64, static executables, understanding .plt.got symbols on x86_64. - Add a 'perf test' to test symbol resolution, part of the .plt improvements series, this tests things like symbol size in contexts where only the symbol start is available (kallsyms), etc. - Better handle auxtrace/Intel PT data when using pipe mode (perf record sleep 1|perf report). - Fix symbol lookup with kcore with multiple segments match stext, getting the symbol resolution to just show DSOs as unknown. ARM: - Timestamp improvements for ARM64 systems with ETMv4 (Embedded Trace Macrocell v4). - Ensure ARM64 CoreSight timestamps don't go backwards. - Document that ARM64 SPE (Statistical Profiling Extension) is used with 'perf c2c/mem'. - Add raw decoding for ARM64 SPEv1.2 previous branch address. - Update neoverse-n2-v2 ARM vendor events (JSON tables): topdown L1, TLB, cache, branch, PE utilization and instruction mix metrics. 
- Update decoder code for OpenCSD version 1.4, on ARM64 systems. - Fix command line auto-complete of CPU events on aarch64. perf test/bench: - Switch basic BPF filtering test to use syscall tracepoint to avoid the variable number of probes inserted when using the previous probe point (do_epoll_wait) that happens on different CPU architectures. - Fix DWARF unwind test by adding non-inline to expected function in a backtrace. - Use 'grep -c' where the longer form 'grep | wc -l' was being used. - Add getpid and execve benchmarks to 'perf bench syscall'. Miscellaneous: - Avoid d3-flame-graph package dependency in 'perf script flamegraph', making this feature more generally available. - Add JSON metric events to present CPI stall cycles in Power10. - Assorted improvements/refactorings on the JSON metrics parsing code. Build: - Fix 'perf probe' and 'perf test' when libtraceevent isn't linked, as several tests use tracepoints, those should be skipped. - More fallout fixes for the removal of tools/lib/traceevent/. - Fix build error when linking with libpfm. Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> -----BEGIN PGP SIGNATURE----- iHUEABYKAB0WIQR2GiIUctdOfX2qHhGyPKLppCJ+JwUCY/YzGgAKCRCyPKLppCJ+ J98CAP4/GD3E86Dk+S+w5FmPEHuBKootuZ3pHOqCnXLiyKFZqgEAs9TWOg9KVKGh io9cLluMjzfRwQrND8cpn3VfXxWvVAQ= =L1qh -----END PGP SIGNATURE----- Merge tag 'perf-tools-for-v6.3-1-2023-02-22' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux Pull perf tools updates from Arnaldo Carvalho de Melo: "Miscellaneous: - Add Ian Rogers to MAINTAINERS as a perf tools reviewer. - Add support for retire latency feature (pipeline stall of a instruction compared to the previous one, in cycles) present on some Intel processors. - Add 'perf c2c' report option to show false sharing with adjacent cachelines, to be used in machines with cacheline prefetching, where accesses to a cacheline brings the next one too. 
- Skip 'perf test bpf' when the required kernel-debuginfo package isn't installed. - Avoid d3-flame-graph package dependency in 'perf script flamegraph', making this feature more generally available. - Add JSON metric events to present CPI stall cycles in Power10. - Assorted improvements/refactorings on the JSON metrics parsing code. perf lock contention: - Add -o/--lock-owner option: $ sudo ./perf lock contention -abo -- ./perf bench sched pipe # Running 'sched/pipe' benchmark: # Executed 1000000 pipe operations between two processes Total time: 4.766 [sec] 4.766540 usecs/op 209795 ops/sec contended total wait max wait avg wait pid owner 403 565.32 us 26.81 us 1.40 us -1 Unknown 4 27.99 us 8.57 us 7.00 us 1583145 sched-pipe 1 8.25 us 8.25 us 8.25 us 1583144 sched-pipe 1 2.03 us 2.03 us 2.03 us 5068 chrome The owner is unknown in most cases. Filtering only for the mutex locks, it will more likely get the owners. - -S/--callstack-filter is to limit display entries having the given string in the callstack: $ sudo ./perf lock contention -abv -S net sleep 1 ... contended total wait max wait avg wait type caller 5 70.20 us 16.13 us 14.04 us spinlock __dev_queue_xmit+0xb6d 0xffffffffa5dd1c60 _raw_spin_lock+0x30 0xffffffffa5b8f6ed __dev_queue_xmit+0xb6d 0xffffffffa5cd8267 ip6_finish_output2+0x2c7 0xffffffffa5cdac14 ip6_finish_output+0x1d4 0xffffffffa5cdb477 ip6_xmit+0x457 0xffffffffa5d1fd17 inet6_csk_xmit+0xd7 0xffffffffa5c5f4aa __tcp_transmit_skb+0x54a 0xffffffffa5c6467d tcp_keepalive_timer+0x2fd Please note that to have the -b option (BPF) working above one has to build with BUILD_BPF_SKEL=1. - Add more 'perf test' entries to test these new features. perf script: - Add 'cgroup' field for 'perf script' output: $ perf record --all-cgroups -- true $ perf script -F comm,pid,cgroup true 337112 /user.slice/user-657345.slice/user@657345.service/... true 337112 /user.slice/user-657345.slice/user@657345.service/... 
true 337112 /user.slice/user-657345.slice/user@657345.service/... true 337112 /user.slice/user-657345.slice/user@657345.service/... - Add support for showing branch speculation information in 'perf script' and in the 'perf report' raw dump (-D). perf record: - Fix 'perf record' segfault with --overwrite and --max-size. perf test/bench: - Switch basic BPF filtering test to use syscall tracepoint to avoid the variable number of probes inserted when using the previous probe point (do_epoll_wait) that happens on different CPU architectures. - Fix DWARF unwind test by adding non-inline to expected function in a backtrace. - Use 'grep -c' where the longer form 'grep | wc -l' was being used. - Add getpid and execve benchmarks to 'perf bench syscall'. Intel PT: - Add support for synthesizing "cycle" events from Intel PT traces as we support "instruction" events when Intel PT CYC packets are available. This enables much more accurate profiles than when using the regular 'perf record -e cycles' (the default) when the workload lasts for very short periods (<10ms). - .plt symbol handling improvements, better handling IBT (in the past MPX) done in the context of decoding Intel PT processor traces, IFUNC symbols on x86_64, static executables, understanding .plt.got symbols on x86_64. - Add a 'perf test' to test symbol resolution, part of the .plt improvements series, this tests things like symbol size in contexts where only the symbol start is available (kallsyms), etc. - Better handle auxtrace/Intel PT data when using pipe mode (perf record sleep 1|perf report). - Fix symbol lookup with kcore with multiple segments match stext, getting the symbol resolution to just show DSOs as unknown. ARM: - Timestamp improvements for ARM64 systems with ETMv4 (Embedded Trace Macrocell v4). - Ensure ARM64 CoreSight timestamps don't go backwards. - Document that ARM64 SPE (Statistical Profiling Extension) is used with 'perf c2c/mem'. - Add raw decoding for ARM64 SPEv1.2 previous branch address. 
- Update neoverse-n2-v2 ARM vendor events (JSON tables): topdown L1, TLB, cache, branch, PE utilization and instruction mix metrics. - Update decoder code for OpenCSD version 1.4, on ARM64 systems. - Fix command line auto-complete of CPU events on aarch64. Build: - Fix 'perf probe' and 'perf test' when libtraceevent isn't linked, as several tests use tracepoints, those should be skipped. - More fallout fixes for the removal of tools/lib/traceevent/. - Fix build error when linking with libpfm" * tag 'perf-tools-for-v6.3-1-2023-02-22' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (114 commits) perf tests stat_all_metrics: Change true workload to sleep workload for system wide check perf vendor events power10: Add JSON metric events to present CPI stall cycles in powerpc perf intel-pt: Synthesize cycle events perf c2c: Add report option to show false sharing in adjacent cachelines perf record: Fix segfault with --overwrite and --max-size perf stat: Avoid merging/aggregating metric counts twice perf tools: Fix perf tool build error in util/pfm.c perf tools: Fix auto-complete on aarch64 perf lock contention: Support old rw_semaphore type perf lock contention: Add -o/--lock-owner option perf lock contention: Fix to save callstack for the default modified perf test bpf: Skip test if kernel-debuginfo is not present perf probe: Update the exit error codes in function try_to_find_probe_trace_event perf script: Fix missing Retire Latency fields option documentation perf event x86: Add retire_lat when synthesizing PERF_SAMPLE_WEIGHT_STRUCT perf test x86: Support the retire_lat (Retire Latency) sample_type check perf test bpf: Check for libtraceevent support perf script: Support Retire Latency perf report: Support Retire Latency perf lock contention: Support filters for different aggregation ...
This commit is contained in:
commit
0df82189bc
@ -16323,6 +16323,7 @@ R: Mark Rutland <mark.rutland@arm.com>
|
||||
R: Alexander Shishkin <alexander.shishkin@linux.intel.com>
|
||||
R: Jiri Olsa <jolsa@kernel.org>
|
||||
R: Namhyung Kim <namhyung@kernel.org>
|
||||
R: Ian Rogers <irogers@google.com>
|
||||
L: linux-perf-users@vger.kernel.org
|
||||
L: linux-kernel@vger.kernel.org
|
||||
S: Supported
|
||||
|
@ -1,16 +1,25 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef __NR_perf_event_open
|
||||
# define __NR_perf_event_open 336
|
||||
#ifndef __NR_execve
|
||||
#define __NR_execve 11
|
||||
#endif
|
||||
#ifndef __NR_futex
|
||||
# define __NR_futex 240
|
||||
#ifndef __NR_getppid
|
||||
#define __NR_getppid 64
|
||||
#endif
|
||||
#ifndef __NR_getpgid
|
||||
#define __NR_getpgid 132
|
||||
#endif
|
||||
#ifndef __NR_gettid
|
||||
# define __NR_gettid 224
|
||||
#define __NR_gettid 224
|
||||
#endif
|
||||
#ifndef __NR_futex
|
||||
#define __NR_futex 240
|
||||
#endif
|
||||
#ifndef __NR_getcpu
|
||||
# define __NR_getcpu 318
|
||||
#define __NR_getcpu 318
|
||||
#endif
|
||||
#ifndef __NR_perf_event_open
|
||||
#define __NR_perf_event_open 336
|
||||
#endif
|
||||
#ifndef __NR_setns
|
||||
# define __NR_setns 346
|
||||
#define __NR_setns 346
|
||||
#endif
|
||||
|
@ -1,16 +1,25 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef __NR_perf_event_open
|
||||
# define __NR_perf_event_open 298
|
||||
#ifndef __NR_execve
|
||||
#define __NR_execve 59
|
||||
#endif
|
||||
#ifndef __NR_futex
|
||||
# define __NR_futex 202
|
||||
#ifndef __NR_getppid
|
||||
#define __NR_getppid 110
|
||||
#endif
|
||||
#ifndef __NR_getpgid
|
||||
#define __NR_getpgid 121
|
||||
#endif
|
||||
#ifndef __NR_gettid
|
||||
# define __NR_gettid 186
|
||||
#define __NR_gettid 186
|
||||
#endif
|
||||
#ifndef __NR_getcpu
|
||||
# define __NR_getcpu 309
|
||||
#ifndef __NR_futex
|
||||
#define __NR_futex 202
|
||||
#endif
|
||||
#ifndef __NR_perf_event_open
|
||||
#define __NR_perf_event_open 298
|
||||
#endif
|
||||
#ifndef __NR_setns
|
||||
#define __NR_setns 308
|
||||
#endif
|
||||
#ifndef __NR_getcpu
|
||||
#define __NR_getcpu 309
|
||||
#endif
|
||||
|
@ -53,6 +53,7 @@ build-file := $(dir)/Build
|
||||
|
||||
quiet_cmd_flex = FLEX $@
|
||||
quiet_cmd_bison = BISON $@
|
||||
quiet_cmd_test = TEST $@
|
||||
|
||||
# Create directory unless it exists
|
||||
quiet_cmd_mkdir = MKDIR $(dir $@)
|
||||
|
1
tools/perf/.gitignore
vendored
1
tools/perf/.gitignore
vendored
@ -38,6 +38,7 @@ arch/*/include/generated/
|
||||
trace/beauty/generated/
|
||||
pmu-events/pmu-events.c
|
||||
pmu-events/jevents
|
||||
pmu-events/metric_test.log
|
||||
feature/
|
||||
libapi/
|
||||
libbpf/
|
||||
|
@ -1,4 +1,5 @@
|
||||
i synthesize instructions events
|
||||
y synthesize cycles events
|
||||
b synthesize branches events (branch misses for Arm SPE)
|
||||
c synthesize branches events (calls only)
|
||||
r synthesize branches events (returns only)
|
||||
@ -25,7 +26,7 @@
|
||||
A approximate IPC
|
||||
Z prefer to ignore timestamps (so-called "timeless" decoding)
|
||||
|
||||
The default is all events i.e. the same as --itrace=ibxwpe,
|
||||
The default is all events i.e. the same as --itrace=iybxwpe,
|
||||
except for perf script where it is --itrace=ce
|
||||
|
||||
In addition, the period (default 100000, except for perf script where it is 1)
|
||||
|
@ -18,7 +18,7 @@ COMMON OPTIONS
|
||||
--------------
|
||||
-r::
|
||||
--repeat=::
|
||||
Specify amount of times to repeat the run (default 10).
|
||||
Specify number of times to repeat the run (default 10).
|
||||
|
||||
-f::
|
||||
--format=::
|
||||
|
@ -22,7 +22,11 @@ you to track down the cacheline contentions.
|
||||
On Intel, the tool is based on load latency and precise store facility events
|
||||
provided by Intel CPUs. On PowerPC, the tool uses random instruction sampling
|
||||
with thresholding feature. On AMD, the tool uses IBS op pmu (due to hardware
|
||||
limitations, perf c2c is not supported on Zen3 cpus).
|
||||
limitations, perf c2c is not supported on Zen3 cpus). On Arm64 it uses SPE to
|
||||
sample load and store operations, therefore hardware and kernel support is
|
||||
required. See linkperf:perf-arm-spe[1] for a setup guide. Due to the
|
||||
statistical nature of Arm SPE sampling, not every memory operation will be
|
||||
sampled.
|
||||
|
||||
These events provide:
|
||||
- memory address of the access
|
||||
@ -121,11 +125,17 @@ REPORT OPTIONS
|
||||
perf c2c record --call-graph lbr.
|
||||
Disabled by default. In common cases with call stack overflows,
|
||||
it can recreate better call stacks than the default lbr call stack
|
||||
output. But this approach is not full proof. There can be cases
|
||||
output. But this approach is not foolproof. There can be cases
|
||||
where it creates incorrect call stacks from incorrect matches.
|
||||
The known limitations include exception handling such as
|
||||
setjmp/longjmp will have calls/returns not match.
|
||||
|
||||
--double-cl::
|
||||
Group the detection of shared cacheline events into double cacheline
|
||||
granularity. Some architectures have an Adjacent Cacheline Prefetch
|
||||
feature, which causes cacheline sharing to behave like the cacheline
|
||||
size is doubled.
|
||||
|
||||
C2C RECORD
|
||||
----------
|
||||
The perf c2c record command setup options related to HITM cacheline analysis
|
||||
@ -333,4 +343,4 @@ Check Joe's blog on c2c tool for detailed use case explanation:
|
||||
|
||||
SEE ALSO
|
||||
--------
|
||||
linkperf:perf-record[1], linkperf:perf-mem[1]
|
||||
linkperf:perf-record[1], linkperf:perf-mem[1], linkperf:perf-arm-spe[1]
|
||||
|
@ -101,12 +101,12 @@ data is available you can use the 'perf script' tool with all itrace sampling
|
||||
options, which will list all the samples.
|
||||
|
||||
perf record -e intel_pt//u ls
|
||||
perf script --itrace=ibxwpe
|
||||
perf script --itrace=iybxwpe
|
||||
|
||||
An interesting field that is not printed by default is 'flags' which can be
|
||||
displayed as follows:
|
||||
|
||||
perf script --itrace=ibxwpe -F+flags
|
||||
perf script --itrace=iybxwpe -F+flags
|
||||
|
||||
The flags are "bcrosyiABExghDt" which stand for branch, call, return, conditional,
|
||||
system, asynchronous, interrupt, transaction abort, trace begin, trace end,
|
||||
@ -147,16 +147,17 @@ displayed as follows:
|
||||
There are two ways that instructions-per-cycle (IPC) can be calculated depending
|
||||
on the recording.
|
||||
|
||||
If the 'cyc' config term (see config terms section below) was used, then IPC is
|
||||
calculated using the cycle count from CYC packets, otherwise MTC packets are
|
||||
used - refer to the 'mtc' config term. When MTC is used, however, the values
|
||||
are less accurate because the timing is less accurate.
|
||||
If the 'cyc' config term (see config terms section below) was used, then IPC
|
||||
and cycle events are calculated using the cycle count from CYC packets, otherwise
|
||||
MTC packets are used - refer to the 'mtc' config term. When MTC is used, however,
|
||||
the values are less accurate because the timing is less accurate.
|
||||
|
||||
Because Intel PT does not update the cycle count on every branch or instruction,
|
||||
the values will often be zero. When there are values, they will be the number
|
||||
of instructions and number of cycles since the last update, and thus represent
|
||||
the average IPC since the last IPC for that event type. Note IPC for "branches"
|
||||
events is calculated separately from IPC for "instructions" events.
|
||||
the average IPC or cycle count since the last IPC for that event type.
|
||||
Note IPC for "branches" events is calculated separately from IPC for "instructions"
|
||||
events.
|
||||
|
||||
Even with the 'cyc' config term, it is possible to produce IPC information for
|
||||
every change of timestamp, but at the expense of accuracy. That is selected by
|
||||
@ -900,11 +901,12 @@ Having no option is the same as
|
||||
|
||||
which, in turn, is the same as
|
||||
|
||||
--itrace=cepwx
|
||||
--itrace=cepwxy
|
||||
|
||||
The letters are:
|
||||
|
||||
i synthesize "instructions" events
|
||||
y synthesize "cycles" events
|
||||
b synthesize "branches" events
|
||||
x synthesize "transactions" events
|
||||
w synthesize "ptwrite" events
|
||||
@ -927,6 +929,16 @@ The letters are:
|
||||
"Instructions" events look like they were recorded by "perf record -e
|
||||
instructions".
|
||||
|
||||
"Cycles" events look like they were recorded by "perf record -e cycles"
|
||||
(i.e., the default). Note that even with CYC packets enabled and no sampling,
|
||||
these are not fully accurate, since CYC packets are not emitted for each
|
||||
instruction, only when some other event (like an indirect branch, or a
|
||||
TNT packet representing multiple branches) causes a packet to
|
||||
be emitted. Thus, it is more effective for attributing cycles to functions
|
||||
(and possibly basic blocks) than to individual instructions, although it
|
||||
is not even perfect for functions (although it becomes better if the noretcomp
|
||||
option is active).
|
||||
|
||||
"Branches" events look like they were recorded by "perf record -e branches". "c"
|
||||
and "r" can be combined to get calls and returns.
|
||||
|
||||
@ -934,9 +946,9 @@ and "r" can be combined to get calls and returns.
|
||||
'flags' field can be used in perf script to determine whether the event is a
|
||||
transaction start, commit or abort.
|
||||
|
||||
Note that "instructions", "branches" and "transactions" events depend on code
|
||||
flow packets which can be disabled by using the config term "branch=0". Refer
|
||||
to the config terms section above.
|
||||
Note that "instructions", "cycles", "branches" and "transactions" events
|
||||
depend on code flow packets which can be disabled by using the config term
|
||||
"branch=0". Refer to the config terms section above.
|
||||
|
||||
"ptwrite" events record the payload of the ptwrite instruction and whether
|
||||
"fup_on_ptw" was used. "ptwrite" events depend on PTWRITE packets which are
|
||||
@ -1821,6 +1833,36 @@ Can be compiled and traced:
|
||||
$
|
||||
|
||||
|
||||
Pipe mode
|
||||
---------
|
||||
Pipe mode is a problem for Intel PT and possibly other auxtrace users.
|
||||
It's not recommended to use a pipe as data output with Intel PT because
|
||||
of the following reason.
|
||||
|
||||
Essentially the auxtrace buffers do not behave like the regular perf
|
||||
event buffers. That is because the head and tail are updated by
|
||||
software, but in the auxtrace case the data is written by hardware.
|
||||
So the head and tail do not get updated as data is written.
|
||||
|
||||
In the Intel PT case, the head and tail are updated only when the trace
|
||||
is disabled by software, for example:
|
||||
- full-trace, system wide : when buffer passes watermark
|
||||
- full-trace, not system-wide : when buffer passes watermark or
|
||||
context switches
|
||||
- snapshot mode : as above but also when a snapshot is made
|
||||
- sample mode : as above but also when a sample is made
|
||||
|
||||
That means finished-round ordering doesn't work. An auxtrace buffer
|
||||
can turn up that has data that extends back in time, possibly to the
|
||||
very beginning of tracing.
|
||||
|
||||
For a perf.data file, that problem is solved by going through the trace
|
||||
and queuing up the auxtrace buffers in advance.
|
||||
|
||||
For pipe mode, the order of events and timestamps can presumably
|
||||
be messed up.
|
||||
|
||||
|
||||
EXAMPLE
|
||||
-------
|
||||
|
||||
|
@ -232,7 +232,7 @@ This can be overridden by setting the kernel.perf_event_paranoid
|
||||
sysctl to -1, which allows non root to use these events.
|
||||
|
||||
For accessing trace point events perf needs to have read access to
|
||||
/sys/kernel/debug/tracing, even when perf_event_paranoid is in a relaxed
|
||||
/sys/kernel/tracing, even when perf_event_paranoid is in a relaxed
|
||||
setting.
|
||||
|
||||
TRACING
|
||||
|
@ -172,6 +172,11 @@ CONTENTION OPTIONS
|
||||
--lock-addr::
|
||||
Show lock contention stat by address
|
||||
|
||||
-o::
|
||||
--lock-owner::
|
||||
Show lock contention stat by owners. Implies --threads and
|
||||
requires --use-bpf.
|
||||
|
||||
-Y::
|
||||
--type-filter=<value>::
|
||||
Show lock contention only for given lock types (comma separated list).
|
||||
@ -187,6 +192,12 @@ CONTENTION OPTIONS
|
||||
--lock-filter=<value>::
|
||||
Show lock contention only for given lock addresses or names (comma separated list).
|
||||
|
||||
-S::
|
||||
--callstack-filter=<value>::
|
||||
Show lock contention only if the callstack contains the given string.
|
||||
Note that it matches the substring so 'rq' would match both 'raw_spin_rq_lock'
|
||||
and 'irq_enter_rcu'.
|
||||
|
||||
|
||||
SEE ALSO
|
||||
--------
|
||||
|
@ -23,6 +23,11 @@ Note that on Intel systems the memory latency reported is the use-latency,
|
||||
not the pure load (or store latency). Use latency includes any pipeline
|
||||
queueing delays in addition to the memory subsystem latency.
|
||||
|
||||
On Arm64 this uses SPE to sample load and store operations, therefore hardware
|
||||
and kernel support is required. See linkperf:perf-arm-spe[1] for a setup guide.
|
||||
Due to the statistical nature of SPE sampling, not every memory operation will
|
||||
be sampled.
|
||||
|
||||
OPTIONS
|
||||
-------
|
||||
<command>...::
|
||||
@ -93,4 +98,4 @@ all perf record options.
|
||||
|
||||
SEE ALSO
|
||||
--------
|
||||
linkperf:perf-record[1], linkperf:perf-report[1]
|
||||
linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-arm-spe[1]
|
||||
|
@ -222,7 +222,7 @@ probe syntax, 'SRC' means the source file path, 'ALN' is start line number,
|
||||
and 'ALN2' is end line number in the file. It is also possible to specify how
|
||||
many lines to show by using 'NUM'. Moreover, 'FUNC@SRC' combination is good
|
||||
for searching a specific function when several functions share same name.
|
||||
So, "source.c:100-120" shows lines between 100th to l20th in source.c file. And "func:10+20" shows 20 lines from 10th line of func function.
|
||||
So, "source.c:100-120" shows lines between 100th to 120th in source.c file. And "func:10+20" shows 20 lines from 10th line of func function.
|
||||
|
||||
LAZY MATCHING
|
||||
-------------
|
||||
|
@ -115,6 +115,8 @@ OPTIONS
|
||||
- p_stage_cyc: On powerpc, this presents the number of cycles spent in a
|
||||
pipeline stage. And currently supported only on powerpc.
|
||||
- addr: (Full) virtual address of the sampled instruction
|
||||
- retire_lat: On X86, this reports pipeline stall of this instruction compared
|
||||
to the previous instruction in cycles. Currently supported only on X86.
|
||||
|
||||
By default, comm, dso and symbol keys are used.
|
||||
(i.e. --sort comm,dso,symbol)
|
||||
@ -507,7 +509,7 @@ include::itrace.txt[]
|
||||
perf record --call-graph lbr.
|
||||
Disabled by default. In common cases with call stack overflows,
|
||||
it can recreate better call stacks than the default lbr call stack
|
||||
output. But this approach is not full proof. There can be cases
|
||||
output. But this approach is not foolproof. There can be cases
|
||||
where it creates incorrect call stacks from incorrect matches.
|
||||
The known limitations include exception handling such as
|
||||
setjmp/longjmp will have calls/returns not match.
|
||||
|
@ -55,7 +55,7 @@ Traces meant to be processed using a script should be recorded with
|
||||
the above option: -a to enable system-wide collection.
|
||||
|
||||
The format file for the sched_wakeup event defines the following fields
|
||||
(see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format):
|
||||
(see /sys/kernel/tracing/events/sched/sched_wakeup/format):
|
||||
|
||||
----
|
||||
format:
|
||||
|
@ -319,7 +319,7 @@ So those are the essential steps in writing and running a script. The
|
||||
process can be generalized to any tracepoint or set of tracepoints
|
||||
you're interested in - basically find the tracepoint(s) you're
|
||||
interested in by looking at the list of available events shown by
|
||||
'perf list' and/or look in /sys/kernel/debug/tracing/events/ for
|
||||
'perf list' and/or look in /sys/kernel/tracing/events/ for
|
||||
detailed event and field info, record the corresponding trace data
|
||||
using 'perf record', passing it the list of interesting events,
|
||||
generate a skeleton script using 'perf script -g python' and modify the
|
||||
@ -449,7 +449,7 @@ Traces meant to be processed using a script should be recorded with
|
||||
the above option: -a to enable system-wide collection.
|
||||
|
||||
The format file for the sched_wakeup event defines the following fields
|
||||
(see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format):
|
||||
(see /sys/kernel/tracing/events/sched/sched_wakeup/format):
|
||||
|
||||
----
|
||||
format:
|
||||
|
@ -134,7 +134,7 @@ OPTIONS
|
||||
srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output,
|
||||
brstackinsn, brstackinsnlen, brstackoff, callindent, insn, insnlen, synth,
|
||||
phys_addr, metric, misc, srccode, ipc, data_page_size, code_page_size, ins_lat,
|
||||
machine_pid, vcpu.
|
||||
machine_pid, vcpu, cgroup, retire_lat.
|
||||
Field list can be prepended with the type, trace, sw or hw,
|
||||
to indicate to which event type the field list applies.
|
||||
e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace
|
||||
@ -231,6 +231,9 @@ OPTIONS
|
||||
perf inject to insert a perf.data file recorded inside a virtual machine into
|
||||
a perf.data file recorded on the host at the same time.
|
||||
|
||||
The cgroup field requires samples to have the cgroup id, which is saved
|
||||
when "--all-cgroups" option is passed to 'perf record'.
|
||||
|
||||
Finally, a user may not set fields to none for all event types.
|
||||
i.e., -F "" is not allowed.
|
||||
|
||||
@ -502,7 +505,7 @@ include::itrace.txt[]
|
||||
perf record --call-graph lbr.
|
||||
Disabled by default. In common cases with call stack overflows,
|
||||
it can recreate better call stacks than the default lbr call stack
|
||||
output. But this approach is not full proof. There can be cases
|
||||
output. But this approach is not foolproof. There can be cases
|
||||
where it creates incorrect call stacks from incorrect matches.
|
||||
The known limitations include exception handling such as
|
||||
setjmp/longjmp will have calls/returns not match.
|
||||
|
@ -34,3 +34,6 @@ OPTIONS
|
||||
-F::
|
||||
--dont-fork::
|
||||
Do not fork child for each test, run all tests within single process.
|
||||
|
||||
--dso::
|
||||
Specify a DSO for the "Symbols" test.
|
||||
|
@ -334,7 +334,7 @@ use '-e e1 -e e2 -G foo,foo' or just use '-e e1 -e e2 -G foo'.
|
||||
callgraph. The option must be used with --call-graph lbr recording.
|
||||
Disabled by default. In common cases with call stack overflows,
|
||||
it can recreate better call stacks than the default lbr call stack
|
||||
output. But this approach is not full proof. There can be cases
|
||||
output. But this approach is not foolproof. There can be cases
|
||||
where it creates incorrect call stacks from incorrect matches.
|
||||
The known limitations include exception handling such as
|
||||
setjmp/longjmp will have calls/returns not match.
|
||||
|
@ -1208,10 +1208,6 @@ ifneq ($(NO_LIBTRACEEVENT),1)
|
||||
LIBTRACEEVENT_VERSION_CPP := $(shell expr $(LIBTRACEEVENT_VERSION_1) \* 255 \* 255 + $(LIBTRACEEVENT_VERSION_2) \* 255 + $(LIBTRACEEVENT_VERSION_3))
|
||||
CFLAGS += -DLIBTRACEEVENT_VERSION=$(LIBTRACEEVENT_VERSION_CPP)
|
||||
$(call detected,CONFIG_LIBTRACEEVENT)
|
||||
LIBTRACEEVENT_VERSION_WITH_TEP_FIELD_IS_RELATIVE := $(shell expr 1 \* 255 \* 255 + 5 \* 255 + 0) # 1.5.0
|
||||
ifeq ($(shell test $(LIBTRACEEVENT_VERSION_CPP) -gt $(LIBTRACEEVENT_VERSION_WITH_TEP_FIELD_IS_RELATIVE); echo $$?),0)
|
||||
CFLAGS += -DHAVE_LIBTRACEEVENT_TEP_FIELD_IS_RELATIVE
|
||||
endif
|
||||
else
|
||||
dummy := $(warning Warning: libtraceevent is missing limiting functionality, please install libtraceevent-dev/libtraceevent-devel)
|
||||
endif
|
||||
|
@ -1103,6 +1103,7 @@ clean:: $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBSYMBOL)-clean $(
|
||||
$(OUTPUT)util/intel-pt-decoder/inat-tables.c \
|
||||
$(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \
|
||||
$(OUTPUT)pmu-events/pmu-events.c \
|
||||
$(OUTPUT)pmu-events/metric_test.log \
|
||||
$(OUTPUT)$(fadvise_advice_array) \
|
||||
$(OUTPUT)$(fsconfig_arrays) \
|
||||
$(OUTPUT)$(fsmount_arrays) \
|
||||
|
@ -55,17 +55,16 @@ static struct perf_pmu **find_all_arm_spe_pmus(int *nr_spes, int *err)
|
||||
|
||||
static struct perf_pmu **find_all_hisi_ptt_pmus(int *nr_ptts, int *err)
|
||||
{
|
||||
const char *sysfs = sysfs__mountpoint();
|
||||
struct perf_pmu **hisi_ptt_pmus = NULL;
|
||||
struct dirent *dent;
|
||||
char path[PATH_MAX];
|
||||
DIR *dir = NULL;
|
||||
int idx = 0;
|
||||
|
||||
snprintf(path, PATH_MAX, "%s" EVENT_SOURCE_DEVICE_PATH, sysfs);
|
||||
perf_pmu__event_source_devices_scnprintf(path, sizeof(path));
|
||||
dir = opendir(path);
|
||||
if (!dir) {
|
||||
pr_err("can't read directory '%s'\n", EVENT_SOURCE_DEVICE_PATH);
|
||||
pr_err("can't read directory '%s'\n", path);
|
||||
*err = -EINVAL;
|
||||
return NULL;
|
||||
}
|
||||
|
@ -53,7 +53,17 @@ static const char * const metadata_etmv4_ro[] = {
|
||||
[CS_ETMV4_TRCIDR2] = "trcidr/trcidr2",
|
||||
[CS_ETMV4_TRCIDR8] = "trcidr/trcidr8",
|
||||
[CS_ETMV4_TRCAUTHSTATUS] = "mgmt/trcauthstatus",
|
||||
[CS_ETE_TRCDEVARCH] = "mgmt/trcdevarch"
|
||||
[CS_ETMV4_TS_SOURCE] = "ts_source",
|
||||
};
|
||||
|
||||
static const char * const metadata_ete_ro[] = {
|
||||
[CS_ETE_TRCIDR0] = "trcidr/trcidr0",
|
||||
[CS_ETE_TRCIDR1] = "trcidr/trcidr1",
|
||||
[CS_ETE_TRCIDR2] = "trcidr/trcidr2",
|
||||
[CS_ETE_TRCIDR8] = "trcidr/trcidr8",
|
||||
[CS_ETE_TRCAUTHSTATUS] = "mgmt/trcauthstatus",
|
||||
[CS_ETE_TRCDEVARCH] = "mgmt/trcdevarch",
|
||||
[CS_ETE_TS_SOURCE] = "ts_source",
|
||||
};
|
||||
|
||||
static bool cs_etm_is_etmv4(struct auxtrace_record *itr, int cpu);
|
||||
@ -273,9 +283,15 @@ static int cs_etm_set_sink_attr(struct perf_pmu *pmu,
|
||||
|
||||
ret = perf_pmu__scan_file(pmu, path, "%x", &hash);
|
||||
if (ret != 1) {
|
||||
pr_err("failed to set sink \"%s\" on event %s with %d (%s)\n",
|
||||
sink, evsel__name(evsel), errno,
|
||||
str_error_r(errno, msg, sizeof(msg)));
|
||||
if (errno == ENOENT)
|
||||
pr_err("Couldn't find sink \"%s\" on event %s\n"
|
||||
"Missing kernel or device support?\n\n"
|
||||
"Hint: An appropriate sink will be picked automatically if one isn't specified.\n",
|
||||
sink, evsel__name(evsel));
|
||||
else
|
||||
pr_err("Failed to set sink \"%s\" on event %s with %d (%s)\n",
|
||||
sink, evsel__name(evsel), errno,
|
||||
str_error_r(errno, msg, sizeof(msg)));
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -605,6 +621,32 @@ static int cs_etm_get_ro(struct perf_pmu *pmu, int cpu, const char *path)
|
||||
return val;
|
||||
}
|
||||
|
||||
static int cs_etm_get_ro_signed(struct perf_pmu *pmu, int cpu, const char *path)
|
||||
{
|
||||
char pmu_path[PATH_MAX];
|
||||
int scan;
|
||||
int val = 0;
|
||||
|
||||
/* Get RO metadata from sysfs */
|
||||
snprintf(pmu_path, PATH_MAX, "cpu%d/%s", cpu, path);
|
||||
|
||||
scan = perf_pmu__scan_file(pmu, pmu_path, "%d", &val);
|
||||
if (scan != 1)
|
||||
pr_err("%s: error reading: %s\n", __func__, pmu_path);
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
static bool cs_etm_pmu_path_exists(struct perf_pmu *pmu, int cpu, const char *path)
|
||||
{
|
||||
char pmu_path[PATH_MAX];
|
||||
|
||||
/* Get RO metadata from sysfs */
|
||||
snprintf(pmu_path, PATH_MAX, "cpu%d/%s", cpu, path);
|
||||
|
||||
return perf_pmu__file_exists(pmu, pmu_path);
|
||||
}
|
||||
|
||||
#define TRCDEVARCH_ARCHPART_SHIFT 0
|
||||
#define TRCDEVARCH_ARCHPART_MASK GENMASK(11, 0)
|
||||
#define TRCDEVARCH_ARCHPART(x) (((x) & TRCDEVARCH_ARCHPART_MASK) >> TRCDEVARCH_ARCHPART_SHIFT)
|
||||
@ -617,7 +659,7 @@ static bool cs_etm_is_ete(struct auxtrace_record *itr, int cpu)
|
||||
{
|
||||
struct cs_etm_recording *ptr = container_of(itr, struct cs_etm_recording, itr);
|
||||
struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
|
||||
int trcdevarch = cs_etm_get_ro(cs_etm_pmu, cpu, metadata_etmv4_ro[CS_ETE_TRCDEVARCH]);
|
||||
int trcdevarch = cs_etm_get_ro(cs_etm_pmu, cpu, metadata_ete_ro[CS_ETE_TRCDEVARCH]);
|
||||
|
||||
/*
|
||||
* ETE if ARCHVER is 5 (ARCHVER is 4 for ETM) and ARCHPART is 0xA13.
|
||||
@ -646,6 +688,51 @@ static void cs_etm_save_etmv4_header(__u64 data[], struct auxtrace_record *itr,
|
||||
metadata_etmv4_ro[CS_ETMV4_TRCIDR8]);
|
||||
data[CS_ETMV4_TRCAUTHSTATUS] = cs_etm_get_ro(cs_etm_pmu, cpu,
|
||||
metadata_etmv4_ro[CS_ETMV4_TRCAUTHSTATUS]);
|
||||
|
||||
/* Kernels older than 5.19 may not expose ts_source */
|
||||
if (cs_etm_pmu_path_exists(cs_etm_pmu, cpu, metadata_etmv4_ro[CS_ETMV4_TS_SOURCE]))
|
||||
data[CS_ETMV4_TS_SOURCE] = (__u64) cs_etm_get_ro_signed(cs_etm_pmu, cpu,
|
||||
metadata_etmv4_ro[CS_ETMV4_TS_SOURCE]);
|
||||
else {
|
||||
pr_warning("[%03d] pmu file 'ts_source' not found. Fallback to safe value (-1)\n",
|
||||
cpu);
|
||||
data[CS_ETMV4_TS_SOURCE] = (__u64) -1;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Fill the ETE part of the per-CPU metadata block @data for @cpu.
 *
 * The trace configuration and trace ID come from the tool itself, the
 * remaining registers are read from the PMU's read-only files, and ts_source
 * falls back to -1 when the file is not present.
 */
static void cs_etm_save_ete_header(__u64 data[], struct auxtrace_record *itr, int cpu)
{
	/* Read-only registers fetched from sysFS, in the order they are read */
	static const int ro_regs[] = {
		CS_ETE_TRCIDR0,
		CS_ETE_TRCIDR1,
		CS_ETE_TRCIDR2,
		CS_ETE_TRCIDR8,
		CS_ETE_TRCAUTHSTATUS,
		/* ETE uses the same registers as ETMv4 plus TRCDEVARCH */
		CS_ETE_TRCDEVARCH,
	};
	struct cs_etm_recording *rec = container_of(itr, struct cs_etm_recording, itr);
	struct perf_pmu *pmu = rec->cs_etm_pmu;
	const char *ts_path = metadata_ete_ro[CS_ETE_TS_SOURCE];
	unsigned int i;

	/* Get trace configuration register */
	data[CS_ETE_TRCCONFIGR] = cs_etmv4_get_config(itr);
	/* Get traceID from the framework */
	data[CS_ETE_TRCTRACEIDR] = coresight_get_trace_id(cpu);

	/* Get read-only information from sysFS */
	for (i = 0; i < sizeof(ro_regs) / sizeof(ro_regs[0]); i++)
		data[ro_regs[i]] = cs_etm_get_ro(pmu, cpu, metadata_ete_ro[ro_regs[i]]);

	/* Kernels older than 5.19 may not expose ts_source */
	if (!cs_etm_pmu_path_exists(pmu, cpu, ts_path)) {
		pr_warning("[%03d] pmu file 'ts_source' not found. Fallback to safe value (-1)\n",
			   cpu);
		data[CS_ETE_TS_SOURCE] = (__u64) -1;
		return;
	}

	data[CS_ETE_TS_SOURCE] = (__u64) cs_etm_get_ro_signed(pmu, cpu, ts_path);
}
|
||||
|
||||
static void cs_etm_get_metadata(int cpu, u32 *offset,
|
||||
@ -661,11 +748,7 @@ static void cs_etm_get_metadata(int cpu, u32 *offset,
|
||||
/* first see what kind of tracer this cpu is affined to */
|
||||
if (cs_etm_is_ete(itr, cpu)) {
|
||||
magic = __perf_cs_ete_magic;
|
||||
/* ETE uses the same registers as ETMv4 plus TRCDEVARCH */
|
||||
cs_etm_save_etmv4_header(&info->priv[*offset], itr, cpu);
|
||||
info->priv[*offset + CS_ETE_TRCDEVARCH] =
|
||||
cs_etm_get_ro(cs_etm_pmu, cpu,
|
||||
metadata_etmv4_ro[CS_ETE_TRCDEVARCH]);
|
||||
cs_etm_save_ete_header(&info->priv[*offset], itr, cpu);
|
||||
|
||||
/* How much space was used */
|
||||
increment = CS_ETE_PRIV_MAX;
|
||||
|
@ -3,8 +3,10 @@
|
||||
#include <internal/cpumap.h>
|
||||
#include "../../../util/cpumap.h"
|
||||
#include "../../../util/pmu.h"
|
||||
#include <api/fs/fs.h>
|
||||
#include <math.h>
|
||||
|
||||
const struct pmu_events_table *pmu_events_table__find(void)
|
||||
static struct perf_pmu *pmu__find_core_pmu(void)
|
||||
{
|
||||
struct perf_pmu *pmu = NULL;
|
||||
|
||||
@ -19,8 +21,46 @@ const struct pmu_events_table *pmu_events_table__find(void)
|
||||
if (pmu->cpus->nr != cpu__max_cpu().cpu)
|
||||
return NULL;
|
||||
|
||||
return perf_pmu__find_table(pmu);
|
||||
return pmu;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
 * Return the metrics table of the core PMU found by pmu__find_core_pmu(),
 * or NULL when no such PMU is found.
 */
const struct pmu_metrics_table *pmu_metrics_table__find(void)
{
	struct perf_pmu *core = pmu__find_core_pmu();

	return core ? perf_pmu__find_metrics_table(core) : NULL;
}
|
||||
|
||||
/*
 * Return the events table of the core PMU found by pmu__find_core_pmu(),
 * or NULL when no such PMU is found.
 */
const struct pmu_events_table *pmu_events_table__find(void)
{
	struct perf_pmu *core = pmu__find_core_pmu();

	return core ? perf_pmu__find_events_table(core) : NULL;
}
|
||||
|
||||
double perf_pmu__cpu_slots_per_cycle(void)
|
||||
{
|
||||
char path[PATH_MAX];
|
||||
unsigned long long slots = 0;
|
||||
struct perf_pmu *pmu = pmu__find_core_pmu();
|
||||
|
||||
if (pmu) {
|
||||
perf_pmu__pathname_scnprintf(path, sizeof(path),
|
||||
pmu->name, "caps/slots");
|
||||
/*
|
||||
* The value of slots is not greater than 32 bits, but sysfs__read_int
|
||||
* can't read value with 0x prefix, so use sysfs__read_ull instead.
|
||||
*/
|
||||
sysfs__read_ull(path, &slots);
|
||||
}
|
||||
|
||||
return slots ? (double)slots : NAN;
|
||||
}
|
||||
|
@ -40,11 +40,11 @@ get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
|
||||
return bufp;
|
||||
}
|
||||
|
||||
int arch_get_runtimeparam(const struct pmu_event *pe)
|
||||
int arch_get_runtimeparam(const struct pmu_metric *pm)
|
||||
{
|
||||
int count;
|
||||
char path[PATH_MAX] = "/devices/hv_24x7/interface/";
|
||||
|
||||
atoi(pe->aggr_mode) == PerChip ? strcat(path, "sockets") : strcat(path, "coresperchip");
|
||||
atoi(pm->aggr_mode) == PerChip ? strcat(path, "sockets") : strcat(path, "coresperchip");
|
||||
return sysfs__read_int(path, &count) < 0 ? 1 : count;
|
||||
}
|
||||
|
@ -27,8 +27,10 @@ static bool samples_same(const struct perf_sample *s1,
|
||||
const struct perf_sample *s2,
|
||||
u64 type)
|
||||
{
|
||||
if (type & PERF_SAMPLE_WEIGHT_STRUCT)
|
||||
if (type & PERF_SAMPLE_WEIGHT_STRUCT) {
|
||||
COMP(ins_lat);
|
||||
COMP(retire_lat);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -48,6 +50,7 @@ static int do_test(u64 sample_type)
|
||||
struct perf_sample sample = {
|
||||
.weight = 101,
|
||||
.ins_lat = 102,
|
||||
.retire_lat = 103,
|
||||
};
|
||||
struct perf_sample sample_out;
|
||||
size_t i, sz, bufsz;
|
||||
|
@ -89,6 +89,7 @@ void arch_perf_parse_sample_weight(struct perf_sample *data,
|
||||
else {
|
||||
data->weight = weight.var1_dw;
|
||||
data->ins_lat = weight.var2_w;
|
||||
data->retire_lat = weight.var3_w;
|
||||
}
|
||||
}
|
||||
|
||||
@ -100,5 +101,25 @@ void arch_perf_synthesize_sample_weight(const struct perf_sample *data,
|
||||
if (type & PERF_SAMPLE_WEIGHT_STRUCT) {
|
||||
*array &= 0xffffffff;
|
||||
*array |= ((u64)data->ins_lat << 32);
|
||||
*array |= ((u64)data->retire_lat << 48);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Map a generic sort-entry header to the name used on this architecture.
 *
 * The two "Pipeline Stage Cycle" headers are renamed to their
 * "Retire Latency" equivalents; any other header is returned unchanged
 * (the very same pointer that was passed in).
 */
const char *arch_perf_header_entry(const char *se_header)
{
	static const struct {
		const char *generic;
		const char *arch;
	} renames[] = {
		{ "Local Pipeline Stage Cycle", "Local Retire Latency" },
		{ "Pipeline Stage Cycle", "Retire Latency" },
	};
	unsigned int i;

	for (i = 0; i < sizeof(renames) / sizeof(renames[0]); i++) {
		if (strcmp(se_header, renames[i].generic) == 0)
			return renames[i].arch;
	}

	return se_header;
}
|
||||
|
||||
/*
 * Return 1 when @sort_key is one of the sort keys accepted here
 * ("p_stage_cyc" or "local_p_stage_cyc"), 0 otherwise.
 */
int arch_support_sort_key(const char *sort_key)
{
	return strcmp(sort_key, "p_stage_cyc") == 0 ||
	       strcmp(sort_key, "local_p_stage_cyc") == 0;
}
|
||||
|
@ -15,8 +15,6 @@
|
||||
#include "../../../util/pmu.h"
|
||||
#include "../../../util/fncache.h"
|
||||
|
||||
#define TEMPLATE_ALIAS "%s/bus/event_source/devices/%s/alias"
|
||||
|
||||
struct pmu_alias {
|
||||
char *name;
|
||||
char *alias;
|
||||
@ -72,18 +70,14 @@ static int setup_pmu_alias_list(void)
|
||||
char path[PATH_MAX];
|
||||
DIR *dir;
|
||||
struct dirent *dent;
|
||||
const char *sysfs = sysfs__mountpoint();
|
||||
struct pmu_alias *pmu_alias;
|
||||
char buf[MAX_PMU_NAME_LEN];
|
||||
FILE *file;
|
||||
int ret = -ENOMEM;
|
||||
|
||||
if (!sysfs)
|
||||
if (!perf_pmu__event_source_devices_scnprintf(path, sizeof(path)))
|
||||
return -1;
|
||||
|
||||
snprintf(path, PATH_MAX,
|
||||
"%s" EVENT_SOURCE_DEVICE_PATH, sysfs);
|
||||
|
||||
dir = opendir(path);
|
||||
if (!dir)
|
||||
return -errno;
|
||||
@ -93,9 +87,7 @@ static int setup_pmu_alias_list(void)
|
||||
!strcmp(dent->d_name, ".."))
|
||||
continue;
|
||||
|
||||
snprintf(path, PATH_MAX,
|
||||
TEMPLATE_ALIAS, sysfs, dent->d_name);
|
||||
|
||||
perf_pmu__pathname_scnprintf(path, sizeof(path), dent->d_name, "alias");
|
||||
if (!file_available(path))
|
||||
continue;
|
||||
|
||||
|
@ -22,6 +22,8 @@ int bench_numa(int argc, const char **argv);
|
||||
int bench_sched_messaging(int argc, const char **argv);
|
||||
int bench_sched_pipe(int argc, const char **argv);
|
||||
int bench_syscall_basic(int argc, const char **argv);
|
||||
int bench_syscall_getpgid(int argc, const char **argv);
|
||||
int bench_syscall_execve(int argc, const char **argv);
|
||||
int bench_mem_memcpy(int argc, const char **argv);
|
||||
int bench_mem_memset(int argc, const char **argv);
|
||||
int bench_mem_find_bit(int argc, const char **argv);
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include <sys/time.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/wait.h>
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
@ -30,25 +31,77 @@ static const char * const bench_syscall_usage[] = {
|
||||
NULL
|
||||
};
|
||||
|
||||
int bench_syscall_basic(int argc, const char **argv)
|
||||
/*
 * Run one fork() + execve("/bin/true") + waitpid() cycle.
 *
 * Any failure (fork, execve in the child, or waitpid in the parent) prints a
 * message to stderr and terminates the calling process with exit status 1.
 */
static void test_execve(void)
{
	const char *pathname = "/bin/true";
	char *const args[] = { (char *)pathname, NULL };
	pid_t child = fork();

	if (child < 0) {
		fprintf(stderr, "fork failed\n");
		exit(1);
	}

	if (child == 0) {
		/* Child: execve() only returns when it failed */
		execve(pathname, args, NULL);
		fprintf(stderr, "execve /bin/true failed\n");
		exit(1);
	}

	/* Parent: reap the child so each iteration is a complete cycle */
	if (waitpid(child, NULL, 0) < 0) {
		fprintf(stderr, "waitpid failed\n");
		exit(1);
	}
}
|
||||
|
||||
static int bench_syscall_common(int argc, const char **argv, int syscall)
|
||||
{
|
||||
struct timeval start, stop, diff;
|
||||
unsigned long long result_usec = 0;
|
||||
const char *name = NULL;
|
||||
int i;
|
||||
|
||||
argc = parse_options(argc, argv, options, bench_syscall_usage, 0);
|
||||
|
||||
gettimeofday(&start, NULL);
|
||||
|
||||
for (i = 0; i < loops; i++)
|
||||
getppid();
|
||||
for (i = 0; i < loops; i++) {
|
||||
switch (syscall) {
|
||||
case __NR_getppid:
|
||||
getppid();
|
||||
break;
|
||||
case __NR_getpgid:
|
||||
getpgid(0);
|
||||
break;
|
||||
case __NR_execve:
|
||||
test_execve();
|
||||
/* Only loop 10000 times to save time */
|
||||
if (i == 10000)
|
||||
loops = 10000;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
gettimeofday(&stop, NULL);
|
||||
timersub(&stop, &start, &diff);
|
||||
|
||||
switch (syscall) {
|
||||
case __NR_getppid:
|
||||
name = "getppid()";
|
||||
break;
|
||||
case __NR_getpgid:
|
||||
name = "getpgid()";
|
||||
break;
|
||||
case __NR_execve:
|
||||
name = "execve()";
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
switch (bench_format) {
|
||||
case BENCH_FORMAT_DEFAULT:
|
||||
printf("# Executed %'d getppid() calls\n", loops);
|
||||
printf("# Executed %'d %s calls\n", loops, name);
|
||||
|
||||
result_usec = diff.tv_sec * 1000000;
|
||||
result_usec += diff.tv_usec;
|
||||
@ -79,3 +132,18 @@ int bench_syscall_basic(int argc, const char **argv)
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Entry points of the syscall benchmarks. Each one hands its arguments to
 * bench_syscall_common() together with the number of the syscall to run.
 */

/* "basic": getppid() */
int bench_syscall_basic(int argc, const char **argv)
{
	const int nr = __NR_getppid;

	return bench_syscall_common(argc, argv, nr);
}

/* "getpgid": getpgid() */
int bench_syscall_getpgid(int argc, const char **argv)
{
	const int nr = __NR_getpgid;

	return bench_syscall_common(argc, argv, nr);
}

/* "execve": execve() */
int bench_syscall_execve(int argc, const char **argv)
{
	const int nr = __NR_execve;

	return bench_syscall_common(argc, argv, nr);
}
|
||||
|
@ -52,6 +52,8 @@ static struct bench sched_benchmarks[] = {
|
||||
|
||||
static struct bench syscall_benchmarks[] = {
|
||||
{ "basic", "Benchmark for basic getppid(2) calls", bench_syscall_basic },
|
||||
{ "getpgid", "Benchmark for getpgid(2) calls", bench_syscall_getpgid },
|
||||
{ "execve", "Benchmark for execve(2) calls", bench_syscall_execve },
|
||||
{ "all", "Run all syscall benchmarks", NULL },
|
||||
{ NULL, NULL, NULL },
|
||||
};
|
||||
@ -150,7 +152,7 @@ unsigned int bench_repeat = 10; /* default number of times to repeat the run */
|
||||
|
||||
static const struct option bench_options[] = {
|
||||
OPT_STRING('f', "format", &bench_format_str, "default|simple", "Specify the output formatting style"),
|
||||
OPT_UINTEGER('r', "repeat", &bench_repeat, "Specify amount of times to repeat the run"),
|
||||
OPT_UINTEGER('r', "repeat", &bench_repeat, "Specify number of times to repeat the run"),
|
||||
OPT_END()
|
||||
};
|
||||
|
||||
|
@ -524,7 +524,7 @@ static int dcacheline_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
|
||||
char buf[20];
|
||||
|
||||
if (he->mem_info)
|
||||
addr = cl_address(he->mem_info->daddr.addr);
|
||||
addr = cl_address(he->mem_info->daddr.addr, chk_double_cl);
|
||||
|
||||
return scnprintf(hpp->buf, hpp->size, "%*s", width, HEX_STR(buf, addr));
|
||||
}
|
||||
@ -562,7 +562,7 @@ static int offset_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
|
||||
char buf[20];
|
||||
|
||||
if (he->mem_info)
|
||||
addr = cl_offset(he->mem_info->daddr.al_addr);
|
||||
addr = cl_offset(he->mem_info->daddr.al_addr, chk_double_cl);
|
||||
|
||||
return scnprintf(hpp->buf, hpp->size, "%*s", width, HEX_STR(buf, addr));
|
||||
}
|
||||
@ -574,9 +574,10 @@ offset_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
|
||||
uint64_t l = 0, r = 0;
|
||||
|
||||
if (left->mem_info)
|
||||
l = cl_offset(left->mem_info->daddr.addr);
|
||||
l = cl_offset(left->mem_info->daddr.addr, chk_double_cl);
|
||||
|
||||
if (right->mem_info)
|
||||
r = cl_offset(right->mem_info->daddr.addr);
|
||||
r = cl_offset(right->mem_info->daddr.addr, chk_double_cl);
|
||||
|
||||
return (int64_t)(r - l);
|
||||
}
|
||||
@ -2590,7 +2591,7 @@ perf_c2c_cacheline_browser__title(struct hist_browser *browser,
|
||||
he = cl_browser->he;
|
||||
|
||||
if (he->mem_info)
|
||||
addr = cl_address(he->mem_info->daddr.addr);
|
||||
addr = cl_address(he->mem_info->daddr.addr, chk_double_cl);
|
||||
|
||||
scnprintf(bf, size, "Cacheline 0x%lx", addr);
|
||||
return 0;
|
||||
@ -2788,15 +2789,16 @@ static int ui_quirks(void)
|
||||
if (!c2c.use_stdio) {
|
||||
dim_offset.width = 5;
|
||||
dim_offset.header = header_offset_tui;
|
||||
nodestr = "CL";
|
||||
nodestr = chk_double_cl ? "Double-CL" : "CL";
|
||||
}
|
||||
|
||||
dim_percent_costly_snoop.header = percent_costly_snoop_header[c2c.display];
|
||||
|
||||
/* Fix the zero line for dcacheline column. */
|
||||
buf = fill_line("Cacheline", dim_dcacheline.width +
|
||||
dim_dcacheline_node.width +
|
||||
dim_dcacheline_count.width + 4);
|
||||
buf = fill_line(chk_double_cl ? "Double-Cacheline" : "Cacheline",
|
||||
dim_dcacheline.width +
|
||||
dim_dcacheline_node.width +
|
||||
dim_dcacheline_count.width + 4);
|
||||
if (!buf)
|
||||
return -ENOMEM;
|
||||
|
||||
@ -3037,6 +3039,7 @@ static int perf_c2c__report(int argc, const char **argv)
|
||||
OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
|
||||
OPT_BOOLEAN(0, "stitch-lbr", &c2c.stitch_lbr,
|
||||
"Enable LBR callgraph stitching approach"),
|
||||
OPT_BOOLEAN(0, "double-cl", &chk_double_cl, "Detect adjacent cacheline false sharing"),
|
||||
OPT_PARENT(c2c_options),
|
||||
OPT_END()
|
||||
};
|
||||
|
@ -215,14 +215,14 @@ static int perf_event__repipe_event_update(struct perf_tool *tool,
|
||||
|
||||
#ifdef HAVE_AUXTRACE_SUPPORT
|
||||
|
||||
static int copy_bytes(struct perf_inject *inject, int fd, off_t size)
|
||||
static int copy_bytes(struct perf_inject *inject, struct perf_data *data, off_t size)
|
||||
{
|
||||
char buf[4096];
|
||||
ssize_t ssz;
|
||||
int ret;
|
||||
|
||||
while (size > 0) {
|
||||
ssz = read(fd, buf, min(size, (off_t)sizeof(buf)));
|
||||
ssz = perf_data__read(data, buf, min(size, (off_t)sizeof(buf)));
|
||||
if (ssz < 0)
|
||||
return -errno;
|
||||
ret = output_bytes(inject, buf, ssz);
|
||||
@ -260,7 +260,7 @@ static s64 perf_event__repipe_auxtrace(struct perf_session *session,
|
||||
ret = output_bytes(inject, event, event->header.size);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
ret = copy_bytes(inject, perf_data__fd(session->data),
|
||||
ret = copy_bytes(inject, session->data,
|
||||
event->auxtrace.size);
|
||||
} else {
|
||||
ret = output_bytes(inject, event,
|
||||
|
@ -99,8 +99,7 @@ static void default_print_event(void *ps, const char *pmu_name, const char *topi
|
||||
const char *scale_unit __maybe_unused,
|
||||
bool deprecated, const char *event_type_desc,
|
||||
const char *desc, const char *long_desc,
|
||||
const char *encoding_desc,
|
||||
const char *metric_name, const char *metric_expr)
|
||||
const char *encoding_desc)
|
||||
{
|
||||
struct print_state *print_state = ps;
|
||||
int pos;
|
||||
@ -159,10 +158,6 @@ static void default_print_event(void *ps, const char *pmu_name, const char *topi
|
||||
if (print_state->detailed && encoding_desc) {
|
||||
printf("%*s", 8, "");
|
||||
wordwrap(encoding_desc, 8, pager_get_columns(), 0);
|
||||
if (metric_name)
|
||||
printf(" MetricName: %s", metric_name);
|
||||
if (metric_expr)
|
||||
printf(" MetricExpr: %s", metric_expr);
|
||||
putchar('\n');
|
||||
}
|
||||
}
|
||||
@ -308,8 +303,7 @@ static void json_print_event(void *ps, const char *pmu_name, const char *topic,
|
||||
const char *scale_unit,
|
||||
bool deprecated, const char *event_type_desc,
|
||||
const char *desc, const char *long_desc,
|
||||
const char *encoding_desc,
|
||||
const char *metric_name, const char *metric_expr)
|
||||
const char *encoding_desc)
|
||||
{
|
||||
struct json_print_state *print_state = ps;
|
||||
bool need_sep = false;
|
||||
@ -366,16 +360,6 @@ static void json_print_event(void *ps, const char *pmu_name, const char *topic,
|
||||
encoding_desc);
|
||||
need_sep = true;
|
||||
}
|
||||
if (metric_name) {
|
||||
fix_escape_printf(&buf, "%s\t\"MetricName\": \"%S\"", need_sep ? ",\n" : "",
|
||||
metric_name);
|
||||
need_sep = true;
|
||||
}
|
||||
if (metric_expr) {
|
||||
fix_escape_printf(&buf, "%s\t\"MetricExpr\": \"%S\"", need_sep ? ",\n" : "",
|
||||
metric_expr);
|
||||
need_sep = true;
|
||||
}
|
||||
printf("%s}", need_sep ? "\n" : "");
|
||||
strbuf_release(&buf);
|
||||
}
|
||||
|
@ -58,16 +58,28 @@ static struct rb_root thread_stats;
|
||||
static bool combine_locks;
|
||||
static bool show_thread_stats;
|
||||
static bool show_lock_addrs;
|
||||
static bool show_lock_owner;
|
||||
static bool use_bpf;
|
||||
static unsigned long bpf_map_entries = 10240;
|
||||
static int max_stack_depth = CONTENTION_STACK_DEPTH;
|
||||
static int stack_skip = CONTENTION_STACK_SKIP;
|
||||
static int print_nr_entries = INT_MAX / 2;
|
||||
static LIST_HEAD(callstack_filters);
|
||||
|
||||
struct callstack_filter {
|
||||
struct list_head list;
|
||||
char name[];
|
||||
};
|
||||
|
||||
static struct lock_filter filters;
|
||||
|
||||
static enum lock_aggr_mode aggr_mode = LOCK_AGGR_ADDR;
|
||||
|
||||
static bool needs_callstack(void)
|
||||
{
|
||||
return verbose > 0 || !list_empty(&callstack_filters);
|
||||
}
|
||||
|
||||
static struct thread_stat *thread_stat_find(u32 tid)
|
||||
{
|
||||
struct rb_node *node;
|
||||
@ -454,7 +466,7 @@ static struct lock_stat *pop_from_result(void)
|
||||
return container_of(node, struct lock_stat, rb);
|
||||
}
|
||||
|
||||
static struct lock_stat *lock_stat_find(u64 addr)
|
||||
struct lock_stat *lock_stat_find(u64 addr)
|
||||
{
|
||||
struct hlist_head *entry = lockhashentry(addr);
|
||||
struct lock_stat *ret;
|
||||
@ -466,7 +478,7 @@ static struct lock_stat *lock_stat_find(u64 addr)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags)
|
||||
struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags)
|
||||
{
|
||||
struct hlist_head *entry = lockhashentry(addr);
|
||||
struct lock_stat *ret, *new;
|
||||
@ -498,6 +510,34 @@ static struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
 * Check @callstack against the registered callstack filters.
 *
 * Returns true when no filter is registered, or when the name of a kernel
 * symbol resolved from one of the stack entries contains one of the filter
 * strings. The walk stops at the first zero entry or after max_stack_depth
 * entries; entries that resolve to no symbol are skipped. A NULL @callstack
 * matches nothing.
 */
bool match_callstack_filter(struct machine *machine, u64 *callstack)
{
	struct callstack_filter *filter;
	struct symbol *sym;
	struct map *kmap;
	int depth;

	if (list_empty(&callstack_filters))
		return true;

	if (!callstack)
		return false;

	for (depth = 0; depth < max_stack_depth && callstack[depth]; depth++) {
		sym = machine__find_kernel_symbol(machine, callstack[depth], &kmap);
		if (!sym)
			continue;

		list_for_each_entry(filter, &callstack_filters, list) {
			if (strstr(sym->name, filter->name) != NULL)
				return true;
		}
	}

	return false;
}
|
||||
|
||||
struct trace_lock_handler {
|
||||
/* it's used on CONFIG_LOCKDEP */
|
||||
int (*acquire_event)(struct evsel *evsel,
|
||||
@ -1059,12 +1099,6 @@ static int report_lock_contention_begin_event(struct evsel *evsel,
|
||||
ls = lock_stat_findnew(key, name, flags);
|
||||
if (!ls)
|
||||
return -ENOMEM;
|
||||
|
||||
if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
|
||||
ls->callstack = get_callstack(sample, max_stack_depth);
|
||||
if (ls->callstack == NULL)
|
||||
return -ENOMEM;
|
||||
}
|
||||
}
|
||||
|
||||
if (filters.nr_types) {
|
||||
@ -1095,6 +1129,22 @@ static int report_lock_contention_begin_event(struct evsel *evsel,
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (needs_callstack()) {
|
||||
u64 *callstack = get_callstack(sample, max_stack_depth);
|
||||
if (callstack == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
if (!match_callstack_filter(machine, callstack)) {
|
||||
free(callstack);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (ls->callstack == NULL)
|
||||
ls->callstack = callstack;
|
||||
else
|
||||
free(callstack);
|
||||
}
|
||||
|
||||
ts = thread_stat_findnew(sample->tid);
|
||||
if (!ts)
|
||||
return -ENOMEM;
|
||||
@ -1567,7 +1617,8 @@ static void print_contention_result(struct lock_contention *con)
|
||||
|
||||
switch (aggr_mode) {
|
||||
case LOCK_AGGR_TASK:
|
||||
pr_info(" %10s %s\n\n", "pid", "comm");
|
||||
pr_info(" %10s %s\n\n", "pid",
|
||||
show_lock_owner ? "owner" : "comm");
|
||||
break;
|
||||
case LOCK_AGGR_CALLER:
|
||||
pr_info(" %10s %s\n\n", "type", "caller");
|
||||
@ -1607,7 +1658,8 @@ static void print_contention_result(struct lock_contention *con)
|
||||
case LOCK_AGGR_TASK:
|
||||
pid = st->addr;
|
||||
t = perf_session__findnew(session, pid);
|
||||
pr_info(" %10d %s\n", pid, thread__comm_str(t));
|
||||
pr_info(" %10d %s\n",
|
||||
pid, pid == -1 ? "Unknown" : thread__comm_str(t));
|
||||
break;
|
||||
case LOCK_AGGR_ADDR:
|
||||
pr_info(" %016llx %s\n", (unsigned long long)st->addr,
|
||||
@ -1719,6 +1771,37 @@ static void sighandler(int sig __maybe_unused)
|
||||
{
|
||||
}
|
||||
|
||||
static int check_lock_contention_options(const struct option *options,
|
||||
const char * const *usage)
|
||||
|
||||
{
|
||||
if (show_thread_stats && show_lock_addrs) {
|
||||
pr_err("Cannot use thread and addr mode together\n");
|
||||
parse_options_usage(usage, options, "threads", 0);
|
||||
parse_options_usage(NULL, options, "lock-addr", 0);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (show_lock_owner && !use_bpf) {
|
||||
pr_err("Lock owners are available only with BPF\n");
|
||||
parse_options_usage(usage, options, "lock-owner", 0);
|
||||
parse_options_usage(NULL, options, "use-bpf", 0);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (show_lock_owner && show_lock_addrs) {
|
||||
pr_err("Cannot use owner and addr mode together\n");
|
||||
parse_options_usage(usage, options, "lock-owner", 0);
|
||||
parse_options_usage(NULL, options, "lock-addr", 0);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (show_lock_owner)
|
||||
show_thread_stats = true;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __cmd_contention(int argc, const char **argv)
|
||||
{
|
||||
int err = -EINVAL;
|
||||
@ -1743,6 +1826,8 @@ static int __cmd_contention(int argc, const char **argv)
|
||||
.max_stack = max_stack_depth,
|
||||
.stack_skip = stack_skip,
|
||||
.filters = &filters,
|
||||
.save_callstack = needs_callstack(),
|
||||
.owner = show_lock_owner,
|
||||
};
|
||||
|
||||
session = perf_session__new(use_bpf ? NULL : &data, &eops);
|
||||
@ -1756,6 +1841,9 @@ static int __cmd_contention(int argc, const char **argv)
|
||||
con.aggr_mode = aggr_mode = show_thread_stats ? LOCK_AGGR_TASK :
|
||||
show_lock_addrs ? LOCK_AGGR_ADDR : LOCK_AGGR_CALLER;
|
||||
|
||||
if (con.aggr_mode == LOCK_AGGR_CALLER)
|
||||
con.save_callstack = true;
|
||||
|
||||
/* for lock function check */
|
||||
symbol_conf.sort_by_name = true;
|
||||
symbol_conf.allow_aliases = true;
|
||||
@ -2123,6 +2211,33 @@ static int parse_lock_addr(const struct option *opt __maybe_unused, const char *
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Option callback for -S/--callstack-filter.
 *
 * Splits @str on ',' and ' ' and appends one callstack_filter entry per
 * token to callstack_filters.
 *
 * Returns 0 on success and -1 when memory cannot be allocated. Entries that
 * were already appended before a failure stay on the list.
 */
static int parse_call_stack(const struct option *opt __maybe_unused, const char *str,
			    int unset __maybe_unused)
{
	char *s, *tmp, *tok;
	int ret = 0;

	/* strtok_r() modifies its input, so work on a private copy */
	s = strdup(str);
	if (s == NULL)
		return -1;

	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
		struct callstack_filter *entry;

		entry = malloc(sizeof(*entry) + strlen(tok) + 1);
		if (entry == NULL) {
			pr_err("Memory allocation failure\n");
			/* Leave through the common exit so that 's' is released */
			ret = -1;
			break;
		}

		strcpy(entry->name, tok);
		list_add_tail(&entry->list, &callstack_filters);
	}

	free(s);
	return ret;
}
|
||||
|
||||
int cmd_lock(int argc, const char **argv)
|
||||
{
|
||||
const struct option lock_options[] = {
|
||||
@ -2190,6 +2305,9 @@ int cmd_lock(int argc, const char **argv)
|
||||
"Filter specific type of locks", parse_lock_type),
|
||||
OPT_CALLBACK('L', "lock-filter", NULL, "ADDRS/NAMES",
|
||||
"Filter specific address/symbol of locks", parse_lock_addr),
|
||||
OPT_CALLBACK('S', "callstack-filter", NULL, "NAMES",
|
||||
"Filter specific function in the callstack", parse_call_stack),
|
||||
OPT_BOOLEAN('o', "lock-owner", &show_lock_owner, "show lock owners instead of waiters"),
|
||||
OPT_PARENT(lock_options)
|
||||
};
|
||||
|
||||
@ -2260,14 +2378,9 @@ int cmd_lock(int argc, const char **argv)
|
||||
contention_usage, 0);
|
||||
}
|
||||
|
||||
if (show_thread_stats && show_lock_addrs) {
|
||||
pr_err("Cannot use thread and addr mode together\n");
|
||||
parse_options_usage(contention_usage, contention_options,
|
||||
"threads", 0);
|
||||
parse_options_usage(NULL, contention_options,
|
||||
"lock-addr", 0);
|
||||
if (check_lock_contention_options(contention_options,
|
||||
contention_usage) < 0)
|
||||
return -1;
|
||||
}
|
||||
|
||||
rc = __cmd_contention(argc, argv);
|
||||
} else {
|
||||
|
@ -383,9 +383,18 @@ static int perf_add_probe_events(struct perf_probe_event *pevs, int npevs)
|
||||
|
||||
/* Note that it is possible to skip all events because of blacklist */
|
||||
if (event) {
|
||||
#ifndef HAVE_LIBTRACEEVENT
|
||||
pr_info("\nperf is not linked with libtraceevent, to use the new probe you can use tracefs:\n\n");
|
||||
pr_info("\tcd /sys/kernel/tracing/\n");
|
||||
pr_info("\techo 1 > events/%s/%s/enable\n", group, event);
|
||||
pr_info("\techo 1 > tracing_on\n");
|
||||
pr_info("\tcat trace_pipe\n");
|
||||
pr_info("\tBefore removing the probe, echo 0 > events/%s/%s/enable\n", group, event);
|
||||
#else
|
||||
/* Show how to use the event. */
|
||||
pr_info("\nYou can now use it in all perf tools, such as:\n\n");
|
||||
pr_info("\tperf record -e %s:%s -aR sleep 1\n\n", group, event);
|
||||
#endif
|
||||
}
|
||||
|
||||
out_cleanup:
|
||||
|
@ -154,6 +154,7 @@ struct record {
|
||||
struct perf_tool tool;
|
||||
struct record_opts opts;
|
||||
u64 bytes_written;
|
||||
u64 thread_bytes_written;
|
||||
struct perf_data data;
|
||||
struct auxtrace_record *itr;
|
||||
struct evlist *evlist;
|
||||
@ -226,14 +227,7 @@ static bool switch_output_time(struct record *rec)
|
||||
|
||||
static u64 record__bytes_written(struct record *rec)
|
||||
{
|
||||
int t;
|
||||
u64 bytes_written = rec->bytes_written;
|
||||
struct record_thread *thread_data = rec->thread_data;
|
||||
|
||||
for (t = 0; t < rec->nr_threads; t++)
|
||||
bytes_written += thread_data[t].bytes_written;
|
||||
|
||||
return bytes_written;
|
||||
return rec->bytes_written + rec->thread_bytes_written;
|
||||
}
|
||||
|
||||
static bool record__output_max_size_exceeded(struct record *rec)
|
||||
@ -255,10 +249,12 @@ static int record__write(struct record *rec, struct mmap *map __maybe_unused,
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (map && map->file)
|
||||
if (map && map->file) {
|
||||
thread->bytes_written += size;
|
||||
else
|
||||
rec->thread_bytes_written += size;
|
||||
} else {
|
||||
rec->bytes_written += size;
|
||||
}
|
||||
|
||||
if (record__output_max_size_exceeded(rec) && !done) {
|
||||
fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
|
||||
|
@ -59,6 +59,7 @@
|
||||
#include "util/dlfilter.h"
|
||||
#include "util/record.h"
|
||||
#include "util/util.h"
|
||||
#include "util/cgroup.h"
|
||||
#include "perf.h"
|
||||
|
||||
#include <linux/ctype.h>
|
||||
@ -130,6 +131,8 @@ enum perf_output_field {
|
||||
PERF_OUTPUT_BRSTACKINSNLEN = 1ULL << 36,
|
||||
PERF_OUTPUT_MACHINE_PID = 1ULL << 37,
|
||||
PERF_OUTPUT_VCPU = 1ULL << 38,
|
||||
PERF_OUTPUT_CGROUP = 1ULL << 39,
|
||||
PERF_OUTPUT_RETIRE_LAT = 1ULL << 40,
|
||||
};
|
||||
|
||||
struct perf_script {
|
||||
@ -200,6 +203,8 @@ struct output_option {
|
||||
{.str = "brstackinsnlen", .field = PERF_OUTPUT_BRSTACKINSNLEN},
|
||||
{.str = "machine_pid", .field = PERF_OUTPUT_MACHINE_PID},
|
||||
{.str = "vcpu", .field = PERF_OUTPUT_VCPU},
|
||||
{.str = "cgroup", .field = PERF_OUTPUT_CGROUP},
|
||||
{.str = "retire_lat", .field = PERF_OUTPUT_RETIRE_LAT},
|
||||
};
|
||||
|
||||
enum {
|
||||
@ -275,7 +280,7 @@ static struct {
|
||||
PERF_OUTPUT_ADDR | PERF_OUTPUT_DATA_SRC |
|
||||
PERF_OUTPUT_WEIGHT | PERF_OUTPUT_PHYS_ADDR |
|
||||
PERF_OUTPUT_DATA_PAGE_SIZE | PERF_OUTPUT_CODE_PAGE_SIZE |
|
||||
PERF_OUTPUT_INS_LAT,
|
||||
PERF_OUTPUT_INS_LAT | PERF_OUTPUT_RETIRE_LAT,
|
||||
|
||||
.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
|
||||
},
|
||||
@ -542,6 +547,16 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session)
|
||||
evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT_STRUCT, "WEIGHT_STRUCT", PERF_OUTPUT_INS_LAT))
|
||||
return -EINVAL;
|
||||
|
||||
if (PRINT_FIELD(CGROUP) &&
|
||||
evsel__check_stype(evsel, PERF_SAMPLE_CGROUP, "CGROUP", PERF_OUTPUT_CGROUP)) {
|
||||
pr_err("Hint: run 'perf record --all-cgroups ...'\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (PRINT_FIELD(RETIRE_LAT) &&
|
||||
evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT_STRUCT, "WEIGHT_STRUCT", PERF_OUTPUT_RETIRE_LAT))
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -880,12 +895,13 @@ mispred_str(struct branch_entry *br)
|
||||
|
||||
static int print_bstack_flags(FILE *fp, struct branch_entry *br)
|
||||
{
|
||||
return fprintf(fp, "/%c/%c/%c/%d/%s ",
|
||||
return fprintf(fp, "/%c/%c/%c/%d/%s/%s ",
|
||||
mispred_str(br),
|
||||
br->flags.in_tx ? 'X' : '-',
|
||||
br->flags.abort ? 'A' : '-',
|
||||
br->flags.cycles,
|
||||
get_branch_type(br));
|
||||
get_branch_type(br),
|
||||
br->flags.spec ? branch_spec_desc(br->flags.spec) : "-");
|
||||
}
|
||||
|
||||
static int perf_sample__fprintf_brstack(struct perf_sample *sample,
|
||||
@ -1301,7 +1317,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Print final block upto sample
|
||||
* Print final block up to sample
|
||||
*
|
||||
* Due to pipeline delays the LBRs might be missing a branch
|
||||
* or two, which can result in very large or negative blocks
|
||||
@ -2178,6 +2194,9 @@ static void process_event(struct perf_script *script,
|
||||
if (PRINT_FIELD(INS_LAT))
|
||||
fprintf(fp, "%16" PRIu16, sample->ins_lat);
|
||||
|
||||
if (PRINT_FIELD(RETIRE_LAT))
|
||||
fprintf(fp, "%16" PRIu16, sample->retire_lat);
|
||||
|
||||
if (PRINT_FIELD(IP)) {
|
||||
struct callchain_cursor *cursor = NULL;
|
||||
|
||||
@ -2220,6 +2239,17 @@ static void process_event(struct perf_script *script,
|
||||
if (PRINT_FIELD(CODE_PAGE_SIZE))
|
||||
fprintf(fp, " %s", get_page_size_name(sample->code_page_size, str));
|
||||
|
||||
if (PRINT_FIELD(CGROUP)) {
|
||||
const char *cgrp_name;
|
||||
struct cgroup *cgrp = cgroup__find(machine->env,
|
||||
sample->cgroup);
|
||||
if (cgrp != NULL)
|
||||
cgrp_name = cgrp->name;
|
||||
else
|
||||
cgrp_name = "unknown";
|
||||
fprintf(fp, " %s", cgrp_name);
|
||||
}
|
||||
|
||||
perf_sample__fprintf_ipc(sample, attr, fp);
|
||||
|
||||
fprintf(fp, "\n");
|
||||
@ -3856,7 +3886,7 @@ int cmd_script(int argc, const char **argv)
|
||||
"brstacksym,flags,data_src,weight,bpf-output,brstackinsn,"
|
||||
"brstackinsnlen,brstackoff,callindent,insn,insnlen,synth,"
|
||||
"phys_addr,metric,misc,srccode,ipc,tod,data_page_size,"
|
||||
"code_page_size,ins_lat",
|
||||
"code_page_size,ins_lat,machine_pid,vcpu,cgroup,retire_lat",
|
||||
parse_output_fields),
|
||||
OPT_BOOLEAN('a', "all-cpus", &system_wide,
|
||||
"system-wide collection from all CPUs"),
|
||||
|
@ -2524,7 +2524,6 @@ int cmd_stat(int argc, const char **argv)
|
||||
&stat_config.metric_events);
|
||||
zfree(&metrics);
|
||||
}
|
||||
perf_stat__collect_metric_expr(evsel_list);
|
||||
perf_stat__init_shadow_stats();
|
||||
|
||||
if (add_default_attributes())
|
||||
|
@ -2731,10 +2731,8 @@ static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel,
|
||||
offset = format_field__intval(field, sample, evsel->needs_swap);
|
||||
syscall_arg.len = offset >> 16;
|
||||
offset &= 0xffff;
|
||||
#ifdef HAVE_LIBTRACEEVENT_TEP_FIELD_IS_RELATIVE
|
||||
if (field->flags & TEP_FIELD_IS_RELATIVE)
|
||||
if (tep_field_is_relative(field->flags))
|
||||
offset += field->offset + field->size;
|
||||
#endif
|
||||
}
|
||||
|
||||
val = (uintptr_t)(sample->raw_data + offset);
|
||||
|
@ -165,7 +165,12 @@ __perf_main ()
|
||||
|
||||
local cur1=${COMP_WORDS[COMP_CWORD]}
|
||||
local raw_evts=$($cmd list --raw-dump)
|
||||
local arr s tmp result
|
||||
local arr s tmp result cpu_evts
|
||||
|
||||
# aarch64 doesn't have /sys/bus/event_source/devices/cpu/events
|
||||
if [[ `uname -m` != aarch64 ]]; then
|
||||
cpu_evts=$(ls /sys/bus/event_source/devices/cpu/events)
|
||||
fi
|
||||
|
||||
if [[ "$cur1" == */* && ${cur1#*/} =~ ^[A-Z] ]]; then
|
||||
OLD_IFS="$IFS"
|
||||
@ -183,9 +188,9 @@ __perf_main ()
|
||||
fi
|
||||
done
|
||||
|
||||
evts=${result}" "$(ls /sys/bus/event_source/devices/cpu/events)
|
||||
evts=${result}" "${cpu_evts}
|
||||
else
|
||||
evts=${raw_evts}" "$(ls /sys/bus/event_source/devices/cpu/events)
|
||||
evts=${raw_evts}" "${cpu_evts}
|
||||
fi
|
||||
|
||||
if [[ "$cur1" == , ]]; then
|
||||
|
@ -6,10 +6,16 @@ JDIR_TEST = pmu-events/arch/test
|
||||
JSON_TEST = $(shell [ -d $(JDIR_TEST) ] && \
|
||||
find $(JDIR_TEST) -name '*.json')
|
||||
JEVENTS_PY = pmu-events/jevents.py
|
||||
METRIC_PY = pmu-events/metric.py
|
||||
METRIC_TEST_PY = pmu-events/metric_test.py
|
||||
EMPTY_PMU_EVENTS_C = pmu-events/empty-pmu-events.c
|
||||
PMU_EVENTS_C = $(OUTPUT)pmu-events/pmu-events.c
|
||||
METRIC_TEST_LOG = $(OUTPUT)pmu-events/metric_test.log
|
||||
|
||||
ifeq ($(JEVENTS_ARCH),)
|
||||
JEVENTS_ARCH=$(SRCARCH)
|
||||
endif
|
||||
JEVENTS_MODEL ?= all
|
||||
|
||||
#
|
||||
# Locate/process JSON files in pmu-events/arch/
|
||||
@ -17,11 +23,15 @@ endif
|
||||
#
|
||||
|
||||
ifeq ($(NO_JEVENTS),1)
|
||||
$(OUTPUT)pmu-events/pmu-events.c: pmu-events/empty-pmu-events.c
|
||||
$(PMU_EVENTS_C): $(EMPTY_PMU_EVENTS_C)
|
||||
$(call rule_mkdir)
|
||||
$(Q)$(call echo-cmd,gen)cp $< $@
|
||||
else
|
||||
$(OUTPUT)pmu-events/pmu-events.c: $(JSON) $(JSON_TEST) $(JEVENTS_PY) pmu-events/metric.py
|
||||
$(METRIC_TEST_LOG): $(METRIC_TEST_PY) $(METRIC_PY)
|
||||
$(call rule_mkdir)
|
||||
$(Q)$(call echo-cmd,gen)$(PYTHON) $(JEVENTS_PY) $(JEVENTS_ARCH) pmu-events/arch $@
|
||||
$(Q)$(call echo-cmd,test)$(PYTHON) $< 2> $@ || (cat $@ && false)
|
||||
|
||||
$(PMU_EVENTS_C): $(JSON) $(JSON_TEST) $(JEVENTS_PY) $(METRIC_PY) $(METRIC_TEST_LOG)
|
||||
$(call rule_mkdir)
|
||||
$(Q)$(call echo-cmd,gen)$(PYTHON) $(JEVENTS_PY) $(JEVENTS_ARCH) $(JEVENTS_MODEL) pmu-events/arch $@
|
||||
endif
|
||||
|
273
tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/metrics.json
Normal file
273
tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/metrics.json
Normal file
@ -0,0 +1,273 @@
|
||||
[
|
||||
{
|
||||
"ArchStdEvent": "FRONTEND_BOUND",
|
||||
"MetricExpr": "((stall_slot_frontend) if (#slots - 5) else (stall_slot_frontend - cpu_cycles)) / (#slots * cpu_cycles)"
|
||||
},
|
||||
{
|
||||
"ArchStdEvent": "BAD_SPECULATION",
|
||||
"MetricExpr": "(1 - op_retired / op_spec) * (1 - (stall_slot if (#slots - 5) else (stall_slot - cpu_cycles)) / (#slots * cpu_cycles))"
|
||||
},
|
||||
{
|
||||
"ArchStdEvent": "RETIRING",
|
||||
"MetricExpr": "(op_retired / op_spec) * (1 - (stall_slot if (#slots - 5) else (stall_slot - cpu_cycles)) / (#slots * cpu_cycles))"
|
||||
},
|
||||
{
|
||||
"ArchStdEvent": "BACKEND_BOUND"
|
||||
},
|
||||
{
|
||||
"MetricExpr": "L1D_TLB_REFILL / L1D_TLB",
|
||||
"BriefDescription": "The rate of L1D TLB refill to the overall L1D TLB lookups",
|
||||
"MetricGroup": "TLB",
|
||||
"MetricName": "l1d_tlb_miss_rate",
|
||||
"ScaleUnit": "100%"
|
||||
},
|
||||
{
|
||||
"MetricExpr": "L1I_TLB_REFILL / L1I_TLB",
|
||||
"BriefDescription": "The rate of L1I TLB refill to the overall L1I TLB lookups",
|
||||
"MetricGroup": "TLB",
|
||||
"MetricName": "l1i_tlb_miss_rate",
|
||||
"ScaleUnit": "100%"
|
||||
},
|
||||
{
|
||||
"MetricExpr": "L2D_TLB_REFILL / L2D_TLB",
|
||||
"BriefDescription": "The rate of L2D TLB refill to the overall L2D TLB lookups",
|
||||
"MetricGroup": "TLB",
|
||||
"MetricName": "l2_tlb_miss_rate",
|
||||
"ScaleUnit": "100%"
|
||||
},
|
||||
{
|
||||
"MetricExpr": "DTLB_WALK / INST_RETIRED * 1000",
|
||||
"BriefDescription": "The rate of TLB Walks per kilo instructions for data accesses",
|
||||
"MetricGroup": "TLB",
|
||||
"MetricName": "dtlb_mpki",
|
||||
"ScaleUnit": "1MPKI"
|
||||
},
|
||||
{
|
||||
"MetricExpr": "DTLB_WALK / L1D_TLB",
|
||||
"BriefDescription": "The rate of DTLB Walks to the overall L1D TLB lookups",
|
||||
"MetricGroup": "TLB",
|
||||
"MetricName": "dtlb_walk_rate",
|
||||
"ScaleUnit": "100%"
|
||||
},
|
||||
{
|
||||
"MetricExpr": "ITLB_WALK / INST_RETIRED * 1000",
|
||||
"BriefDescription": "The rate of TLB Walks per kilo instructions for instruction accesses",
|
||||
"MetricGroup": "TLB",
|
||||
"MetricName": "itlb_mpki",
|
||||
"ScaleUnit": "1MPKI"
|
||||
},
|
||||
{
|
||||
"MetricExpr": "ITLB_WALK / L1I_TLB",
|
||||
"BriefDescription": "The rate of ITLB Walks to the overall L1I TLB lookups",
|
||||
"MetricGroup": "TLB",
|
||||
"MetricName": "itlb_walk_rate",
|
||||
"ScaleUnit": "100%"
|
||||
},
|
||||
{
|
||||
"MetricExpr": "L1I_CACHE_REFILL / INST_RETIRED * 1000",
|
||||
"BriefDescription": "The rate of L1 I-Cache misses per kilo instructions",
|
||||
"MetricGroup": "Cache",
|
||||
"MetricName": "l1i_cache_mpki",
|
||||
"ScaleUnit": "1MPKI"
|
||||
},
|
||||
{
|
||||
"MetricExpr": "L1I_CACHE_REFILL / L1I_CACHE",
|
||||
"BriefDescription": "The rate of L1 I-Cache misses to the overall L1 I-Cache",
|
||||
"MetricGroup": "Cache",
|
||||
"MetricName": "l1i_cache_miss_rate",
|
||||
"ScaleUnit": "100%"
|
||||
},
|
||||
{
|
||||
"MetricExpr": "L1D_CACHE_REFILL / INST_RETIRED * 1000",
|
||||
"BriefDescription": "The rate of L1 D-Cache misses per kilo instructions",
|
||||
"MetricGroup": "Cache",
|
||||
"MetricName": "l1d_cache_mpki",
|
||||
"ScaleUnit": "1MPKI"
|
||||
},
|
||||
{
|
||||
"MetricExpr": "L1D_CACHE_REFILL / L1D_CACHE",
|
||||
"BriefDescription": "The rate of L1 D-Cache misses to the overall L1 D-Cache",
|
||||
"MetricGroup": "Cache",
|
||||
"MetricName": "l1d_cache_miss_rate",
|
||||
"ScaleUnit": "100%"
|
||||
},
|
||||
{
|
||||
"MetricExpr": "L2D_CACHE_REFILL / INST_RETIRED * 1000",
|
||||
"BriefDescription": "The rate of L2 D-Cache misses per kilo instructions",
|
||||
"MetricGroup": "Cache",
|
||||
"MetricName": "l2d_cache_mpki",
|
||||
"ScaleUnit": "1MPKI"
|
||||
},
|
||||
{
|
||||
"MetricExpr": "L2D_CACHE_REFILL / L2D_CACHE",
|
||||
"BriefDescription": "The rate of L2 D-Cache misses to the overall L2 D-Cache",
|
||||
"MetricGroup": "Cache",
|
||||
"MetricName": "l2d_cache_miss_rate",
|
||||
"ScaleUnit": "100%"
|
||||
},
|
||||
{
|
||||
"MetricExpr": "L3D_CACHE_REFILL / INST_RETIRED * 1000",
|
||||
"BriefDescription": "The rate of L3 D-Cache misses per kilo instructions",
|
||||
"MetricGroup": "Cache",
|
||||
"MetricName": "l3d_cache_mpki",
|
||||
"ScaleUnit": "1MPKI"
|
||||
},
|
||||
{
|
||||
"MetricExpr": "L3D_CACHE_REFILL / L3D_CACHE",
|
||||
"BriefDescription": "The rate of L3 D-Cache misses to the overall L3 D-Cache",
|
||||
"MetricGroup": "Cache",
|
||||
"MetricName": "l3d_cache_miss_rate",
|
||||
"ScaleUnit": "100%"
|
||||
},
|
||||
{
|
||||
"MetricExpr": "LL_CACHE_MISS_RD / INST_RETIRED * 1000",
|
||||
"BriefDescription": "The rate of LL Cache read misses per kilo instructions",
|
||||
"MetricGroup": "Cache",
|
||||
"MetricName": "ll_cache_read_mpki",
|
||||
"ScaleUnit": "1MPKI"
|
||||
},
|
||||
{
|
||||
"MetricExpr": "LL_CACHE_MISS_RD / LL_CACHE_RD",
|
||||
"BriefDescription": "The rate of LL Cache read misses to the overall LL Cache read",
|
||||
"MetricGroup": "Cache",
|
||||
"MetricName": "ll_cache_read_miss_rate",
|
||||
"ScaleUnit": "100%"
|
||||
},
|
||||
{
|
||||
"MetricExpr": "(LL_CACHE_RD - LL_CACHE_MISS_RD) / LL_CACHE_RD",
|
||||
"BriefDescription": "The rate of LL Cache read hit to the overall LL Cache read",
|
||||
"MetricGroup": "Cache",
|
||||
"MetricName": "ll_cache_read_hit_rate",
|
||||
"ScaleUnit": "100%"
|
||||
},
|
||||
{
|
||||
"MetricExpr": "BR_MIS_PRED_RETIRED / INST_RETIRED * 1000",
|
||||
"BriefDescription": "The rate of branches mis-predicted per kilo instructions",
|
||||
"MetricGroup": "Branch",
|
||||
"MetricName": "branch_mpki",
|
||||
"ScaleUnit": "1MPKI"
|
||||
},
|
||||
{
|
||||
"MetricExpr": "BR_RETIRED / INST_RETIRED * 1000",
|
||||
"BriefDescription": "The rate of branches retired per kilo instructions",
|
||||
"MetricGroup": "Branch",
|
||||
"MetricName": "branch_pki",
|
||||
"ScaleUnit": "1PKI"
|
||||
},
|
||||
{
|
||||
"MetricExpr": "BR_MIS_PRED_RETIRED / BR_RETIRED",
|
||||
"BriefDescription": "The rate of branches mis-predicted to the overall branches",
|
||||
"MetricGroup": "Branch",
|
||||
"MetricName": "branch_miss_pred_rate",
|
||||
"ScaleUnit": "100%"
|
||||
},
|
||||
{
|
||||
"MetricExpr": "instructions / CPU_CYCLES",
|
||||
"BriefDescription": "The average number of instructions executed for each cycle.",
|
||||
"MetricGroup": "PEutilization",
|
||||
"MetricName": "ipc"
|
||||
},
|
||||
{
|
||||
"MetricExpr": "ipc / 5",
|
||||
"BriefDescription": "IPC percentage of peak. The peak of IPC is 5.",
|
||||
"MetricGroup": "PEutilization",
|
||||
"MetricName": "ipc_rate",
|
||||
"ScaleUnit": "100%"
|
||||
},
|
||||
{
|
||||
"MetricExpr": "INST_RETIRED / CPU_CYCLES",
|
||||
"BriefDescription": "Architecturally executed Instructions Per Cycle (IPC)",
|
||||
"MetricGroup": "PEutilization",
|
||||
"MetricName": "retired_ipc"
|
||||
},
|
||||
{
|
||||
"MetricExpr": "INST_SPEC / CPU_CYCLES",
|
||||
"BriefDescription": "Speculatively executed Instructions Per Cycle (IPC)",
|
||||
"MetricGroup": "PEutilization",
|
||||
"MetricName": "spec_ipc"
|
||||
},
|
||||
{
|
||||
"MetricExpr": "OP_RETIRED / OP_SPEC",
|
||||
"BriefDescription": "Of all the micro-operations issued, what percentage are retired(committed)",
|
||||
"MetricGroup": "PEutilization",
|
||||
"MetricName": "retired_rate",
|
||||
"ScaleUnit": "100%"
|
||||
},
|
||||
{
|
||||
"MetricExpr": "1 - OP_RETIRED / OP_SPEC",
|
||||
"BriefDescription": "Of all the micro-operations issued, what percentage are not retired(committed)",
|
||||
"MetricGroup": "PEutilization",
|
||||
"MetricName": "wasted_rate",
|
||||
"ScaleUnit": "100%"
|
||||
},
|
||||
{
|
||||
"MetricExpr": "OP_RETIRED / OP_SPEC * (1 - (STALL_SLOT if (#slots - 5) else (STALL_SLOT - CPU_CYCLES)) / (#slots * CPU_CYCLES))",
|
||||
"BriefDescription": "The truly effective ratio of micro-operations executed by the CPU, which means that misprediction and stall are not included",
|
||||
"MetricGroup": "PEutilization",
|
||||
"MetricName": "cpu_utilization",
|
||||
"ScaleUnit": "100%"
|
||||
},
|
||||
{
|
||||
"MetricExpr": "LD_SPEC / INST_SPEC",
|
||||
"BriefDescription": "The rate of load instructions speculatively executed to overall instructions speculatively executed",
|
||||
"MetricGroup": "InstructionMix",
|
||||
"MetricName": "load_spec_rate",
|
||||
"ScaleUnit": "100%"
|
||||
},
|
||||
{
|
||||
"MetricExpr": "ST_SPEC / INST_SPEC",
|
||||
"BriefDescription": "The rate of store instructions speculatively executed to overall instructions speculatively executed",
|
||||
"MetricGroup": "InstructionMix",
|
||||
"MetricName": "store_spec_rate",
|
||||
"ScaleUnit": "100%"
|
||||
},
|
||||
{
|
||||
"MetricExpr": "DP_SPEC / INST_SPEC",
|
||||
"BriefDescription": "The rate of integer data-processing instructions speculatively executed to overall instructions speculatively executed",
|
||||
"MetricGroup": "InstructionMix",
|
||||
"MetricName": "data_process_spec_rate",
|
||||
"ScaleUnit": "100%"
|
||||
},
|
||||
{
|
||||
"MetricExpr": "ASE_SPEC / INST_SPEC",
|
||||
"BriefDescription": "The rate of advanced SIMD instructions speculatively executed to overall instructions speculatively executed",
|
||||
"MetricGroup": "InstructionMix",
|
||||
"MetricName": "advanced_simd_spec_rate",
|
||||
"ScaleUnit": "100%"
|
||||
},
|
||||
{
|
||||
"MetricExpr": "VFP_SPEC / INST_SPEC",
|
||||
"BriefDescription": "The rate of floating point instructions speculatively executed to overall instructions speculatively executed",
|
||||
"MetricGroup": "InstructionMix",
|
||||
"MetricName": "float_point_spec_rate",
|
||||
"ScaleUnit": "100%"
|
||||
},
|
||||
{
|
||||
"MetricExpr": "CRYPTO_SPEC / INST_SPEC",
|
||||
"BriefDescription": "The rate of crypto instructions speculatively executed to overall instructions speculatively executed",
|
||||
"MetricGroup": "InstructionMix",
|
||||
"MetricName": "crypto_spec_rate",
|
||||
"ScaleUnit": "100%"
|
||||
},
|
||||
{
|
||||
"MetricExpr": "BR_IMMED_SPEC / INST_SPEC",
|
||||
"BriefDescription": "The rate of branch immediate instructions speculatively executed to overall instructions speculatively executed",
|
||||
"MetricGroup": "InstructionMix",
|
||||
"MetricName": "branch_immed_spec_rate",
|
||||
"ScaleUnit": "100%"
|
||||
},
|
||||
{
|
||||
"MetricExpr": "BR_RETURN_SPEC / INST_SPEC",
|
||||
"BriefDescription": "The rate of procedure return instructions speculatively executed to overall instructions speculatively executed",
|
||||
"MetricGroup": "InstructionMix",
|
||||
"MetricName": "branch_return_spec_rate",
|
||||
"ScaleUnit": "100%"
|
||||
},
|
||||
{
|
||||
"MetricExpr": "BR_INDIRECT_SPEC / INST_SPEC",
|
||||
"BriefDescription": "The rate of indirect branch instructions speculatively executed to overall instructions speculatively executed",
|
||||
"MetricGroup": "InstructionMix",
|
||||
"MetricName": "branch_indirect_spec_rate",
|
||||
"ScaleUnit": "100%"
|
||||
}
|
||||
]
|
30
tools/perf/pmu-events/arch/arm64/sbsa.json
Normal file
30
tools/perf/pmu-events/arch/arm64/sbsa.json
Normal file
@ -0,0 +1,30 @@
|
||||
[
|
||||
{
|
||||
"MetricExpr": "stall_slot_frontend / (#slots * cpu_cycles)",
|
||||
"BriefDescription": "Frontend bound L1 topdown metric",
|
||||
"MetricGroup": "TopdownL1",
|
||||
"MetricName": "frontend_bound",
|
||||
"ScaleUnit": "100%"
|
||||
},
|
||||
{
|
||||
"MetricExpr": "(1 - op_retired / op_spec) * (1 - stall_slot / (#slots * cpu_cycles))",
|
||||
"BriefDescription": "Bad speculation L1 topdown metric",
|
||||
"MetricGroup": "TopdownL1",
|
||||
"MetricName": "bad_speculation",
|
||||
"ScaleUnit": "100%"
|
||||
},
|
||||
{
|
||||
"MetricExpr": "(op_retired / op_spec) * (1 - stall_slot / (#slots * cpu_cycles))",
|
||||
"BriefDescription": "Retiring L1 topdown metric",
|
||||
"MetricGroup": "TopdownL1",
|
||||
"MetricName": "retiring",
|
||||
"ScaleUnit": "100%"
|
||||
},
|
||||
{
|
||||
"MetricExpr": "stall_slot_backend / (#slots * cpu_cycles)",
|
||||
"BriefDescription": "Backend Bound L1 topdown metric",
|
||||
"MetricGroup": "TopdownL1",
|
||||
"MetricName": "backend_bound",
|
||||
"ScaleUnit": "100%"
|
||||
}
|
||||
]
|
@ -15,7 +15,7 @@
|
||||
{
|
||||
"BriefDescription": "Average cycles per completed instruction when dispatch was stalled for any reason",
|
||||
"MetricExpr": "PM_DISP_STALL_CYC / PM_RUN_INST_CMPL",
|
||||
"MetricGroup": "CPI",
|
||||
"MetricGroup": "CPI;CPI_STALL_RATIO",
|
||||
"MetricName": "DISPATCHED_CPI"
|
||||
},
|
||||
{
|
||||
@ -147,13 +147,13 @@
|
||||
{
|
||||
"BriefDescription": "Average cycles per completed instruction when the NTC instruction has been dispatched but not issued for any reason",
|
||||
"MetricExpr": "PM_ISSUE_STALL / PM_RUN_INST_CMPL",
|
||||
"MetricGroup": "CPI",
|
||||
"MetricGroup": "CPI;CPI_STALL_RATIO",
|
||||
"MetricName": "ISSUE_STALL_CPI"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Average cycles per completed instruction when the NTC instruction is waiting to be finished in one of the execution units",
|
||||
"MetricExpr": "PM_EXEC_STALL / PM_RUN_INST_CMPL",
|
||||
"MetricGroup": "CPI",
|
||||
"MetricGroup": "CPI;CPI_STALL_RATIO",
|
||||
"MetricName": "EXECUTION_STALL_CPI"
|
||||
},
|
||||
{
|
||||
@ -309,7 +309,7 @@
|
||||
{
|
||||
"BriefDescription": "Average cycles per completed instruction when the NTC instruction cannot complete because the thread was blocked",
|
||||
"MetricExpr": "PM_CMPL_STALL / PM_RUN_INST_CMPL",
|
||||
"MetricGroup": "CPI",
|
||||
"MetricGroup": "CPI;CPI_STALL_RATIO",
|
||||
"MetricName": "COMPLETION_STALL_CPI"
|
||||
},
|
||||
{
|
||||
|
@ -265,7 +265,7 @@
|
||||
"BriefDescription": "Load Missed L1, counted at finish time."
|
||||
},
|
||||
{
|
||||
"EventCode": "0x400FA",
|
||||
"EventCode": "0x500FA",
|
||||
"EventName": "PM_RUN_INST_CMPL",
|
||||
"BriefDescription": "Completed PowerPC instructions gated by the run latch."
|
||||
}
|
||||
|
@ -21,7 +21,7 @@ GenuineIntel-6-A[AC],v1.00,meteorlake,core
|
||||
GenuineIntel-6-1[AEF],v3,nehalemep,core
|
||||
GenuineIntel-6-2E,v3,nehalemex,core
|
||||
GenuineIntel-6-2A,v17,sandybridge,core
|
||||
GenuineIntel-6-8F,v1.09,sapphirerapids,core
|
||||
GenuineIntel-6-(8F|CF),v1.09,sapphirerapids,core
|
||||
GenuineIntel-6-(37|4A|4C|4D|5A),v14,silvermont,core
|
||||
GenuineIntel-6-(4E|5E|8E|9E|A5|A6),v53,skylake,core
|
||||
GenuineIntel-6-55-[01234],v1.28,skylakex,core
|
||||
|
|
@ -11,7 +11,7 @@
|
||||
#include <string.h>
|
||||
#include <stddef.h>
|
||||
|
||||
static const struct pmu_event pme_test_soc_cpu[] = {
|
||||
static const struct pmu_event pmu_events__test_soc_cpu[] = {
|
||||
{
|
||||
.name = "l3_cache_rd",
|
||||
.event = "event=0x40",
|
||||
@ -105,6 +105,14 @@ static const struct pmu_event pme_test_soc_cpu[] = {
|
||||
.desc = "L2 BTB Correction",
|
||||
.topic = "branch",
|
||||
},
|
||||
{
|
||||
.name = 0,
|
||||
.event = 0,
|
||||
.desc = 0,
|
||||
},
|
||||
};
|
||||
|
||||
static const struct pmu_metric pmu_metrics__test_soc_cpu[] = {
|
||||
{
|
||||
.metric_expr = "1 / IPC",
|
||||
.metric_name = "CPI",
|
||||
@ -170,9 +178,8 @@ static const struct pmu_event pme_test_soc_cpu[] = {
|
||||
.metric_name = "L1D_Cache_Fill_BW",
|
||||
},
|
||||
{
|
||||
.name = 0,
|
||||
.event = 0,
|
||||
.desc = 0,
|
||||
.metric_expr = 0,
|
||||
.metric_name = 0,
|
||||
},
|
||||
};
|
||||
|
||||
@ -181,6 +188,11 @@ struct pmu_events_table {
|
||||
const struct pmu_event *entries;
|
||||
};
|
||||
|
||||
/* Struct used to make the PMU metric table implementation opaque to callers. */
|
||||
struct pmu_metrics_table {
|
||||
const struct pmu_metric *entries;
|
||||
};
|
||||
|
||||
/*
|
||||
* Map a CPU to its table of PMU events. The CPU is identified by the
|
||||
* cpuid field, which is an arch-specific identifier for the CPU.
|
||||
@ -192,7 +204,8 @@ struct pmu_events_table {
|
||||
struct pmu_events_map {
|
||||
const char *arch;
|
||||
const char *cpuid;
|
||||
const struct pmu_events_table table;
|
||||
const struct pmu_events_table event_table;
|
||||
const struct pmu_metrics_table metric_table;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -203,12 +216,14 @@ static const struct pmu_events_map pmu_events_map[] = {
|
||||
{
|
||||
.arch = "testarch",
|
||||
.cpuid = "testcpu",
|
||||
.table = { pme_test_soc_cpu },
|
||||
.event_table = { pmu_events__test_soc_cpu },
|
||||
.metric_table = { pmu_metrics__test_soc_cpu },
|
||||
},
|
||||
{
|
||||
.arch = 0,
|
||||
.cpuid = 0,
|
||||
.table = { 0 },
|
||||
.event_table = { 0 },
|
||||
.metric_table = { 0 },
|
||||
},
|
||||
};
|
||||
|
||||
@ -254,9 +269,7 @@ static const struct pmu_sys_events pmu_sys_event_tables[] = {
|
||||
int pmu_events_table_for_each_event(const struct pmu_events_table *table, pmu_event_iter_fn fn,
|
||||
void *data)
|
||||
{
|
||||
for (const struct pmu_event *pe = &table->entries[0];
|
||||
pe->name || pe->metric_group || pe->metric_name;
|
||||
pe++) {
|
||||
for (const struct pmu_event *pe = &table->entries[0]; pe->name; pe++) {
|
||||
int ret = fn(pe, table, data);
|
||||
|
||||
if (ret)
|
||||
@ -265,7 +278,19 @@ int pmu_events_table_for_each_event(const struct pmu_events_table *table, pmu_ev
|
||||
return 0;
|
||||
}
|
||||
|
||||
const struct pmu_events_table *perf_pmu__find_table(struct perf_pmu *pmu)
|
||||
int pmu_metrics_table_for_each_metric(const struct pmu_metrics_table *table, pmu_metric_iter_fn fn,
|
||||
void *data)
|
||||
{
|
||||
for (const struct pmu_metric *pm = &table->entries[0]; pm->metric_expr; pm++) {
|
||||
int ret = fn(pm, table, data);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu)
|
||||
{
|
||||
const struct pmu_events_table *table = NULL;
|
||||
char *cpuid = perf_pmu__getcpuid(pmu);
|
||||
@ -285,7 +310,35 @@ const struct pmu_events_table *perf_pmu__find_table(struct perf_pmu *pmu)
|
||||
break;
|
||||
|
||||
if (!strcmp_cpuid_str(map->cpuid, cpuid)) {
|
||||
table = &map->table;
|
||||
table = &map->event_table;
|
||||
break;
|
||||
}
|
||||
}
|
||||
free(cpuid);
|
||||
return table;
|
||||
}
|
||||
|
||||
const struct pmu_metrics_table *perf_pmu__find_metrics_table(struct perf_pmu *pmu)
|
||||
{
|
||||
const struct pmu_metrics_table *table = NULL;
|
||||
char *cpuid = perf_pmu__getcpuid(pmu);
|
||||
int i;
|
||||
|
||||
/* on some platforms which uses cpus map, cpuid can be NULL for
|
||||
* PMUs other than CORE PMUs.
|
||||
*/
|
||||
if (!cpuid)
|
||||
return NULL;
|
||||
|
||||
i = 0;
|
||||
for (;;) {
|
||||
const struct pmu_events_map *map = &pmu_events_map[i++];
|
||||
|
||||
if (!map->cpuid)
|
||||
break;
|
||||
|
||||
if (!strcmp_cpuid_str(map->cpuid, cpuid)) {
|
||||
table = &map->metric_table;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -299,17 +352,39 @@ const struct pmu_events_table *find_core_events_table(const char *arch, const ch
|
||||
tables->arch;
|
||||
tables++) {
|
||||
if (!strcmp(tables->arch, arch) && !strcmp_cpuid_str(tables->cpuid, cpuid))
|
||||
return &tables->table;
|
||||
return &tables->event_table;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
const struct pmu_metrics_table *find_core_metrics_table(const char *arch, const char *cpuid)
|
||||
{
|
||||
for (const struct pmu_events_map *tables = &pmu_events_map[0];
|
||||
tables->arch;
|
||||
tables++) {
|
||||
if (!strcmp(tables->arch, arch) && !strcmp_cpuid_str(tables->cpuid, cpuid))
|
||||
return &tables->metric_table;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int pmu_for_each_core_event(pmu_event_iter_fn fn, void *data)
|
||||
{
|
||||
for (const struct pmu_events_map *tables = &pmu_events_map[0]; tables->arch; tables++) {
|
||||
int ret = pmu_events_table_for_each_event(&tables->event_table, fn, data);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int pmu_for_each_core_metric(pmu_metric_iter_fn fn, void *data)
|
||||
{
|
||||
for (const struct pmu_events_map *tables = &pmu_events_map[0];
|
||||
tables->arch;
|
||||
tables++) {
|
||||
int ret = pmu_events_table_for_each_event(&tables->table, fn, data);
|
||||
int ret = pmu_metrics_table_for_each_metric(&tables->metric_table, fn, data);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
@ -340,3 +415,8 @@ int pmu_for_each_sys_event(pmu_event_iter_fn fn, void *data)
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int pmu_for_each_sys_metric(pmu_metric_iter_fn fn __maybe_unused, void *data __maybe_unused)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
@ -3,6 +3,7 @@
|
||||
"""Convert directories of JSON events to C code."""
|
||||
import argparse
|
||||
import csv
|
||||
from functools import lru_cache
|
||||
import json
|
||||
import metric
|
||||
import os
|
||||
@ -12,30 +13,47 @@ import collections
|
||||
|
||||
# Global command line arguments.
|
||||
_args = None
|
||||
# List of regular event tables.
|
||||
_event_tables = []
|
||||
# List of event tables generated from "/sys" directories.
|
||||
_sys_event_tables = []
|
||||
# List of regular metric tables.
|
||||
_metric_tables = []
|
||||
# List of metric tables generated from "/sys" directories.
|
||||
_sys_metric_tables = []
|
||||
# Mapping between sys event table names and sys metric table names.
|
||||
_sys_event_table_to_metric_table_mapping = {}
|
||||
# Map from an event name to an architecture standard
|
||||
# JsonEvent. Architecture standard events are in json files in the top
|
||||
# f'{_args.starting_dir}/{_args.arch}' directory.
|
||||
_arch_std_events = {}
|
||||
# Track whether an events table is currently being defined and needs closing.
|
||||
_close_table = False
|
||||
# Events to write out when the table is closed
|
||||
_pending_events = []
|
||||
# Name of events table to be written out
|
||||
_pending_events_tblname = None
|
||||
# Metrics to write out when the table is closed
|
||||
_pending_metrics = []
|
||||
# Name of metrics table to be written out
|
||||
_pending_metrics_tblname = None
|
||||
# Global BigCString shared by all structures.
|
||||
_bcs = None
|
||||
# Order specific JsonEvent attributes will be visited.
|
||||
_json_event_attributes = [
|
||||
# cmp_sevent related attributes.
|
||||
'name', 'pmu', 'topic', 'desc', 'metric_name', 'metric_group',
|
||||
'name', 'pmu', 'topic', 'desc',
|
||||
# Seems useful, put it early.
|
||||
'event',
|
||||
# Short things in alphabetical order.
|
||||
'aggr_mode', 'compat', 'deprecated', 'perpkg', 'unit',
|
||||
# Longer things (the last won't be iterated over during decompress).
|
||||
'metric_constraint', 'metric_expr', 'long_desc'
|
||||
'long_desc'
|
||||
]
|
||||
|
||||
# Attributes that are in pmu_metric rather than pmu_event.
|
||||
_json_metric_attributes = [
|
||||
'metric_name', 'metric_group', 'metric_constraint', 'metric_expr', 'desc',
|
||||
'long_desc', 'unit', 'compat', 'aggr_mode'
|
||||
]
|
||||
|
||||
def removesuffix(s: str, suffix: str) -> str:
|
||||
"""Remove the suffix from a string
|
||||
@ -46,14 +64,16 @@ def removesuffix(s: str, suffix: str) -> str:
|
||||
return s[0:-len(suffix)] if s.endswith(suffix) else s
|
||||
|
||||
|
||||
def file_name_to_table_name(parents: Sequence[str], dirname: str) -> str:
|
||||
def file_name_to_table_name(prefix: str, parents: Sequence[str],
|
||||
dirname: str) -> str:
|
||||
"""Generate a C table name from directory names."""
|
||||
tblname = 'pme'
|
||||
tblname = prefix
|
||||
for p in parents:
|
||||
tblname += '_' + p
|
||||
tblname += '_' + dirname
|
||||
return tblname.replace('-', '_')
|
||||
|
||||
|
||||
def c_len(s: str) -> int:
|
||||
"""Return the length of s a C string
|
||||
|
||||
@ -271,7 +291,7 @@ class JsonEvent:
|
||||
self.metric_constraint = jd.get('MetricConstraint')
|
||||
self.metric_expr = None
|
||||
if 'MetricExpr' in jd:
|
||||
self.metric_expr = metric.ParsePerfJson(jd['MetricExpr']).Simplify()
|
||||
self.metric_expr = metric.ParsePerfJson(jd['MetricExpr']).Simplify()
|
||||
|
||||
arch_std = jd.get('ArchStdEvent')
|
||||
if precise and self.desc and '(Precise Event)' not in self.desc:
|
||||
@ -320,35 +340,46 @@ class JsonEvent:
|
||||
s += f'\t{attr} = {value},\n'
|
||||
return s + '}'
|
||||
|
||||
def build_c_string(self) -> str:
|
||||
def build_c_string(self, metric: bool) -> str:
|
||||
s = ''
|
||||
for attr in _json_event_attributes:
|
||||
for attr in _json_metric_attributes if metric else _json_event_attributes:
|
||||
x = getattr(self, attr)
|
||||
if x and attr == 'metric_expr':
|
||||
if metric and x and attr == 'metric_expr':
|
||||
# Convert parsed metric expressions into a string. Slashes
|
||||
# must be doubled in the file.
|
||||
x = x.ToPerfJson().replace('\\', '\\\\')
|
||||
s += f'{x}\\000' if x else '\\000'
|
||||
return s
|
||||
|
||||
def to_c_string(self) -> str:
|
||||
def to_c_string(self, metric: bool) -> str:
|
||||
"""Representation of the event as a C struct initializer."""
|
||||
|
||||
s = self.build_c_string()
|
||||
s = self.build_c_string(metric)
|
||||
return f'{{ { _bcs.offsets[s] } }}, /* {s} */\n'
|
||||
|
||||
|
||||
@lru_cache(maxsize=None)
|
||||
def read_json_events(path: str, topic: str) -> Sequence[JsonEvent]:
|
||||
"""Read json events from the specified file."""
|
||||
|
||||
try:
|
||||
result = json.load(open(path), object_hook=JsonEvent)
|
||||
events = json.load(open(path), object_hook=JsonEvent)
|
||||
except BaseException as err:
|
||||
print(f"Exception processing {path}")
|
||||
raise
|
||||
for event in result:
|
||||
metrics: list[Tuple[str, metric.Expression]] = []
|
||||
for event in events:
|
||||
event.topic = topic
|
||||
return result
|
||||
if event.metric_name and '-' not in event.metric_name:
|
||||
metrics.append((event.metric_name, event.metric_expr))
|
||||
updates = metric.RewriteMetricsInTermsOfOthers(metrics)
|
||||
if updates:
|
||||
for event in events:
|
||||
if event.metric_name in updates:
|
||||
# print(f'Updated {event.metric_name} from\n"{event.metric_expr}"\n'
|
||||
# f'to\n"{updates[event.metric_name]}"')
|
||||
event.metric_expr = updates[event.metric_name]
|
||||
|
||||
return events
|
||||
|
||||
def preprocess_arch_std_files(archpath: str) -> None:
|
||||
"""Read in all architecture standard events."""
|
||||
@ -358,26 +389,20 @@ def preprocess_arch_std_files(archpath: str) -> None:
|
||||
for event in read_json_events(item.path, topic=''):
|
||||
if event.name:
|
||||
_arch_std_events[event.name.lower()] = event
|
||||
|
||||
|
||||
def print_events_table_prefix(tblname: str) -> None:
|
||||
"""Called when a new events table is started."""
|
||||
global _close_table
|
||||
if _close_table:
|
||||
raise IOError('Printing table prefix but last table has no suffix')
|
||||
_args.output_file.write(f'static const struct compact_pmu_event {tblname}[] = {{\n')
|
||||
_close_table = True
|
||||
if event.metric_name:
|
||||
_arch_std_events[event.metric_name.lower()] = event
|
||||
|
||||
|
||||
def add_events_table_entries(item: os.DirEntry, topic: str) -> None:
|
||||
"""Add contents of file to _pending_events table."""
|
||||
if not _close_table:
|
||||
raise IOError('Table entries missing prefix')
|
||||
for e in read_json_events(item.path, topic):
|
||||
_pending_events.append(e)
|
||||
if e.name:
|
||||
_pending_events.append(e)
|
||||
if e.metric_name:
|
||||
_pending_metrics.append(e)
|
||||
|
||||
|
||||
def print_events_table_suffix() -> None:
|
||||
def print_pending_events() -> None:
|
||||
"""Optionally close events table."""
|
||||
|
||||
def event_cmp_key(j: JsonEvent) -> Tuple[bool, str, str, str, str]:
|
||||
@ -389,17 +414,58 @@ def print_events_table_suffix() -> None:
|
||||
return (j.desc is not None, fix_none(j.topic), fix_none(j.name), fix_none(j.pmu),
|
||||
fix_none(j.metric_name))
|
||||
|
||||
global _close_table
|
||||
if not _close_table:
|
||||
global _pending_events
|
||||
if not _pending_events:
|
||||
return
|
||||
|
||||
global _pending_events
|
||||
global _pending_events_tblname
|
||||
if _pending_events_tblname.endswith('_sys'):
|
||||
global _sys_event_tables
|
||||
_sys_event_tables.append(_pending_events_tblname)
|
||||
else:
|
||||
global event_tables
|
||||
_event_tables.append(_pending_events_tblname)
|
||||
|
||||
_args.output_file.write(
|
||||
f'static const struct compact_pmu_event {_pending_events_tblname}[] = {{\n')
|
||||
|
||||
for event in sorted(_pending_events, key=event_cmp_key):
|
||||
_args.output_file.write(event.to_c_string())
|
||||
_pending_events = []
|
||||
_args.output_file.write(event.to_c_string(metric=False))
|
||||
_pending_events = []
|
||||
|
||||
_args.output_file.write('};\n\n')
|
||||
|
||||
def print_pending_metrics() -> None:
|
||||
"""Optionally close metrics table."""
|
||||
|
||||
def metric_cmp_key(j: JsonEvent) -> Tuple[bool, str, str]:
|
||||
def fix_none(s: Optional[str]) -> str:
|
||||
if s is None:
|
||||
return ''
|
||||
return s
|
||||
|
||||
return (j.desc is not None, fix_none(j.pmu), fix_none(j.metric_name))
|
||||
|
||||
global _pending_metrics
|
||||
if not _pending_metrics:
|
||||
return
|
||||
|
||||
global _pending_metrics_tblname
|
||||
if _pending_metrics_tblname.endswith('_sys'):
|
||||
global _sys_metric_tables
|
||||
_sys_metric_tables.append(_pending_metrics_tblname)
|
||||
else:
|
||||
global metric_tables
|
||||
_metric_tables.append(_pending_metrics_tblname)
|
||||
|
||||
_args.output_file.write(
|
||||
f'static const struct compact_pmu_event {_pending_metrics_tblname}[] = {{\n')
|
||||
|
||||
for metric in sorted(_pending_metrics, key=metric_cmp_key):
|
||||
_args.output_file.write(metric.to_c_string(metric=True))
|
||||
_pending_metrics = []
|
||||
|
||||
_args.output_file.write('};\n\n')
|
||||
_close_table = False
|
||||
|
||||
def get_topic(topic: str) -> str:
|
||||
if topic.endswith('metrics.json'):
|
||||
@ -423,12 +489,13 @@ def preprocess_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
|
||||
|
||||
topic = get_topic(item.name)
|
||||
for event in read_json_events(item.path, topic):
|
||||
_bcs.add(event.build_c_string())
|
||||
if event.name:
|
||||
_bcs.add(event.build_c_string(metric=False))
|
||||
if event.metric_name:
|
||||
_bcs.add(event.build_c_string(metric=True))
|
||||
|
||||
def process_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
|
||||
"""Process a JSON file during the main walk."""
|
||||
global _sys_event_tables
|
||||
|
||||
def is_leaf_dir(path: str) -> bool:
|
||||
for item in os.scandir(path):
|
||||
if item.is_dir():
|
||||
@ -437,12 +504,16 @@ def process_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
|
||||
|
||||
# model directory, reset topic
|
||||
if item.is_dir() and is_leaf_dir(item.path):
|
||||
print_events_table_suffix()
|
||||
print_pending_events()
|
||||
print_pending_metrics()
|
||||
|
||||
global _pending_events_tblname
|
||||
_pending_events_tblname = file_name_to_table_name('pmu_events_', parents, item.name)
|
||||
global _pending_metrics_tblname
|
||||
_pending_metrics_tblname = file_name_to_table_name('pmu_metrics_', parents, item.name)
|
||||
|
||||
tblname = file_name_to_table_name(parents, item.name)
|
||||
if item.name == 'sys':
|
||||
_sys_event_tables.append(tblname)
|
||||
print_events_table_prefix(tblname)
|
||||
_sys_event_table_to_metric_table_mapping[_pending_events_tblname] = _pending_metrics_tblname
|
||||
return
|
||||
|
||||
# base dir or too deep
|
||||
@ -467,6 +538,12 @@ struct pmu_events_table {
|
||||
size_t length;
|
||||
};
|
||||
|
||||
/* Struct used to make the PMU metric table implementation opaque to callers. */
|
||||
struct pmu_metrics_table {
|
||||
const struct compact_pmu_event *entries;
|
||||
size_t length;
|
||||
};
|
||||
|
||||
/*
|
||||
* Map a CPU to its table of PMU events. The CPU is identified by the
|
||||
* cpuid field, which is an arch-specific identifier for the CPU.
|
||||
@ -478,7 +555,8 @@ struct pmu_events_table {
|
||||
struct pmu_events_map {
|
||||
const char *arch;
|
||||
const char *cpuid;
|
||||
struct pmu_events_table table;
|
||||
struct pmu_events_table event_table;
|
||||
struct pmu_metrics_table metric_table;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -492,9 +570,13 @@ const struct pmu_events_map pmu_events_map[] = {
|
||||
_args.output_file.write("""{
|
||||
\t.arch = "testarch",
|
||||
\t.cpuid = "testcpu",
|
||||
\t.table = {
|
||||
\t.entries = pme_test_soc_cpu,
|
||||
\t.length = ARRAY_SIZE(pme_test_soc_cpu),
|
||||
\t.event_table = {
|
||||
\t\t.entries = pmu_events__test_soc_cpu,
|
||||
\t\t.length = ARRAY_SIZE(pmu_events__test_soc_cpu),
|
||||
\t},
|
||||
\t.metric_table = {
|
||||
\t\t.entries = pmu_metrics__test_soc_cpu,
|
||||
\t\t.length = ARRAY_SIZE(pmu_metrics__test_soc_cpu),
|
||||
\t}
|
||||
},
|
||||
""")
|
||||
@ -505,14 +587,31 @@ const struct pmu_events_map pmu_events_map[] = {
|
||||
for row in table:
|
||||
# Skip the first row or any row beginning with #.
|
||||
if not first and len(row) > 0 and not row[0].startswith('#'):
|
||||
tblname = file_name_to_table_name([], row[2].replace('/', '_'))
|
||||
event_tblname = file_name_to_table_name('pmu_events_', [], row[2].replace('/', '_'))
|
||||
if event_tblname in _event_tables:
|
||||
event_size = f'ARRAY_SIZE({event_tblname})'
|
||||
else:
|
||||
event_tblname = 'NULL'
|
||||
event_size = '0'
|
||||
metric_tblname = file_name_to_table_name('pmu_metrics_', [], row[2].replace('/', '_'))
|
||||
if metric_tblname in _metric_tables:
|
||||
metric_size = f'ARRAY_SIZE({metric_tblname})'
|
||||
else:
|
||||
metric_tblname = 'NULL'
|
||||
metric_size = '0'
|
||||
if event_size == '0' and metric_size == '0':
|
||||
continue
|
||||
cpuid = row[0].replace('\\', '\\\\')
|
||||
_args.output_file.write(f"""{{
|
||||
\t.arch = "{arch}",
|
||||
\t.cpuid = "{cpuid}",
|
||||
\t.table = {{
|
||||
\t\t.entries = {tblname},
|
||||
\t\t.length = ARRAY_SIZE({tblname})
|
||||
\t.event_table = {{
|
||||
\t\t.entries = {event_tblname},
|
||||
\t\t.length = {event_size}
|
||||
\t}},
|
||||
\t.metric_table = {{
|
||||
\t\t.entries = {metric_tblname},
|
||||
\t\t.length = {metric_size}
|
||||
\t}}
|
||||
}},
|
||||
""")
|
||||
@ -521,7 +620,8 @@ const struct pmu_events_map pmu_events_map[] = {
|
||||
_args.output_file.write("""{
|
||||
\t.arch = 0,
|
||||
\t.cpuid = 0,
|
||||
\t.table = { 0, 0 },
|
||||
\t.event_table = { 0, 0 },
|
||||
\t.metric_table = { 0, 0 },
|
||||
}
|
||||
};
|
||||
""")
|
||||
@ -532,14 +632,36 @@ def print_system_mapping_table() -> None:
|
||||
_args.output_file.write("""
|
||||
struct pmu_sys_events {
|
||||
\tconst char *name;
|
||||
\tstruct pmu_events_table table;
|
||||
\tstruct pmu_events_table event_table;
|
||||
\tstruct pmu_metrics_table metric_table;
|
||||
};
|
||||
|
||||
static const struct pmu_sys_events pmu_sys_event_tables[] = {
|
||||
""")
|
||||
printed_metric_tables = []
|
||||
for tblname in _sys_event_tables:
|
||||
_args.output_file.write(f"""\t{{
|
||||
\t\t.table = {{
|
||||
\t\t.event_table = {{
|
||||
\t\t\t.entries = {tblname},
|
||||
\t\t\t.length = ARRAY_SIZE({tblname})
|
||||
\t\t}},""")
|
||||
metric_tblname = _sys_event_table_to_metric_table_mapping[tblname]
|
||||
if metric_tblname in _sys_metric_tables:
|
||||
_args.output_file.write(f"""
|
||||
\t\t.metric_table = {{
|
||||
\t\t\t.entries = {metric_tblname},
|
||||
\t\t\t.length = ARRAY_SIZE({metric_tblname})
|
||||
\t\t}},""")
|
||||
printed_metric_tables.append(metric_tblname)
|
||||
_args.output_file.write(f"""
|
||||
\t\t.name = \"{tblname}\",
|
||||
\t}},
|
||||
""")
|
||||
for tblname in _sys_metric_tables:
|
||||
if tblname in printed_metric_tables:
|
||||
continue
|
||||
_args.output_file.write(f"""\t{{
|
||||
\t\t.metric_table = {{
|
||||
\t\t\t.entries = {tblname},
|
||||
\t\t\t.length = ARRAY_SIZE({tblname})
|
||||
\t\t}},
|
||||
@ -547,11 +669,12 @@ static const struct pmu_sys_events pmu_sys_event_tables[] = {
|
||||
\t}},
|
||||
""")
|
||||
_args.output_file.write("""\t{
|
||||
\t\t.table = { 0, 0 }
|
||||
\t\t.event_table = { 0, 0 },
|
||||
\t\t.metric_table = { 0, 0 },
|
||||
\t},
|
||||
};
|
||||
|
||||
static void decompress(int offset, struct pmu_event *pe)
|
||||
static void decompress_event(int offset, struct pmu_event *pe)
|
||||
{
|
||||
\tconst char *p = &big_c_string[offset];
|
||||
""")
|
||||
@ -564,6 +687,19 @@ static void decompress(int offset, struct pmu_event *pe)
|
||||
_args.output_file.write('\twhile (*p++);')
|
||||
_args.output_file.write("""}
|
||||
|
||||
static void decompress_metric(int offset, struct pmu_metric *pm)
|
||||
{
|
||||
\tconst char *p = &big_c_string[offset];
|
||||
""")
|
||||
for attr in _json_metric_attributes:
|
||||
_args.output_file.write(f"""
|
||||
\tpm->{attr} = (*p == '\\0' ? NULL : p);
|
||||
""")
|
||||
if attr == _json_metric_attributes[-1]:
|
||||
continue
|
||||
_args.output_file.write('\twhile (*p++);')
|
||||
_args.output_file.write("""}
|
||||
|
||||
int pmu_events_table_for_each_event(const struct pmu_events_table *table,
|
||||
pmu_event_iter_fn fn,
|
||||
void *data)
|
||||
@ -572,7 +708,9 @@ int pmu_events_table_for_each_event(const struct pmu_events_table *table,
|
||||
struct pmu_event pe;
|
||||
int ret;
|
||||
|
||||
decompress(table->entries[i].offset, &pe);
|
||||
decompress_event(table->entries[i].offset, &pe);
|
||||
if (!pe.name)
|
||||
continue;
|
||||
ret = fn(&pe, table, data);
|
||||
if (ret)
|
||||
return ret;
|
||||
@ -580,7 +718,25 @@ int pmu_events_table_for_each_event(const struct pmu_events_table *table,
|
||||
return 0;
|
||||
}
|
||||
|
||||
const struct pmu_events_table *perf_pmu__find_table(struct perf_pmu *pmu)
|
||||
int pmu_metrics_table_for_each_metric(const struct pmu_metrics_table *table,
|
||||
pmu_metric_iter_fn fn,
|
||||
void *data)
|
||||
{
|
||||
for (size_t i = 0; i < table->length; i++) {
|
||||
struct pmu_metric pm;
|
||||
int ret;
|
||||
|
||||
decompress_metric(table->entries[i].offset, &pm);
|
||||
if (!pm.metric_expr)
|
||||
continue;
|
||||
ret = fn(&pm, table, data);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu)
|
||||
{
|
||||
const struct pmu_events_table *table = NULL;
|
||||
char *cpuid = perf_pmu__getcpuid(pmu);
|
||||
@ -599,7 +755,34 @@ const struct pmu_events_table *perf_pmu__find_table(struct perf_pmu *pmu)
|
||||
break;
|
||||
|
||||
if (!strcmp_cpuid_str(map->cpuid, cpuid)) {
|
||||
table = &map->table;
|
||||
table = &map->event_table;
|
||||
break;
|
||||
}
|
||||
}
|
||||
free(cpuid);
|
||||
return table;
|
||||
}
|
||||
|
||||
const struct pmu_metrics_table *perf_pmu__find_metrics_table(struct perf_pmu *pmu)
|
||||
{
|
||||
const struct pmu_metrics_table *table = NULL;
|
||||
char *cpuid = perf_pmu__getcpuid(pmu);
|
||||
int i;
|
||||
|
||||
/* on some platforms which uses cpus map, cpuid can be NULL for
|
||||
* PMUs other than CORE PMUs.
|
||||
*/
|
||||
if (!cpuid)
|
||||
return NULL;
|
||||
|
||||
i = 0;
|
||||
for (;;) {
|
||||
const struct pmu_events_map *map = &pmu_events_map[i++];
|
||||
if (!map->arch)
|
||||
break;
|
||||
|
||||
if (!strcmp_cpuid_str(map->cpuid, cpuid)) {
|
||||
table = &map->metric_table;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -613,7 +796,18 @@ const struct pmu_events_table *find_core_events_table(const char *arch, const ch
|
||||
tables->arch;
|
||||
tables++) {
|
||||
if (!strcmp(tables->arch, arch) && !strcmp_cpuid_str(tables->cpuid, cpuid))
|
||||
return &tables->table;
|
||||
return &tables->event_table;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
const struct pmu_metrics_table *find_core_metrics_table(const char *arch, const char *cpuid)
|
||||
{
|
||||
for (const struct pmu_events_map *tables = &pmu_events_map[0];
|
||||
tables->arch;
|
||||
tables++) {
|
||||
if (!strcmp(tables->arch, arch) && !strcmp_cpuid_str(tables->cpuid, cpuid))
|
||||
return &tables->metric_table;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
@ -623,7 +817,20 @@ int pmu_for_each_core_event(pmu_event_iter_fn fn, void *data)
|
||||
for (const struct pmu_events_map *tables = &pmu_events_map[0];
|
||||
tables->arch;
|
||||
tables++) {
|
||||
int ret = pmu_events_table_for_each_event(&tables->table, fn, data);
|
||||
int ret = pmu_events_table_for_each_event(&tables->event_table, fn, data);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int pmu_for_each_core_metric(pmu_metric_iter_fn fn, void *data)
|
||||
{
|
||||
for (const struct pmu_events_map *tables = &pmu_events_map[0];
|
||||
tables->arch;
|
||||
tables++) {
|
||||
int ret = pmu_metrics_table_for_each_metric(&tables->metric_table, fn, data);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
@ -637,7 +844,7 @@ const struct pmu_events_table *find_sys_events_table(const char *name)
|
||||
tables->name;
|
||||
tables++) {
|
||||
if (!strcmp(tables->name, name))
|
||||
return &tables->table;
|
||||
return &tables->event_table;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
@ -647,7 +854,20 @@ int pmu_for_each_sys_event(pmu_event_iter_fn fn, void *data)
|
||||
for (const struct pmu_sys_events *tables = &pmu_sys_event_tables[0];
|
||||
tables->name;
|
||||
tables++) {
|
||||
int ret = pmu_events_table_for_each_event(&tables->table, fn, data);
|
||||
int ret = pmu_events_table_for_each_event(&tables->event_table, fn, data);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int pmu_for_each_sys_metric(pmu_metric_iter_fn fn, void *data)
|
||||
{
|
||||
for (const struct pmu_sys_events *tables = &pmu_sys_event_tables[0];
|
||||
tables->name;
|
||||
tables++) {
|
||||
int ret = pmu_metrics_table_for_each_metric(&tables->metric_table, fn, data);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
@ -670,12 +890,24 @@ def main() -> None:
|
||||
action: Callable[[Sequence[str], os.DirEntry], None]) -> None:
|
||||
"""Replicate the directory/file walking behavior of C's file tree walk."""
|
||||
for item in os.scandir(path):
|
||||
if _args.model != 'all' and item.is_dir():
|
||||
# Check if the model matches one in _args.model.
|
||||
if len(parents) == _args.model.split(',')[0].count('/'):
|
||||
# We're testing the correct directory.
|
||||
item_path = '/'.join(parents) + ('/' if len(parents) > 0 else '') + item.name
|
||||
if 'test' not in item_path and item_path not in _args.model.split(','):
|
||||
continue
|
||||
action(parents, item)
|
||||
if item.is_dir():
|
||||
ftw(item.path, parents + [item.name], action)
|
||||
|
||||
ap = argparse.ArgumentParser()
|
||||
ap.add_argument('arch', help='Architecture name like x86')
|
||||
ap.add_argument('model', help='''Select a model such as skylake to
|
||||
reduce the code size. Normally set to "all". For architectures like
|
||||
ARM64 with an implementor/model, the model must include the implementor
|
||||
such as "arm/cortex-a34".''',
|
||||
default='all')
|
||||
ap.add_argument(
|
||||
'starting_dir',
|
||||
type=dir_path,
|
||||
@ -721,7 +953,8 @@ struct compact_pmu_event {
|
||||
for arch in archs:
|
||||
arch_path = f'{_args.starting_dir}/{arch}'
|
||||
ftw(arch_path, [], process_one_file)
|
||||
print_events_table_suffix()
|
||||
print_pending_events()
|
||||
print_pending_metrics()
|
||||
|
||||
print_mapping_table(archs)
|
||||
print_system_mapping_table()
|
||||
|
@ -4,7 +4,7 @@ import ast
|
||||
import decimal
|
||||
import json
|
||||
import re
|
||||
from typing import Dict, List, Optional, Set, Union
|
||||
from typing import Dict, List, Optional, Set, Tuple, Union
|
||||
|
||||
|
||||
class Expression:
|
||||
@ -26,6 +26,9 @@ class Expression:
|
||||
"""Returns true when two expressions are the same."""
|
||||
raise NotImplementedError()
|
||||
|
||||
def Substitute(self, name: str, expression: 'Expression') -> 'Expression':
|
||||
raise NotImplementedError()
|
||||
|
||||
def __str__(self) -> str:
|
||||
return self.ToPerfJson()
|
||||
|
||||
@ -186,6 +189,15 @@ class Operator(Expression):
|
||||
other.lhs) and self.rhs.Equals(other.rhs)
|
||||
return False
|
||||
|
||||
def Substitute(self, name: str, expression: Expression) -> Expression:
|
||||
if self.Equals(expression):
|
||||
return Event(name)
|
||||
lhs = self.lhs.Substitute(name, expression)
|
||||
rhs = None
|
||||
if self.rhs:
|
||||
rhs = self.rhs.Substitute(name, expression)
|
||||
return Operator(self.operator, lhs, rhs)
|
||||
|
||||
|
||||
class Select(Expression):
|
||||
"""Represents a select ternary in the parse tree."""
|
||||
@ -225,6 +237,14 @@ class Select(Expression):
|
||||
other.false_val) and self.true_val.Equals(other.true_val)
|
||||
return False
|
||||
|
||||
def Substitute(self, name: str, expression: Expression) -> Expression:
|
||||
if self.Equals(expression):
|
||||
return Event(name)
|
||||
true_val = self.true_val.Substitute(name, expression)
|
||||
cond = self.cond.Substitute(name, expression)
|
||||
false_val = self.false_val.Substitute(name, expression)
|
||||
return Select(true_val, cond, false_val)
|
||||
|
||||
|
||||
class Function(Expression):
|
||||
"""A function in an expression like min, max, d_ratio."""
|
||||
@ -261,10 +281,21 @@ class Function(Expression):
|
||||
|
||||
def Equals(self, other: Expression) -> bool:
|
||||
if isinstance(other, Function):
|
||||
return self.fn == other.fn and self.lhs.Equals(
|
||||
other.lhs) and self.rhs.Equals(other.rhs)
|
||||
result = self.fn == other.fn and self.lhs.Equals(other.lhs)
|
||||
if self.rhs:
|
||||
result = result and self.rhs.Equals(other.rhs)
|
||||
return result
|
||||
return False
|
||||
|
||||
def Substitute(self, name: str, expression: Expression) -> Expression:
|
||||
if self.Equals(expression):
|
||||
return Event(name)
|
||||
lhs = self.lhs.Substitute(name, expression)
|
||||
rhs = None
|
||||
if self.rhs:
|
||||
rhs = self.rhs.Substitute(name, expression)
|
||||
return Function(self.fn, lhs, rhs)
|
||||
|
||||
|
||||
def _FixEscapes(s: str) -> str:
|
||||
s = re.sub(r'([^\\]),', r'\1\\,', s)
|
||||
@ -291,6 +322,9 @@ class Event(Expression):
|
||||
def Equals(self, other: Expression) -> bool:
|
||||
return isinstance(other, Event) and self.name == other.name
|
||||
|
||||
def Substitute(self, name: str, expression: Expression) -> Expression:
|
||||
return self
|
||||
|
||||
|
||||
class Constant(Expression):
|
||||
"""A constant within the expression tree."""
|
||||
@ -315,6 +349,9 @@ class Constant(Expression):
|
||||
def Equals(self, other: Expression) -> bool:
|
||||
return isinstance(other, Constant) and self.value == other.value
|
||||
|
||||
def Substitute(self, name: str, expression: Expression) -> Expression:
|
||||
return self
|
||||
|
||||
|
||||
class Literal(Expression):
|
||||
"""A runtime literal within the expression tree."""
|
||||
@ -334,6 +371,9 @@ class Literal(Expression):
|
||||
def Equals(self, other: Expression) -> bool:
|
||||
return isinstance(other, Literal) and self.value == other.value
|
||||
|
||||
def Substitute(self, name: str, expression: Expression) -> Expression:
|
||||
return self
|
||||
|
||||
|
||||
def min(lhs: Union[int, float, Expression], rhs: Union[int, float,
|
||||
Expression]) -> Function:
|
||||
@ -459,6 +499,7 @@ class MetricGroup:
|
||||
|
||||
|
||||
class _RewriteIfExpToSelect(ast.NodeTransformer):
|
||||
"""Transformer to convert if-else nodes to Select expressions."""
|
||||
|
||||
def visit_IfExp(self, node):
|
||||
# pylint: disable=invalid-name
|
||||
@ -496,7 +537,37 @@ def ParsePerfJson(orig: str) -> Expression:
|
||||
for kw in keywords:
|
||||
py = re.sub(rf'Event\(r"{kw}"\)', kw, py)
|
||||
|
||||
parsed = ast.parse(py, mode='eval')
|
||||
try:
|
||||
parsed = ast.parse(py, mode='eval')
|
||||
except SyntaxError as e:
|
||||
raise SyntaxError(f'Parsing expression:\n{orig}') from e
|
||||
_RewriteIfExpToSelect().visit(parsed)
|
||||
parsed = ast.fix_missing_locations(parsed)
|
||||
return _Constify(eval(compile(parsed, orig, 'eval')))
|
||||
|
||||
|
||||
def RewriteMetricsInTermsOfOthers(metrics: List[Tuple[str, Expression]]
|
||||
)-> Dict[str, Expression]:
|
||||
"""Shorten metrics by rewriting in terms of others.
|
||||
|
||||
Args:
|
||||
metrics (list): pairs of metric names and their expressions.
|
||||
Returns:
|
||||
Dict: mapping from a metric name to a shortened expression.
|
||||
"""
|
||||
updates: Dict[str, Expression] = dict()
|
||||
for outer_name, outer_expression in metrics:
|
||||
updated = outer_expression
|
||||
while True:
|
||||
for inner_name, inner_expression in metrics:
|
||||
if inner_name.lower() == outer_name.lower():
|
||||
continue
|
||||
if inner_name in updates:
|
||||
inner_expression = updates[inner_name]
|
||||
updated = updated.Substitute(inner_name, inner_expression)
|
||||
if updated.Equals(outer_expression):
|
||||
break
|
||||
if outer_name in updates and updated.Equals(updates[outer_name]):
|
||||
break
|
||||
updates[outer_name] = updated
|
||||
return updates
|
||||
|
15
tools/perf/pmu-events/metric_test.py
Normal file → Executable file
15
tools/perf/pmu-events/metric_test.py
Normal file → Executable file
@ -1,8 +1,11 @@
|
||||
#!/usr/bin/env python3
|
||||
# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
|
||||
import unittest
|
||||
from metric import Constant
|
||||
from metric import Event
|
||||
from metric import Expression
|
||||
from metric import ParsePerfJson
|
||||
from metric import RewriteMetricsInTermsOfOthers
|
||||
|
||||
|
||||
class TestMetricExpressions(unittest.TestCase):
|
||||
@ -87,8 +90,8 @@ class TestMetricExpressions(unittest.TestCase):
|
||||
after = r'min((a + b if c > 1 else c + d), e + f)'
|
||||
self.assertEqual(ParsePerfJson(before).ToPerfJson(), after)
|
||||
|
||||
before =3D r'a if b else c if d else e'
|
||||
after =3D r'(a if b else (c if d else e))'
|
||||
before = r'a if b else c if d else e'
|
||||
after = r'(a if b else (c if d else e))'
|
||||
self.assertEqual(ParsePerfJson(before).ToPerfJson(), after)
|
||||
|
||||
def test_ToPython(self):
|
||||
@ -153,5 +156,13 @@ class TestMetricExpressions(unittest.TestCase):
|
||||
after = '0 * SLOTS'
|
||||
self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after)
|
||||
|
||||
def test_RewriteMetricsInTermsOfOthers(self):
|
||||
Expression.__eq__ = lambda e1, e2: e1.Equals(e2)
|
||||
before = [('m1', ParsePerfJson('a + b + c + d')),
|
||||
('m2', ParsePerfJson('a + b + c'))]
|
||||
after = {'m1': ParsePerfJson('m2 + d')}
|
||||
self.assertEqual(RewriteMetricsInTermsOfOthers(before), after)
|
||||
Expression.__eq__ = None
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -23,27 +23,47 @@ struct pmu_event {
|
||||
const char *unit;
|
||||
const char *perpkg;
|
||||
const char *aggr_mode;
|
||||
const char *metric_expr;
|
||||
const char *deprecated;
|
||||
};
|
||||
|
||||
struct pmu_metric {
|
||||
const char *metric_name;
|
||||
const char *metric_group;
|
||||
const char *deprecated;
|
||||
const char *metric_expr;
|
||||
const char *unit;
|
||||
const char *compat;
|
||||
const char *aggr_mode;
|
||||
const char *metric_constraint;
|
||||
const char *desc;
|
||||
const char *long_desc;
|
||||
};
|
||||
|
||||
struct pmu_events_table;
|
||||
struct pmu_metrics_table;
|
||||
|
||||
typedef int (*pmu_event_iter_fn)(const struct pmu_event *pe,
|
||||
const struct pmu_events_table *table,
|
||||
void *data);
|
||||
|
||||
typedef int (*pmu_metric_iter_fn)(const struct pmu_metric *pm,
|
||||
const struct pmu_metrics_table *table,
|
||||
void *data);
|
||||
|
||||
int pmu_events_table_for_each_event(const struct pmu_events_table *table, pmu_event_iter_fn fn,
|
||||
void *data);
|
||||
int pmu_metrics_table_for_each_metric(const struct pmu_metrics_table *table, pmu_metric_iter_fn fn,
|
||||
void *data);
|
||||
|
||||
const struct pmu_events_table *perf_pmu__find_table(struct perf_pmu *pmu);
|
||||
const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu);
|
||||
const struct pmu_metrics_table *perf_pmu__find_metrics_table(struct perf_pmu *pmu);
|
||||
const struct pmu_events_table *find_core_events_table(const char *arch, const char *cpuid);
|
||||
const struct pmu_metrics_table *find_core_metrics_table(const char *arch, const char *cpuid);
|
||||
int pmu_for_each_core_event(pmu_event_iter_fn fn, void *data);
|
||||
int pmu_for_each_core_metric(pmu_metric_iter_fn fn, void *data);
|
||||
|
||||
const struct pmu_events_table *find_sys_events_table(const char *name);
|
||||
const struct pmu_metrics_table *find_sys_metrics_table(const char *name);
|
||||
int pmu_for_each_sys_event(pmu_event_iter_fn fn, void *data);
|
||||
int pmu_for_each_sys_metric(pmu_metric_iter_fn fn, void *data);
|
||||
|
||||
#endif
|
||||
|
@ -19,12 +19,34 @@
|
||||
# pylint: disable=missing-function-docstring
|
||||
|
||||
from __future__ import print_function
|
||||
import sys
|
||||
import os
|
||||
import io
|
||||
import argparse
|
||||
import hashlib
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import urllib.request
|
||||
|
||||
minimal_html = """<head>
|
||||
<link rel="stylesheet" type="text/css" href="https://cdn.jsdelivr.net/npm/d3-flame-graph@4.1.3/dist/d3-flamegraph.css">
|
||||
</head>
|
||||
<body>
|
||||
<div id="chart"></div>
|
||||
<script type="text/javascript" src="https://d3js.org/d3.v7.js"></script>
|
||||
<script type="text/javascript" src="https://cdn.jsdelivr.net/npm/d3-flame-graph@4.1.3/dist/d3-flamegraph.min.js"></script>
|
||||
<script type="text/javascript">
|
||||
const stacks = [/** @flamegraph_json **/];
|
||||
// Note, options is unused.
|
||||
const options = [/** @options_json **/];
|
||||
|
||||
var chart = flamegraph();
|
||||
d3.select("#chart")
|
||||
.datum(stacks[0])
|
||||
.call(chart);
|
||||
</script>
|
||||
</body>
|
||||
"""
|
||||
|
||||
# pylint: disable=too-few-public-methods
|
||||
class Node:
|
||||
@ -50,16 +72,6 @@ class FlameGraphCLI:
|
||||
self.args = args
|
||||
self.stack = Node("all", "root")
|
||||
|
||||
if self.args.format == "html" and \
|
||||
not os.path.isfile(self.args.template):
|
||||
print("Flame Graph template {} does not exist. Please install "
|
||||
"the js-d3-flame-graph (RPM) or libjs-d3-flame-graph (deb) "
|
||||
"package, specify an existing flame graph template "
|
||||
"(--template PATH) or another output format "
|
||||
"(--format FORMAT).".format(self.args.template),
|
||||
file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
@staticmethod
|
||||
def get_libtype_from_dso(dso):
|
||||
"""
|
||||
@ -128,16 +140,63 @@ class FlameGraphCLI:
|
||||
}
|
||||
options_json = json.dumps(options)
|
||||
|
||||
template_md5sum = None
|
||||
if self.args.format == "html":
|
||||
if os.path.isfile(self.args.template):
|
||||
template = f"file://{self.args.template}"
|
||||
else:
|
||||
if not self.args.allow_download:
|
||||
print(f"""Warning: Flame Graph template '{self.args.template}'
|
||||
does not exist. To avoid this please install a package such as the
|
||||
js-d3-flame-graph or libjs-d3-flame-graph, specify an existing flame
|
||||
graph template (--template PATH) or use another output format (--format
|
||||
FORMAT).""",
|
||||
file=sys.stderr)
|
||||
if self.args.input == "-":
|
||||
print("""Not attempting to download Flame Graph template as script command line
|
||||
input is disabled due to using live mode. If you want to download the
|
||||
template retry without live mode. For example, use 'perf record -a -g
|
||||
-F 99 sleep 60' and 'perf script report flamegraph'. Alternatively,
|
||||
download the template from:
|
||||
https://cdn.jsdelivr.net/npm/d3-flame-graph@4.1.3/dist/templates/d3-flamegraph-base.html
|
||||
and place it at:
|
||||
/usr/share/d3-flame-graph/d3-flamegraph-base.html""",
|
||||
file=sys.stderr)
|
||||
quit()
|
||||
s = None
|
||||
while s != "y" and s != "n":
|
||||
s = input("Do you wish to download a template from cdn.jsdelivr.net? (this warning can be suppressed with --allow-download) [yn] ").lower()
|
||||
if s == "n":
|
||||
quit()
|
||||
template = "https://cdn.jsdelivr.net/npm/d3-flame-graph@4.1.3/dist/templates/d3-flamegraph-base.html"
|
||||
template_md5sum = "143e0d06ba69b8370b9848dcd6ae3f36"
|
||||
|
||||
try:
|
||||
with io.open(self.args.template, encoding="utf-8") as template:
|
||||
output_str = (
|
||||
template.read()
|
||||
.replace("/** @options_json **/", options_json)
|
||||
.replace("/** @flamegraph_json **/", stacks_json)
|
||||
)
|
||||
except IOError as err:
|
||||
print("Error reading template file: {}".format(err), file=sys.stderr)
|
||||
sys.exit(1)
|
||||
with urllib.request.urlopen(template) as template:
|
||||
output_str = "".join([
|
||||
l.decode("utf-8") for l in template.readlines()
|
||||
])
|
||||
except Exception as err:
|
||||
print(f"Error reading template {template}: {err}\n"
|
||||
"a minimal flame graph will be generated", file=sys.stderr)
|
||||
output_str = minimal_html
|
||||
template_md5sum = None
|
||||
|
||||
if template_md5sum:
|
||||
download_md5sum = hashlib.md5(output_str.encode("utf-8")).hexdigest()
|
||||
if download_md5sum != template_md5sum:
|
||||
s = None
|
||||
while s != "y" and s != "n":
|
||||
s = input(f"""Unexpected template md5sum.
|
||||
{download_md5sum} != {template_md5sum}, for:
|
||||
{output_str}
|
||||
continue?[yn] """).lower()
|
||||
if s == "n":
|
||||
quit()
|
||||
|
||||
output_str = output_str.replace("/** @options_json **/", options_json)
|
||||
output_str = output_str.replace("/** @flamegraph_json **/", stacks_json)
|
||||
|
||||
output_fn = self.args.output or "flamegraph.html"
|
||||
else:
|
||||
output_str = stacks_json
|
||||
@ -172,6 +231,10 @@ if __name__ == "__main__":
|
||||
choices=["blue-green", "orange"])
|
||||
parser.add_argument("-i", "--input",
|
||||
help=argparse.SUPPRESS)
|
||||
parser.add_argument("--allow-download",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="allow unprompted downloading of HTML template")
|
||||
|
||||
cli_args = parser.parse_args()
|
||||
cli = FlameGraphCLI(cli_args)
|
||||
|
@ -68,6 +68,7 @@ perf-y += perf-time-to-tsc.o
|
||||
perf-y += dlfilter-test.o
|
||||
perf-y += sigtrap.o
|
||||
perf-y += event_groups.o
|
||||
perf-y += symbols.o
|
||||
|
||||
$(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build
|
||||
$(call rule_mkdir)
|
||||
|
@ -43,7 +43,7 @@ struct {
|
||||
__type(value, int);
|
||||
} flip_table SEC(".maps");
|
||||
|
||||
SEC("func=do_epoll_wait")
|
||||
SEC("syscalls:sys_enter_epoll_pwait")
|
||||
int bpf_func__SyS_epoll_pwait(void *ctx)
|
||||
{
|
||||
int ind =0;
|
||||
|
@ -23,7 +23,7 @@
|
||||
#define NR_ITERS 111
|
||||
#define PERF_TEST_BPF_PATH "/sys/fs/bpf/perf_test"
|
||||
|
||||
#ifdef HAVE_LIBBPF_SUPPORT
|
||||
#if defined(HAVE_LIBBPF_SUPPORT) && defined(HAVE_LIBTRACEEVENT)
|
||||
#include <linux/bpf.h>
|
||||
#include <bpf/bpf.h>
|
||||
|
||||
@ -126,6 +126,10 @@ static int do_test(struct bpf_object *obj, int (*func)(void),
|
||||
|
||||
err = parse_events_load_bpf_obj(&parse_state, &parse_state.list, obj, NULL);
|
||||
parse_events_error__exit(&parse_error);
|
||||
if (err == -ENODATA) {
|
||||
pr_debug("Failed to add events selected by BPF, debuginfo package not installed\n");
|
||||
return TEST_SKIP;
|
||||
}
|
||||
if (err || list_empty(&parse_state.list)) {
|
||||
pr_debug("Failed to add events selected by BPF\n");
|
||||
return TEST_FAIL;
|
||||
@ -330,10 +334,10 @@ static int test__bpf(int i)
|
||||
static int test__basic_bpf_test(struct test_suite *test __maybe_unused,
|
||||
int subtest __maybe_unused)
|
||||
{
|
||||
#ifdef HAVE_LIBBPF_SUPPORT
|
||||
#if defined(HAVE_LIBBPF_SUPPORT) && defined(HAVE_LIBTRACEEVENT)
|
||||
return test__bpf(0);
|
||||
#else
|
||||
pr_debug("Skip BPF test because BPF support is not compiled\n");
|
||||
pr_debug("Skip BPF test because BPF or libtraceevent support is not compiled\n");
|
||||
return TEST_SKIP;
|
||||
#endif
|
||||
}
|
||||
@ -341,10 +345,10 @@ static int test__basic_bpf_test(struct test_suite *test __maybe_unused,
|
||||
static int test__bpf_pinning(struct test_suite *test __maybe_unused,
|
||||
int subtest __maybe_unused)
|
||||
{
|
||||
#ifdef HAVE_LIBBPF_SUPPORT
|
||||
#if defined(HAVE_LIBBPF_SUPPORT) && defined(HAVE_LIBTRACEEVENT)
|
||||
return test__bpf(1);
|
||||
#else
|
||||
pr_debug("Skip BPF test because BPF support is not compiled\n");
|
||||
pr_debug("Skip BPF test because BPF or libtraceevent support is not compiled\n");
|
||||
return TEST_SKIP;
|
||||
#endif
|
||||
}
|
||||
@ -352,30 +356,30 @@ static int test__bpf_pinning(struct test_suite *test __maybe_unused,
|
||||
static int test__bpf_prologue_test(struct test_suite *test __maybe_unused,
|
||||
int subtest __maybe_unused)
|
||||
{
|
||||
#if defined(HAVE_LIBBPF_SUPPORT) && defined(HAVE_BPF_PROLOGUE)
|
||||
#if defined(HAVE_LIBBPF_SUPPORT) && defined(HAVE_BPF_PROLOGUE) && defined(HAVE_LIBTRACEEVENT)
|
||||
return test__bpf(2);
|
||||
#else
|
||||
pr_debug("Skip BPF test because BPF support is not compiled\n");
|
||||
pr_debug("Skip BPF test because BPF or libtraceevent support is not compiled\n");
|
||||
return TEST_SKIP;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
static struct test_case bpf_tests[] = {
|
||||
#ifdef HAVE_LIBBPF_SUPPORT
|
||||
#if defined(HAVE_LIBBPF_SUPPORT) && defined(HAVE_LIBTRACEEVENT)
|
||||
TEST_CASE("Basic BPF filtering", basic_bpf_test),
|
||||
TEST_CASE_REASON("BPF pinning", bpf_pinning,
|
||||
"clang isn't installed or environment missing BPF support"),
|
||||
#ifdef HAVE_BPF_PROLOGUE
|
||||
TEST_CASE_REASON("BPF prologue generation", bpf_prologue_test,
|
||||
"clang isn't installed or environment missing BPF support"),
|
||||
"clang/debuginfo isn't installed or environment missing BPF support"),
|
||||
#else
|
||||
TEST_CASE_REASON("BPF prologue generation", bpf_prologue_test, "not compiled in"),
|
||||
#endif
|
||||
#else
|
||||
TEST_CASE_REASON("Basic BPF filtering", basic_bpf_test, "not compiled in"),
|
||||
TEST_CASE_REASON("BPF pinning", bpf_pinning, "not compiled in"),
|
||||
TEST_CASE_REASON("BPF prologue generation", bpf_prologue_test, "not compiled in"),
|
||||
TEST_CASE_REASON("Basic BPF filtering", basic_bpf_test, "not compiled in or missing libtraceevent support"),
|
||||
TEST_CASE_REASON("BPF pinning", bpf_pinning, "not compiled in or missing libtraceevent support"),
|
||||
TEST_CASE_REASON("BPF prologue generation", bpf_prologue_test, "not compiled in or missing libtraceevent support"),
|
||||
#endif
|
||||
{ .name = NULL, }
|
||||
};
|
||||
|
@ -31,6 +31,7 @@
|
||||
#include "builtin-test-list.h"
|
||||
|
||||
static bool dont_fork;
|
||||
const char *dso_to_test;
|
||||
|
||||
struct test_suite *__weak arch_tests[] = {
|
||||
NULL,
|
||||
@ -117,6 +118,7 @@ static struct test_suite *generic_tests[] = {
|
||||
&suite__dlfilter,
|
||||
&suite__sigtrap,
|
||||
&suite__event_groups,
|
||||
&suite__symbols,
|
||||
NULL,
|
||||
};
|
||||
|
||||
@ -521,6 +523,7 @@ int cmd_test(int argc, const char **argv)
|
||||
OPT_BOOLEAN('F', "dont-fork", &dont_fork,
|
||||
"Do not fork for testcase"),
|
||||
OPT_STRING('w', "workload", &workload, "work", "workload to run for testing"),
|
||||
OPT_STRING(0, "dso", &dso_to_test, "dso", "dso to test"),
|
||||
OPT_END()
|
||||
};
|
||||
const char * const test_subcommands[] = { "list", NULL };
|
||||
|
@ -67,6 +67,7 @@ int test_dwarf_unwind__compare(void *p1, void *p2);
|
||||
int test_dwarf_unwind__krava_3(struct thread *thread);
|
||||
int test_dwarf_unwind__krava_2(struct thread *thread);
|
||||
int test_dwarf_unwind__krava_1(struct thread *thread);
|
||||
int test__dwarf_unwind(struct test_suite *test, int subtest);
|
||||
|
||||
#define MAX_STACK 8
|
||||
|
||||
@ -195,8 +196,8 @@ NO_TAIL_CALL_ATTRIBUTE noinline int test_dwarf_unwind__krava_1(struct thread *th
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int test__dwarf_unwind(struct test_suite *test __maybe_unused,
|
||||
int subtest __maybe_unused)
|
||||
noinline int test__dwarf_unwind(struct test_suite *test __maybe_unused,
|
||||
int subtest __maybe_unused)
|
||||
{
|
||||
struct machine *machine;
|
||||
struct thread *thread;
|
||||
|
@ -180,13 +180,13 @@ static int expand_metric_events(void)
|
||||
struct evlist *evlist;
|
||||
struct rblist metric_events;
|
||||
const char metric_str[] = "CPI";
|
||||
const struct pmu_events_table *pme_test;
|
||||
const struct pmu_metrics_table *pme_test;
|
||||
|
||||
evlist = evlist__new();
|
||||
TEST_ASSERT_VAL("failed to get evlist", evlist);
|
||||
|
||||
rblist__init(&metric_events);
|
||||
pme_test = find_core_events_table("testarch", "testcpu");
|
||||
pme_test = find_core_metrics_table("testarch", "testcpu");
|
||||
ret = metricgroup__parse_groups_test(evlist, pme_test, metric_str,
|
||||
false, false, &metric_events);
|
||||
if (ret < 0) {
|
||||
|
@ -72,7 +72,7 @@ static int __compute_metric(const char *name, struct value *vals,
|
||||
struct rblist metric_events = {
|
||||
.nr_entries = 0,
|
||||
};
|
||||
const struct pmu_events_table *pme_test;
|
||||
const struct pmu_metrics_table *pme_test;
|
||||
struct perf_cpu_map *cpus;
|
||||
struct runtime_stat st;
|
||||
struct evlist *evlist;
|
||||
@ -96,7 +96,7 @@ static int __compute_metric(const char *name, struct value *vals,
|
||||
runtime_stat__init(&st);
|
||||
|
||||
/* Parse the metric into metric_events list. */
|
||||
pme_test = find_core_events_table("testarch", "testcpu");
|
||||
pme_test = find_core_metrics_table("testarch", "testcpu");
|
||||
err = metricgroup__parse_groups_test(evlist, pme_test, name,
|
||||
false, false,
|
||||
&metric_events);
|
||||
|
@ -337,36 +337,12 @@ static int compare_pmu_events(const struct pmu_event *e1, const struct pmu_event
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!is_same(e1->metric_expr, e2->metric_expr)) {
|
||||
pr_debug2("testing event e1 %s: mismatched metric_expr, %s vs %s\n",
|
||||
e1->name, e1->metric_expr, e2->metric_expr);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!is_same(e1->metric_name, e2->metric_name)) {
|
||||
pr_debug2("testing event e1 %s: mismatched metric_name, %s vs %s\n",
|
||||
e1->name, e1->metric_name, e2->metric_name);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!is_same(e1->metric_group, e2->metric_group)) {
|
||||
pr_debug2("testing event e1 %s: mismatched metric_group, %s vs %s\n",
|
||||
e1->name, e1->metric_group, e2->metric_group);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!is_same(e1->deprecated, e2->deprecated)) {
|
||||
pr_debug2("testing event e1 %s: mismatched deprecated, %s vs %s\n",
|
||||
e1->name, e1->deprecated, e2->deprecated);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!is_same(e1->metric_constraint, e2->metric_constraint)) {
|
||||
pr_debug2("testing event e1 %s: mismatched metric_constant, %s vs %s\n",
|
||||
e1->name, e1->metric_constraint, e2->metric_constraint);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -432,9 +408,6 @@ static int test__pmu_event_table_core_callback(const struct pmu_event *pe,
|
||||
struct perf_pmu_test_event const **test_event_table;
|
||||
bool found = false;
|
||||
|
||||
if (!pe->name)
|
||||
return 0;
|
||||
|
||||
if (pe->pmu)
|
||||
test_event_table = &uncore_events[0];
|
||||
else
|
||||
@ -496,7 +469,8 @@ static int test__pmu_event_table_sys_callback(const struct pmu_event *pe,
|
||||
static int test__pmu_event_table(struct test_suite *test __maybe_unused,
|
||||
int subtest __maybe_unused)
|
||||
{
|
||||
const struct pmu_events_table *sys_event_table = find_sys_events_table("pme_test_soc_sys");
|
||||
const struct pmu_events_table *sys_event_table =
|
||||
find_sys_events_table("pmu_events__test_soc_sys");
|
||||
const struct pmu_events_table *table = find_core_events_table("testarch", "testcpu");
|
||||
int map_events = 0, expected_events, err;
|
||||
|
||||
@ -840,7 +814,8 @@ struct metric {
|
||||
struct metric_ref metric_ref;
|
||||
};
|
||||
|
||||
static int test__parsing_callback(const struct pmu_event *pe, const struct pmu_events_table *table,
|
||||
static int test__parsing_callback(const struct pmu_metric *pm,
|
||||
const struct pmu_metrics_table *table,
|
||||
void *data)
|
||||
{
|
||||
int *failures = data;
|
||||
@ -854,10 +829,10 @@ static int test__parsing_callback(const struct pmu_event *pe, const struct pmu_e
|
||||
};
|
||||
int err = 0;
|
||||
|
||||
if (!pe->metric_expr)
|
||||
if (!pm->metric_expr)
|
||||
return 0;
|
||||
|
||||
pr_debug("Found metric '%s'\n", pe->metric_name);
|
||||
pr_debug("Found metric '%s'\n", pm->metric_name);
|
||||
(*failures)++;
|
||||
|
||||
/*
|
||||
@ -877,14 +852,14 @@ static int test__parsing_callback(const struct pmu_event *pe, const struct pmu_e
|
||||
perf_evlist__set_maps(&evlist->core, cpus, NULL);
|
||||
runtime_stat__init(&st);
|
||||
|
||||
err = metricgroup__parse_groups_test(evlist, table, pe->metric_name,
|
||||
err = metricgroup__parse_groups_test(evlist, table, pm->metric_name,
|
||||
false, false,
|
||||
&metric_events);
|
||||
if (err) {
|
||||
if (!strcmp(pe->metric_name, "M1") || !strcmp(pe->metric_name, "M2") ||
|
||||
!strcmp(pe->metric_name, "M3")) {
|
||||
if (!strcmp(pm->metric_name, "M1") || !strcmp(pm->metric_name, "M2") ||
|
||||
!strcmp(pm->metric_name, "M3")) {
|
||||
(*failures)--;
|
||||
pr_debug("Expected broken metric %s skipping\n", pe->metric_name);
|
||||
pr_debug("Expected broken metric %s skipping\n", pm->metric_name);
|
||||
err = 0;
|
||||
}
|
||||
goto out_err;
|
||||
@ -912,7 +887,7 @@ static int test__parsing_callback(const struct pmu_event *pe, const struct pmu_e
|
||||
struct metric_expr *mexp;
|
||||
|
||||
list_for_each_entry (mexp, &me->head, nd) {
|
||||
if (strcmp(mexp->metric_name, pe->metric_name))
|
||||
if (strcmp(mexp->metric_name, pm->metric_name))
|
||||
continue;
|
||||
pr_debug("Result %f\n", test_generic_metric(mexp, 0, &st));
|
||||
err = 0;
|
||||
@ -921,11 +896,11 @@ static int test__parsing_callback(const struct pmu_event *pe, const struct pmu_e
|
||||
}
|
||||
}
|
||||
}
|
||||
pr_debug("Didn't find parsed metric %s", pe->metric_name);
|
||||
pr_debug("Didn't find parsed metric %s", pm->metric_name);
|
||||
err = 1;
|
||||
out_err:
|
||||
if (err)
|
||||
pr_debug("Broken metric %s\n", pe->metric_name);
|
||||
pr_debug("Broken metric %s\n", pm->metric_name);
|
||||
|
||||
/* ... cleanup. */
|
||||
metricgroup__rblist_exit(&metric_events);
|
||||
@ -941,8 +916,8 @@ static int test__parsing(struct test_suite *test __maybe_unused,
|
||||
{
|
||||
int failures = 0;
|
||||
|
||||
pmu_for_each_core_event(test__parsing_callback, &failures);
|
||||
pmu_for_each_sys_event(test__parsing_callback, &failures);
|
||||
pmu_for_each_core_metric(test__parsing_callback, &failures);
|
||||
pmu_for_each_sys_metric(test__parsing_callback, &failures);
|
||||
|
||||
return failures == 0 ? TEST_OK : TEST_FAIL;
|
||||
}
|
||||
@ -975,6 +950,7 @@ static int metric_parse_fake(const char *metric_name, const char *str)
|
||||
pr_debug("expr__ctx_new failed");
|
||||
return TEST_FAIL;
|
||||
}
|
||||
ctx->sctx.is_test = true;
|
||||
if (expr__find_ids(str, NULL, ctx) < 0) {
|
||||
pr_err("expr__find_ids failed\n");
|
||||
return -1;
|
||||
@ -1021,14 +997,11 @@ static int metric_parse_fake(const char *metric_name, const char *str)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int test__parsing_fake_callback(const struct pmu_event *pe,
|
||||
const struct pmu_events_table *table __maybe_unused,
|
||||
static int test__parsing_fake_callback(const struct pmu_metric *pm,
|
||||
const struct pmu_metrics_table *table __maybe_unused,
|
||||
void *data __maybe_unused)
|
||||
{
|
||||
if (!pe->metric_expr)
|
||||
return 0;
|
||||
|
||||
return metric_parse_fake(pe->metric_name, pe->metric_expr);
|
||||
return metric_parse_fake(pm->metric_name, pm->metric_expr);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1047,11 +1020,11 @@ static int test__parsing_fake(struct test_suite *test __maybe_unused,
|
||||
return err;
|
||||
}
|
||||
|
||||
err = pmu_for_each_core_event(test__parsing_fake_callback, NULL);
|
||||
err = pmu_for_each_core_metric(test__parsing_fake_callback, NULL);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
return pmu_for_each_sys_event(test__parsing_fake_callback, NULL);
|
||||
return pmu_for_each_sys_metric(test__parsing_fake_callback, NULL);
|
||||
}
|
||||
|
||||
static struct test_case pmu_events_tests[] = {
|
||||
|
@ -37,7 +37,7 @@
|
||||
* in branch_stack variable.
|
||||
*/
|
||||
#define BS_EXPECTED_BE 0xa000d00000000000
|
||||
#define BS_EXPECTED_LE 0xd5000000
|
||||
#define BS_EXPECTED_LE 0x1aa00000000
|
||||
#define FLAG(s) s->branch_stack->entries[i].flags
|
||||
|
||||
static bool samples_same(const struct perf_sample *s1,
|
||||
|
@ -66,7 +66,9 @@ check()
|
||||
esac
|
||||
echo "build id: ${id}"
|
||||
|
||||
link=${build_id_dir}/.build-id/${id:0:2}/${id:2}
|
||||
id_file=${id#??}
|
||||
id_dir=${id%$id_file}
|
||||
link=$build_id_dir/.build-id/$id_dir/$id_file
|
||||
echo "link: ${link}"
|
||||
|
||||
if [ ! -h $link ]; then
|
||||
@ -74,7 +76,7 @@ check()
|
||||
exit 1
|
||||
fi
|
||||
|
||||
file=${build_id_dir}/.build-id/${id:0:2}/`readlink ${link}`/elf
|
||||
file=${build_id_dir}/.build-id/$id_dir/`readlink ${link}`/elf
|
||||
echo "file: ${file}"
|
||||
|
||||
# Check for file permission of original file
|
||||
@ -130,20 +132,22 @@ test_record()
|
||||
{
|
||||
data=$(mktemp /tmp/perf.data.XXX)
|
||||
build_id_dir=$(mktemp -d /tmp/perf.debug.XXX)
|
||||
log=$(mktemp /tmp/perf.log.XXX)
|
||||
log_out=$(mktemp /tmp/perf.log.out.XXX)
|
||||
log_err=$(mktemp /tmp/perf.log.err.XXX)
|
||||
perf="perf --buildid-dir ${build_id_dir}"
|
||||
|
||||
echo "running: perf record $@"
|
||||
${perf} record --buildid-all -o ${data} $@ &> ${log}
|
||||
${perf} record --buildid-all -o ${data} $@ 1>${log_out} 2>${log_err}
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "failed: record $@"
|
||||
echo "see log: ${log}"
|
||||
echo "see log: ${log_err}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
check ${@: -1}
|
||||
args="$*"
|
||||
check ${args##* }
|
||||
|
||||
rm -f ${log}
|
||||
rm -f ${log_out} ${log_err}
|
||||
rm -rf ${build_id_dir}
|
||||
rm -rf ${data}
|
||||
}
|
||||
|
@ -58,9 +58,9 @@ perf_dump_aux_verify() {
|
||||
# compiler may produce different code depending on the compiler and
|
||||
# optimization options, so this is rough just to see if we're
|
||||
# either missing almost all the data or all of it
|
||||
ATOM_FX_NUM=`grep I_ATOM_F "$DUMP" | wc -l`
|
||||
ASYNC_NUM=`grep I_ASYNC "$DUMP" | wc -l`
|
||||
TRACE_INFO_NUM=`grep I_TRACE_INFO "$DUMP" | wc -l`
|
||||
ATOM_FX_NUM=$(grep -c I_ATOM_F "$DUMP")
|
||||
ASYNC_NUM=$(grep -c I_ASYNC "$DUMP")
|
||||
TRACE_INFO_NUM=$(grep -c I_TRACE_INFO "$DUMP")
|
||||
rm -f "$DUMP"
|
||||
|
||||
# Arguments provide minimums for a pass
|
||||
@ -96,18 +96,18 @@ perf_dump_aux_tid_verify() {
|
||||
|
||||
# The TID test tools will print a TID per stdout line that are being
|
||||
# tested
|
||||
TIDS=`cat "$2"`
|
||||
TIDS=$(cat "$2")
|
||||
# Scan the perf report to find the TIDs that are actually CID in hex
|
||||
# and build a list of the ones found
|
||||
FOUND_TIDS=`perf report --stdio --dump -i "$1" | \
|
||||
FOUND_TIDS=$(perf report --stdio --dump -i "$1" | \
|
||||
grep -o "CID=0x[0-9a-z]\+" | sed 's/CID=//g' | \
|
||||
uniq | sort | uniq`
|
||||
uniq | sort | uniq)
|
||||
# No CID=xxx found - maybe your kernel is reporting these as
|
||||
# VMID=xxx so look there
|
||||
if test -z "$FOUND_TIDS"; then
|
||||
FOUND_TIDS=`perf report --stdio --dump -i "$1" | \
|
||||
FOUND_TIDS=$(perf report --stdio --dump -i "$1" | \
|
||||
grep -o "VMID=0x[0-9a-z]\+" | sed 's/VMID=//g' | \
|
||||
uniq | sort | uniq`
|
||||
uniq | sort | uniq)
|
||||
fi
|
||||
|
||||
# Iterate over the list of TIDs that the test says it has and find
|
||||
@ -116,7 +116,7 @@ perf_dump_aux_tid_verify() {
|
||||
for TID2 in $TIDS; do
|
||||
FOUND=""
|
||||
for TIDHEX in $FOUND_TIDS; do
|
||||
TID=`printf "%i" $TIDHEX`
|
||||
TID=$(printf "%i" $TIDHEX)
|
||||
if test "$TID" -eq "$TID2"; then
|
||||
FOUND="y"
|
||||
break
|
||||
|
@ -22,3 +22,11 @@ skip_if_no_debuginfo() {
|
||||
add_probe_vfs_getname -v 2>&1 | grep -E -q "^(Failed to find the path for the kernel|Debuginfo-analysis is not supported)|(file has no debug information)" && return 2
|
||||
return 1
|
||||
}
|
||||
|
||||
# check if perf is compiled with libtraceevent support
|
||||
# Check whether 'perf record' can use the given probe event.
#   $1: event specification handed to 'perf record -e'
# Returns 2 when perf reports that libtraceevent is missing, 1 otherwise.
# Nothing is checked unless $had_vfs_getname is 1.
skip_no_probe_record_support() {
	if [ $had_vfs_getname -eq 1 ] ; then
		# Quote the event: callers pass glob characters (e.g. "probe:vfs_getname*")
		# that must reach perf verbatim instead of being expanded by the shell.
		perf record --dry-run -e "$1" 2>&1 | grep "libtraceevent is necessary for tracepoint support" && return 2
		return 1
	fi
}
|
||||
|
@ -128,7 +128,7 @@ test_type_filter()
|
||||
echo "Testing perf lock contention --type-filter (w/ spinlock)"
|
||||
perf lock contention -i ${perfdata} -Y spinlock -q 2> ${result}
|
||||
if [ $(grep -c -v spinlock "${result}") != "0" ]; then
|
||||
echo "[Fail] Recorded should not have non-spinlocks:" $(cat "${result}")
|
||||
echo "[Fail] Recorded result should not have non-spinlocks:" $(cat "${result}")
|
||||
err=1
|
||||
exit
|
||||
fi
|
||||
@ -139,7 +139,7 @@ test_type_filter()
|
||||
|
||||
perf lock con -a -b -Y spinlock -q -- perf bench sched messaging > /dev/null 2> ${result}
|
||||
if [ $(grep -c -v spinlock "${result}") != "0" ]; then
|
||||
echo "[Fail] Recorded should not have non-spinlocks:" $(cat "${result}")
|
||||
echo "[Fail] BPF result should not have non-spinlocks:" $(cat "${result}")
|
||||
err=1
|
||||
exit
|
||||
fi
|
||||
@ -160,7 +160,7 @@ test_lock_filter()
|
||||
local type=$(head -1 "${result}" | awk '{ print $8 }' | sed -e 's/:.*//')
|
||||
|
||||
if [ $(grep -c -v "${type}" "${result}") != "0" ]; then
|
||||
echo "[Fail] Recorded should not have non-${type} locks:" $(cat "${result}")
|
||||
echo "[Fail] Recorded result should not have non-${type} locks:" $(cat "${result}")
|
||||
err=1
|
||||
exit
|
||||
fi
|
||||
@ -171,7 +171,63 @@ test_lock_filter()
|
||||
|
||||
perf lock con -a -b -L tasklist_lock -q -- perf bench sched messaging > /dev/null 2> ${result}
|
||||
if [ $(grep -c -v "${type}" "${result}") != "0" ]; then
|
||||
echo "[Fail] Recorded should not have non-${type} locks:" $(cat "${result}")
|
||||
echo "[Fail] BPF result should not have non-${type} locks:" $(cat "${result}")
|
||||
err=1
|
||||
exit
|
||||
fi
|
||||
}
|
||||
|
||||
# Check that -S/--callstack-filter restricts the output to entries whose
# callstack contains the given string, first on the recorded data file and
# then, when 'perf lock con -b' works, on a live BPF run.
test_stack_filter()
{
	echo "Testing perf lock contention --callstack-filter (w/ unix_stream)"
	perf lock contention -i ${perfdata} -v -q 2> ${result}
	# Use POSIX '=' ('==' is a bash extension to test) and quote the
	# substitution so an empty result cannot break the test expression.
	if [ "$(grep -c unix_stream "${result}")" = "0" ]; then
		echo "[Skip] Could not find 'unix_stream'"
		return
	fi

	perf lock contention -i ${perfdata} -E 1 -S unix_stream -q 2> ${result}
	if [ $(cat "${result}" | wc -l) != "1" ]; then
		echo "[Fail] Recorded result should have a lock from unix_stream:" $(cat "${result}")
		err=1
		exit
	fi

	# The BPF variant is optional: stop here when it cannot run.
	if ! perf lock con -b true > /dev/null 2>&1 ; then
		return
	fi

	perf lock con -a -b -S unix_stream -E 1 -q -- perf bench sched messaging > /dev/null 2> ${result}
	if [ $(cat "${result}" | wc -l) != "1" ]; then
		echo "[Fail] BPF result should have a lock from unix_stream:" $(cat "${result}")
		err=1
		exit
	fi
}
|
||||
|
||||
test_aggr_task_stack_filter()
|
||||
{
|
||||
echo "Testing perf lock contention --callstack-filter with task aggregation"
|
||||
perf lock contention -i ${perfdata} -v -q 2> ${result}
|
||||
if [ $(grep -c unix_stream "${result}") == "0" ]; then
|
||||
echo "[Skip] Could not find 'unix_stream'"
|
||||
return
|
||||
fi
|
||||
|
||||
perf lock contention -i ${perfdata} -t -E 1 -S unix_stream -q 2> ${result}
|
||||
if [ $(cat "${result}" | wc -l) != "1" ]; then
|
||||
echo "[Fail] Recorded result should have a task from unix_stream:" $(cat "${result}")
|
||||
err=1
|
||||
exit
|
||||
fi
|
||||
|
||||
if ! perf lock con -b true > /dev/null 2>&1 ; then
|
||||
return
|
||||
fi
|
||||
|
||||
perf lock con -a -b -t -S unix_stream -E 1 -q -- perf bench sched messaging > /dev/null 2> ${result}
|
||||
if [ $(cat "${result}" | wc -l) != "1" ]; then
|
||||
echo "[Fail] BPF result should have a task from unix_stream:" $(cat "${result}")
|
||||
err=1
|
||||
exit
|
||||
fi
|
||||
@ -186,5 +242,7 @@ test_aggr_task
|
||||
test_aggr_addr
|
||||
test_type_filter
|
||||
test_lock_filter
|
||||
test_stack_filter
|
||||
test_aggr_task_stack_filter
|
||||
|
||||
exit ${err}
|
||||
|
@ -11,6 +11,7 @@
|
||||
# Arnaldo Carvalho de Melo <acme@kernel.org>, 2017
|
||||
|
||||
. $(dirname $0)/lib/probe.sh
|
||||
. $(dirname $0)/lib/probe_vfs_getname.sh
|
||||
|
||||
libc=$(grep -w libc /proc/self/maps | head -1 | sed -r 's/.*[[:space:]](\/.*)/\1/g')
|
||||
nm -Dg $libc 2>/dev/null | fgrep -q inet_pton || exit 254
|
||||
@ -57,7 +58,17 @@ trace_libc_inet_pton_backtrace() {
|
||||
|
||||
perf_data=`mktemp -u /tmp/perf.data.XXX`
|
||||
perf_script=`mktemp -u /tmp/perf.script.XXX`
|
||||
|
||||
# Check presence of libtraceevent support to run perf record
|
||||
skip_no_probe_record_support "$event_name/$eventattr/"
|
||||
[ $? -eq 2 ] && return 2
|
||||
|
||||
perf record -e $event_name/$eventattr/ -o $perf_data ping -6 -c 1 ::1 > /dev/null 2>&1
|
||||
# check if perf data file got created in above step.
|
||||
if [ ! -e $perf_data ]; then
|
||||
printf "FAIL: perf record failed to create \"%s\" \n" "$perf_data"
|
||||
return 1
|
||||
fi
|
||||
perf script -i $perf_data | tac | grep -m1 ^ping -B9 | tac > $perf_script
|
||||
|
||||
exec 3<$perf_script
|
||||
|
@ -17,6 +17,9 @@ skip_if_no_perf_probe || exit 2
|
||||
|
||||
# Record the vfs_getname probe while touching $file.
# Returns 2 when skip_no_probe_record_support reports missing support,
# otherwise the exit status of 'perf record'.
record_open_file() {
	echo "Recording open file:"
	# Check presence of libtraceevent support to run perf record
	skip_no_probe_record_support "probe:vfs_getname*"
	if [ $? -eq 2 ]; then
		return 2
	fi
	perf record -o ${perfdata} -e probe:vfs_getname\* touch $file
}
|
||||
|
||||
|
@ -11,7 +11,7 @@ for m in $(perf list --raw-dump metrics); do
|
||||
continue
|
||||
fi
|
||||
# Failed so try system wide.
|
||||
result=$(perf stat -M "$m" -a true 2>&1)
|
||||
result=$(perf stat -M "$m" -a sleep 0.01 2>&1)
|
||||
if [[ "$result" =~ "${m:0:50}" ]]
|
||||
then
|
||||
continue
|
||||
|
@ -30,14 +30,14 @@ test_user_branches() {
|
||||
# brstack_foo+0x14/brstack_bar+0x40/P/-/-/0/CALL
|
||||
|
||||
set -x
|
||||
grep -E -m1 "^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/IND_CALL$" $TMPDIR/perf.script
|
||||
grep -E -m1 "^brstack_foo\+[^ ]*/brstack_bar\+[^ ]*/CALL$" $TMPDIR/perf.script
|
||||
grep -E -m1 "^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/CALL$" $TMPDIR/perf.script
|
||||
grep -E -m1 "^brstack_bench\+[^ ]*/brstack_bar\+[^ ]*/CALL$" $TMPDIR/perf.script
|
||||
grep -E -m1 "^brstack_bar\+[^ ]*/brstack_foo\+[^ ]*/RET$" $TMPDIR/perf.script
|
||||
grep -E -m1 "^brstack_foo\+[^ ]*/brstack_bench\+[^ ]*/RET$" $TMPDIR/perf.script
|
||||
grep -E -m1 "^brstack_bench\+[^ ]*/brstack_bench\+[^ ]*/COND$" $TMPDIR/perf.script
|
||||
grep -E -m1 "^brstack\+[^ ]*/brstack\+[^ ]*/UNCOND$" $TMPDIR/perf.script
|
||||
grep -E -m1 "^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/IND_CALL/.*$" $TMPDIR/perf.script
|
||||
grep -E -m1 "^brstack_foo\+[^ ]*/brstack_bar\+[^ ]*/CALL/.*$" $TMPDIR/perf.script
|
||||
grep -E -m1 "^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/CALL/.*$" $TMPDIR/perf.script
|
||||
grep -E -m1 "^brstack_bench\+[^ ]*/brstack_bar\+[^ ]*/CALL/.*$" $TMPDIR/perf.script
|
||||
grep -E -m1 "^brstack_bar\+[^ ]*/brstack_foo\+[^ ]*/RET/.*$" $TMPDIR/perf.script
|
||||
grep -E -m1 "^brstack_foo\+[^ ]*/brstack_bench\+[^ ]*/RET/.*$" $TMPDIR/perf.script
|
||||
grep -E -m1 "^brstack_bench\+[^ ]*/brstack_bench\+[^ ]*/COND/.*$" $TMPDIR/perf.script
|
||||
grep -E -m1 "^brstack\+[^ ]*/brstack\+[^ ]*/UNCOND/.*$" $TMPDIR/perf.script
|
||||
set +x
|
||||
|
||||
# some branch types are still not being tested:
|
||||
@ -57,7 +57,7 @@ test_filter() {
|
||||
|
||||
# fail if we find any branch type that doesn't match any of the expected ones
|
||||
# also consider UNKNOWN branch types (-)
|
||||
if grep -E -vm1 "^[^ ]*/($expect|-|( *))$" $TMPDIR/perf.script; then
|
||||
if grep -E -vm1 "^[^ ]*/($expect|-|( *))/.*$" $TMPDIR/perf.script; then
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
@ -620,6 +620,22 @@ test_event_trace()
|
||||
return 0
|
||||
}
|
||||
|
||||
# Exercise Intel PT with perf.data streamed through a pipe, once into
# 'perf report' and once into 'perf inject'. Returns 1 on the first pipeline
# that fails, 0 otherwise.
test_pipe()
{
	echo "--- Test with pipe mode ---"
	# Check if it works with pipe
	perf_record_no_bpf -o- -e intel_pt//u uname | perf report -q -i- --itrace=i10000 || {
		echo "perf record + report failed with pipe mode"
		return 1
	}
	perf_record_no_bpf -o- -e intel_pt//u uname | perf inject -b > /dev/null || {
		echo "perf record + inject failed with pipe mode"
		return 1
	}
	echo OK
	return 0
}
|
||||
|
||||
count_result()
|
||||
{
|
||||
if [ "$1" -eq 2 ] ; then
|
||||
@ -647,6 +663,7 @@ test_virtual_lbr || ret=$? ; count_result $ret ; ret=0
|
||||
test_power_event || ret=$? ; count_result $ret ; ret=0
|
||||
test_no_tnt || ret=$? ; count_result $ret ; ret=0
|
||||
test_event_trace || ret=$? ; count_result $ret ; ret=0
|
||||
test_pipe || ret=$? ; count_result $ret ; ret=0
|
||||
|
||||
cleanup
|
||||
|
||||
|
150
tools/perf/tests/symbols.c
Normal file
150
tools/perf/tests/symbols.c
Normal file
@ -0,0 +1,150 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/string.h>
|
||||
#include <sys/mman.h>
|
||||
#include <limits.h>
|
||||
#include "debug.h"
|
||||
#include "dso.h"
|
||||
#include "machine.h"
|
||||
#include "thread.h"
|
||||
#include "symbol.h"
|
||||
#include "map.h"
|
||||
#include "util.h"
|
||||
#include "tests.h"
|
||||
|
||||
/* State shared by the helpers of the symbols test. */
struct test_info {
	/* Host machine obtained from machine__new_host() */
	struct machine *machine;
	/* Dummy thread obtained from machine__findnew_thread() */
	struct thread *thread;
};
|
||||
|
||||
static int init_test_info(struct test_info *ti)
|
||||
{
|
||||
ti->machine = machine__new_host();
|
||||
if (!ti->machine) {
|
||||
pr_debug("machine__new_host() failed!\n");
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
/* Create a dummy thread */
|
||||
ti->thread = machine__findnew_thread(ti->machine, 100, 100);
|
||||
if (!ti->thread) {
|
||||
pr_debug("machine__findnew_thread() failed!\n");
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
return TEST_OK;
|
||||
}
|
||||
|
||||
static void exit_test_info(struct test_info *ti)
|
||||
{
|
||||
thread__put(ti->thread);
|
||||
machine__delete(ti->machine);
|
||||
}
|
||||
|
||||
static void get_test_dso_filename(char *filename, size_t max_sz)
|
||||
{
|
||||
if (dso_to_test)
|
||||
strlcpy(filename, dso_to_test, max_sz);
|
||||
else
|
||||
perf_exe(filename, max_sz);
|
||||
}
|
||||
|
||||
static int create_map(struct test_info *ti, char *filename, struct map **map_p)
|
||||
{
|
||||
/* Create a dummy map at 0x100000 */
|
||||
*map_p = map__new(ti->machine, 0x100000, 0xffffffff, 0, NULL,
|
||||
PROT_EXEC, 0, NULL, filename, ti->thread);
|
||||
if (!*map_p) {
|
||||
pr_debug("Failed to create map!");
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
return TEST_OK;
|
||||
}
|
||||
|
||||
static int test_dso(struct dso *dso)
|
||||
{
|
||||
struct symbol *last_sym = NULL;
|
||||
struct rb_node *nd;
|
||||
int ret = TEST_OK;
|
||||
|
||||
/* dso__fprintf() prints all the symbols */
|
||||
if (verbose > 1)
|
||||
dso__fprintf(dso, stderr);
|
||||
|
||||
for (nd = rb_first_cached(&dso->symbols); nd; nd = rb_next(nd)) {
|
||||
struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
|
||||
|
||||
if (sym->type != STT_FUNC && sym->type != STT_GNU_IFUNC)
|
||||
continue;
|
||||
|
||||
/* Check for overlapping function symbols */
|
||||
if (last_sym && sym->start < last_sym->end) {
|
||||
pr_debug("Overlapping symbols:\n");
|
||||
symbol__fprintf(last_sym, stderr);
|
||||
symbol__fprintf(sym, stderr);
|
||||
ret = TEST_FAIL;
|
||||
}
|
||||
/* Check for zero-length function symbol */
|
||||
if (sym->start == sym->end) {
|
||||
pr_debug("Zero-length symbol:\n");
|
||||
symbol__fprintf(sym, stderr);
|
||||
ret = TEST_FAIL;
|
||||
}
|
||||
last_sym = sym;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int test_file(struct test_info *ti, char *filename)
|
||||
{
|
||||
struct map *map = NULL;
|
||||
int ret, nr;
|
||||
|
||||
pr_debug("Testing %s\n", filename);
|
||||
|
||||
ret = create_map(ti, filename, &map);
|
||||
if (ret != TEST_OK)
|
||||
return ret;
|
||||
|
||||
nr = dso__load(map->dso, map);
|
||||
if (nr < 0) {
|
||||
pr_debug("dso__load() failed!\n");
|
||||
ret = TEST_FAIL;
|
||||
goto out_put;
|
||||
}
|
||||
|
||||
if (nr == 0) {
|
||||
pr_debug("DSO has no symbols!\n");
|
||||
ret = TEST_SKIP;
|
||||
goto out_put;
|
||||
}
|
||||
|
||||
ret = test_dso(map->dso);
|
||||
out_put:
|
||||
map__put(map);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Entry point of the "Symbols" suite: set up the test state, pick the DSO
 * file name, run the file test and tear the state down again.
 *
 * Returns the init_test_info() status when setup fails, otherwise the
 * test_file() result.
 */
static int test__symbols(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
{
	struct test_info ti;
	char filename[PATH_MAX];
	int ret = init_test_info(&ti);

	if (ret == TEST_OK) {
		get_test_dso_filename(filename, sizeof(filename));
		ret = test_file(&ti, filename);
		exit_test_info(&ti);
	}

	return ret;
}
|
||||
|
||||
DEFINE_SUITE("Symbols", symbols);
|
@ -148,6 +148,7 @@ DECLARE_SUITE(perf_time_to_tsc);
|
||||
DECLARE_SUITE(dlfilter);
|
||||
DECLARE_SUITE(sigtrap);
|
||||
DECLARE_SUITE(event_groups);
|
||||
DECLARE_SUITE(symbols);
|
||||
|
||||
/*
|
||||
* PowerPC and S390 do not support creation of instruction breakpoints using the
|
||||
@ -208,4 +209,6 @@ DECLARE_WORKLOAD(sqrtloop);
|
||||
DECLARE_WORKLOAD(brstack);
|
||||
DECLARE_WORKLOAD(datasym);
|
||||
|
||||
extern const char *dso_to_test;
|
||||
|
||||
#endif /* TESTS_H */
|
||||
|
@ -20,7 +20,7 @@ static void sighandler(int sig __maybe_unused)
|
||||
noinline void test_loop(void)
|
||||
{
|
||||
while (!done)
|
||||
count++;
|
||||
__atomic_fetch_add(&count, 1, __ATOMIC_RELAXED);
|
||||
}
|
||||
|
||||
static void *thfunc(void *arg)
|
||||
|
@ -154,7 +154,10 @@ perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o
|
||||
perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter_cgroup.o
|
||||
perf-$(CONFIG_PERF_BPF_SKEL) += bpf_ftrace.o
|
||||
perf-$(CONFIG_PERF_BPF_SKEL) += bpf_off_cpu.o
|
||||
perf-$(CONFIG_PERF_BPF_SKEL) += bpf_lock_contention.o
|
||||
|
||||
ifeq ($(CONFIG_LIBTRACEEVENT),y)
|
||||
perf-$(CONFIG_PERF_BPF_SKEL) += bpf_lock_contention.o
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_LIBTRACEEVENT),y)
|
||||
perf-$(CONFIG_PERF_BPF_SKEL) += bpf_kwork.o
|
||||
|
@ -68,7 +68,11 @@ static u64 arm_spe_calc_ip(int index, u64 payload)
|
||||
/* Clean highest byte */
|
||||
payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload);
|
||||
} else {
|
||||
pr_err("unsupported address packet index: 0x%x\n", index);
|
||||
static u32 seen_idx = 0;
|
||||
if (!(seen_idx & BIT(index))) {
|
||||
seen_idx |= BIT(index);
|
||||
pr_warning("ignoring unsupported address packet index: 0x%x\n", index);
|
||||
}
|
||||
}
|
||||
|
||||
return payload;
|
||||
|
@ -422,16 +422,18 @@ static int arm_spe_pkt_desc_addr(const struct arm_spe_pkt *packet,
|
||||
int ch, pat;
|
||||
u64 payload = packet->payload;
|
||||
int err = 0;
|
||||
static const char *idx_name[] = {"PC", "TGT", "VA", "PA", "PBT"};
|
||||
|
||||
switch (idx) {
|
||||
case SPE_ADDR_PKT_HDR_INDEX_INS:
|
||||
case SPE_ADDR_PKT_HDR_INDEX_BRANCH:
|
||||
case SPE_ADDR_PKT_HDR_INDEX_PREV_BRANCH:
|
||||
ns = !!SPE_ADDR_PKT_GET_NS(payload);
|
||||
el = SPE_ADDR_PKT_GET_EL(payload);
|
||||
payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload);
|
||||
arm_spe_pkt_out_string(&err, &buf, &buf_len,
|
||||
"%s 0x%llx el%d ns=%d",
|
||||
(idx == 1) ? "TGT" : "PC", payload, el, ns);
|
||||
idx_name[idx], payload, el, ns);
|
||||
break;
|
||||
case SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT:
|
||||
arm_spe_pkt_out_string(&err, &buf, &buf_len,
|
||||
|
@ -65,6 +65,7 @@ struct arm_spe_pkt {
|
||||
#define SPE_ADDR_PKT_HDR_INDEX_BRANCH 0x1
|
||||
#define SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT 0x2
|
||||
#define SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS 0x3
|
||||
#define SPE_ADDR_PKT_HDR_INDEX_PREV_BRANCH 0x4
|
||||
|
||||
/* Address packet payload */
|
||||
#define SPE_ADDR_PKT_ADDR_BYTE7_SHIFT 56
|
||||
|
@ -1133,6 +1133,9 @@ int auxtrace_queue_data(struct perf_session *session, bool samples, bool events)
|
||||
if (auxtrace__dont_decode(session))
|
||||
return 0;
|
||||
|
||||
if (perf_data__is_pipe(session->data))
|
||||
return 0;
|
||||
|
||||
if (!session->auxtrace || !session->auxtrace->queue_data)
|
||||
return -EINVAL;
|
||||
|
||||
@ -1391,6 +1394,7 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts,
|
||||
synth_opts->calls = true;
|
||||
} else {
|
||||
synth_opts->instructions = true;
|
||||
synth_opts->cycles = true;
|
||||
synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE;
|
||||
synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
|
||||
}
|
||||
@ -1479,7 +1483,11 @@ int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts,
|
||||
for (p = str; *p;) {
|
||||
switch (*p++) {
|
||||
case 'i':
|
||||
synth_opts->instructions = true;
|
||||
case 'y':
|
||||
if (p[-1] == 'y')
|
||||
synth_opts->cycles = true;
|
||||
else
|
||||
synth_opts->instructions = true;
|
||||
while (*p == ' ' || *p == ',')
|
||||
p += 1;
|
||||
if (isdigit(*p)) {
|
||||
@ -1638,7 +1646,7 @@ int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts,
|
||||
}
|
||||
}
|
||||
out:
|
||||
if (synth_opts->instructions) {
|
||||
if (synth_opts->instructions || synth_opts->cycles) {
|
||||
if (!period_type_set)
|
||||
synth_opts->period_type =
|
||||
PERF_ITRACE_DEFAULT_PERIOD_TYPE;
|
||||
|
@ -71,6 +71,9 @@ enum itrace_period_type {
|
||||
* @inject: indicates the event (not just the sample) must be fully synthesized
|
||||
* because 'perf inject' will write it out
|
||||
* @instructions: whether to synthesize 'instructions' events
|
||||
* @cycles: whether to synthesize 'cycles' events
|
||||
* (not fully accurate, since CYC packets are only emitted
|
||||
* together with other events, such as branches)
|
||||
* @branches: whether to synthesize 'branches' events
|
||||
* (branch misses only for Arm SPE)
|
||||
* @transactions: whether to synthesize events for transactions
|
||||
@ -119,6 +122,7 @@ struct itrace_synth_opts {
|
||||
bool default_no_sample;
|
||||
bool inject;
|
||||
bool instructions;
|
||||
bool cycles;
|
||||
bool branches;
|
||||
bool transactions;
|
||||
bool ptwrites;
|
||||
@ -643,6 +647,7 @@ bool auxtrace__evsel_is_auxtrace(struct perf_session *session,
|
||||
|
||||
#define ITRACE_HELP \
|
||||
" i[period]: synthesize instructions events\n" \
|
||||
" y[period]: synthesize cycles events (same period as i)\n" \
|
||||
" b: synthesize branches events (branch misses for Arm SPE)\n" \
|
||||
" c: synthesize branches events (calls only)\n" \
|
||||
" r: synthesize branches events (returns only)\n" \
|
||||
@ -674,7 +679,7 @@ bool auxtrace__evsel_is_auxtrace(struct perf_session *session,
|
||||
" A: approximate IPC\n" \
|
||||
" Z: prefer to ignore timestamps (so-called \"timeless\" decoding)\n" \
|
||||
" PERIOD[ns|us|ms|i|t]: specify period to sample stream\n" \
|
||||
" concatenate multiple options. Default is ibxwpe or cewp\n"
|
||||
" concatenate multiple options. Default is iybxwpe or cewp\n"
|
||||
|
||||
static inline
|
||||
void itrace_synth_opts__set_time_range(struct itrace_synth_opts *opts,
|
||||
|
@ -34,13 +34,15 @@ int lock_contention_prepare(struct lock_contention *con)
|
||||
bpf_map__set_max_entries(skel->maps.lock_stat, con->map_nr_entries);
|
||||
bpf_map__set_max_entries(skel->maps.tstamp, con->map_nr_entries);
|
||||
|
||||
if (con->aggr_mode == LOCK_AGGR_TASK) {
|
||||
if (con->aggr_mode == LOCK_AGGR_TASK)
|
||||
bpf_map__set_max_entries(skel->maps.task_data, con->map_nr_entries);
|
||||
bpf_map__set_max_entries(skel->maps.stacks, 1);
|
||||
} else {
|
||||
else
|
||||
bpf_map__set_max_entries(skel->maps.task_data, 1);
|
||||
|
||||
if (con->save_callstack)
|
||||
bpf_map__set_max_entries(skel->maps.stacks, con->map_nr_entries);
|
||||
}
|
||||
else
|
||||
bpf_map__set_max_entries(skel->maps.stacks, 1);
|
||||
|
||||
if (target__has_cpu(target))
|
||||
ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus);
|
||||
@ -146,6 +148,8 @@ int lock_contention_prepare(struct lock_contention *con)
|
||||
/* these don't work well if in the rodata section */
|
||||
skel->bss->stack_skip = con->stack_skip;
|
||||
skel->bss->aggr_mode = con->aggr_mode;
|
||||
skel->bss->needs_callstack = con->save_callstack;
|
||||
skel->bss->lock_owner = con->owner;
|
||||
|
||||
lock_contention_bpf__attach(skel);
|
||||
return 0;
|
||||
@ -163,9 +167,70 @@ int lock_contention_stop(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const char *lock_contention_get_name(struct lock_contention *con,
|
||||
struct contention_key *key,
|
||||
u64 *stack_trace)
|
||||
{
|
||||
int idx = 0;
|
||||
u64 addr;
|
||||
const char *name = "";
|
||||
static char name_buf[KSYM_NAME_LEN];
|
||||
struct symbol *sym;
|
||||
struct map *kmap;
|
||||
struct machine *machine = con->machine;
|
||||
|
||||
if (con->aggr_mode == LOCK_AGGR_TASK) {
|
||||
struct contention_task_data task;
|
||||
int pid = key->pid;
|
||||
int task_fd = bpf_map__fd(skel->maps.task_data);
|
||||
|
||||
/* do not update idle comm which contains CPU number */
|
||||
if (pid) {
|
||||
struct thread *t = __machine__findnew_thread(machine, /*pid=*/-1, pid);
|
||||
|
||||
if (t == NULL)
|
||||
return name;
|
||||
if (!bpf_map_lookup_elem(task_fd, &pid, &task) &&
|
||||
thread__set_comm(t, task.comm, /*timestamp=*/0))
|
||||
name = task.comm;
|
||||
}
|
||||
return name;
|
||||
}
|
||||
|
||||
if (con->aggr_mode == LOCK_AGGR_ADDR) {
|
||||
sym = machine__find_kernel_symbol(machine, key->lock_addr, &kmap);
|
||||
if (sym)
|
||||
name = sym->name;
|
||||
return name;
|
||||
}
|
||||
|
||||
/* LOCK_AGGR_CALLER: skip lock internal functions */
|
||||
while (machine__is_lock_function(machine, stack_trace[idx]) &&
|
||||
idx < con->max_stack - 1)
|
||||
idx++;
|
||||
|
||||
addr = stack_trace[idx];
|
||||
sym = machine__find_kernel_symbol(machine, addr, &kmap);
|
||||
|
||||
if (sym) {
|
||||
unsigned long offset;
|
||||
|
||||
offset = kmap->map_ip(kmap, addr) - sym->start;
|
||||
|
||||
if (offset == 0)
|
||||
return sym->name;
|
||||
|
||||
snprintf(name_buf, sizeof(name_buf), "%s+%#lx", sym->name, offset);
|
||||
} else {
|
||||
snprintf(name_buf, sizeof(name_buf), "%#lx", (unsigned long)addr);
|
||||
}
|
||||
|
||||
return name_buf;
|
||||
}
|
||||
|
||||
int lock_contention_read(struct lock_contention *con)
|
||||
{
|
||||
int fd, stack, task_fd, err = 0;
|
||||
int fd, stack, err = 0;
|
||||
struct contention_key *prev_key, key;
|
||||
struct contention_data data = {};
|
||||
struct lock_stat *st = NULL;
|
||||
@ -175,7 +240,6 @@ int lock_contention_read(struct lock_contention *con)
|
||||
|
||||
fd = bpf_map__fd(skel->maps.lock_stat);
|
||||
stack = bpf_map__fd(skel->maps.stacks);
|
||||
task_fd = bpf_map__fd(skel->maps.task_data);
|
||||
|
||||
con->lost = skel->bss->lost;
|
||||
|
||||
@ -195,16 +259,50 @@ int lock_contention_read(struct lock_contention *con)
|
||||
|
||||
prev_key = NULL;
|
||||
while (!bpf_map_get_next_key(fd, prev_key, &key)) {
|
||||
struct map *kmap;
|
||||
struct symbol *sym;
|
||||
int idx = 0;
|
||||
s32 stack_id;
|
||||
s64 ls_key;
|
||||
const char *name;
|
||||
|
||||
/* to handle errors in the loop body */
|
||||
err = -1;
|
||||
|
||||
bpf_map_lookup_elem(fd, &key, &data);
|
||||
st = zalloc(sizeof(*st));
|
||||
if (con->save_callstack) {
|
||||
bpf_map_lookup_elem(stack, &key.stack_id, stack_trace);
|
||||
|
||||
if (!match_callstack_filter(machine, stack_trace))
|
||||
goto next;
|
||||
}
|
||||
|
||||
switch (con->aggr_mode) {
|
||||
case LOCK_AGGR_CALLER:
|
||||
ls_key = key.stack_id;
|
||||
break;
|
||||
case LOCK_AGGR_TASK:
|
||||
ls_key = key.pid;
|
||||
break;
|
||||
case LOCK_AGGR_ADDR:
|
||||
ls_key = key.lock_addr;
|
||||
break;
|
||||
default:
|
||||
goto next;
|
||||
}
|
||||
|
||||
st = lock_stat_find(ls_key);
|
||||
if (st != NULL) {
|
||||
st->wait_time_total += data.total_time;
|
||||
if (st->wait_time_max < data.max_time)
|
||||
st->wait_time_max = data.max_time;
|
||||
if (st->wait_time_min > data.min_time)
|
||||
st->wait_time_min = data.min_time;
|
||||
|
||||
st->nr_contended += data.count;
|
||||
if (st->nr_contended)
|
||||
st->avg_wait_time = st->wait_time_total / st->nr_contended;
|
||||
goto next;
|
||||
}
|
||||
|
||||
name = lock_contention_get_name(con, &key, stack_trace);
|
||||
st = lock_stat_findnew(ls_key, name, data.flags);
|
||||
if (st == NULL)
|
||||
break;
|
||||
|
||||
@ -216,77 +314,20 @@ int lock_contention_read(struct lock_contention *con)
|
||||
if (data.count)
|
||||
st->avg_wait_time = data.total_time / data.count;
|
||||
|
||||
st->flags = data.flags;
|
||||
st->addr = key.aggr_key;
|
||||
|
||||
if (con->aggr_mode == LOCK_AGGR_TASK) {
|
||||
struct contention_task_data task;
|
||||
struct thread *t;
|
||||
int pid = key.aggr_key;
|
||||
|
||||
/* do not update idle comm which contains CPU number */
|
||||
if (st->addr) {
|
||||
bpf_map_lookup_elem(task_fd, &pid, &task);
|
||||
t = __machine__findnew_thread(machine, /*pid=*/-1, pid);
|
||||
thread__set_comm(t, task.comm, /*timestamp=*/0);
|
||||
}
|
||||
goto next;
|
||||
}
|
||||
|
||||
if (con->aggr_mode == LOCK_AGGR_ADDR) {
|
||||
sym = machine__find_kernel_symbol(machine, st->addr, &kmap);
|
||||
if (sym)
|
||||
st->name = strdup(sym->name);
|
||||
goto next;
|
||||
}
|
||||
|
||||
stack_id = key.aggr_key;
|
||||
bpf_map_lookup_elem(stack, &stack_id, stack_trace);
|
||||
|
||||
/* skip lock internal functions */
|
||||
while (machine__is_lock_function(machine, stack_trace[idx]) &&
|
||||
idx < con->max_stack - 1)
|
||||
idx++;
|
||||
|
||||
st->addr = stack_trace[idx];
|
||||
sym = machine__find_kernel_symbol(machine, st->addr, &kmap);
|
||||
|
||||
if (sym) {
|
||||
unsigned long offset;
|
||||
int ret = 0;
|
||||
|
||||
offset = kmap->map_ip(kmap, st->addr) - sym->start;
|
||||
|
||||
if (offset)
|
||||
ret = asprintf(&st->name, "%s+%#lx", sym->name, offset);
|
||||
else
|
||||
st->name = strdup(sym->name);
|
||||
|
||||
if (ret < 0 || st->name == NULL)
|
||||
break;
|
||||
} else if (asprintf(&st->name, "%#lx", (unsigned long)st->addr) < 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (verbose > 0) {
|
||||
if (con->save_callstack) {
|
||||
st->callstack = memdup(stack_trace, stack_size);
|
||||
if (st->callstack == NULL)
|
||||
break;
|
||||
}
|
||||
|
||||
next:
|
||||
hlist_add_head(&st->hash_entry, con->result);
|
||||
prev_key = &key;
|
||||
|
||||
/* we're fine now, reset the values */
|
||||
st = NULL;
|
||||
/* we're fine now, reset the error */
|
||||
err = 0;
|
||||
}
|
||||
|
||||
free(stack_trace);
|
||||
if (st) {
|
||||
free(st->name);
|
||||
free(st);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
@ -10,6 +10,14 @@
|
||||
/* default buffer size */
|
||||
#define MAX_ENTRIES 10240
|
||||
|
||||
/* lock contention flags from include/trace/events/lock.h */
|
||||
#define LCB_F_SPIN (1U << 0)
|
||||
#define LCB_F_READ (1U << 1)
|
||||
#define LCB_F_WRITE (1U << 2)
|
||||
#define LCB_F_RT (1U << 3)
|
||||
#define LCB_F_PERCPU (1U << 4)
|
||||
#define LCB_F_MUTEX (1U << 5)
|
||||
|
||||
struct tstamp_data {
|
||||
__u64 timestamp;
|
||||
__u64 lock;
|
||||
@ -76,13 +84,23 @@ struct {
|
||||
__uint(max_entries, 1);
|
||||
} addr_filter SEC(".maps");
|
||||
|
||||
struct rw_semaphore___old {
|
||||
struct task_struct *owner;
|
||||
} __attribute__((preserve_access_index));
|
||||
|
||||
struct rw_semaphore___new {
|
||||
atomic_long_t owner;
|
||||
} __attribute__((preserve_access_index));
|
||||
|
||||
/* control flags */
|
||||
int enabled;
|
||||
int has_cpu;
|
||||
int has_task;
|
||||
int has_type;
|
||||
int has_addr;
|
||||
int needs_callstack;
|
||||
int stack_skip;
|
||||
int lock_owner;
|
||||
|
||||
/* determine the key of lock stat */
|
||||
int aggr_mode;
|
||||
@ -131,17 +149,59 @@ static inline int can_record(u64 *ctx)
|
||||
return 1;
|
||||
}
|
||||
|
||||
static inline void update_task_data(__u32 pid)
|
||||
static inline int update_task_data(struct task_struct *task)
|
||||
{
|
||||
struct contention_task_data *p;
|
||||
int pid, err;
|
||||
|
||||
err = bpf_core_read(&pid, sizeof(pid), &task->pid);
|
||||
if (err)
|
||||
return -1;
|
||||
|
||||
p = bpf_map_lookup_elem(&task_data, &pid);
|
||||
if (p == NULL) {
|
||||
struct contention_task_data data;
|
||||
struct contention_task_data data = {};
|
||||
|
||||
bpf_get_current_comm(data.comm, sizeof(data.comm));
|
||||
BPF_CORE_READ_STR_INTO(&data.comm, task, comm);
|
||||
bpf_map_update_elem(&task_data, &pid, &data, BPF_NOEXIST);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifndef __has_builtin
|
||||
# define __has_builtin(x) 0
|
||||
#endif
|
||||
|
||||
/*
 * Read the owner word of the lock at address @lock and turn it into a
 * task pointer.
 *
 * The owner field is read only when @flags has LCB_F_MUTEX set (read as a
 * struct mutex) or when @flags is exactly LCB_F_READ or LCB_F_WRITE (read
 * as one of the rw_semaphore layouts).  Returns NULL when nothing was read
 * or the owner word is zero; otherwise the owner word with its low three
 * bits cleared.
 */
static inline struct task_struct *get_lock_owner(__u64 lock, __u32 flags)
{
	__u64 owner_word = 0;

	if (flags & LCB_F_MUTEX) {
		struct mutex *mtx = (void *)lock;

		owner_word = BPF_CORE_READ(mtx, owner.counter);
	} else if (flags == LCB_F_READ || flags == LCB_F_WRITE) {
#if __has_builtin(bpf_core_type_matches)
		if (bpf_core_type_matches(struct rw_semaphore___old)) {
			struct rw_semaphore___old *sem = (void *)lock;

			owner_word = (unsigned long)BPF_CORE_READ(sem, owner);
		} else if (bpf_core_type_matches(struct rw_semaphore___new)) {
			struct rw_semaphore___new *sem = (void *)lock;

			owner_word = BPF_CORE_READ(sem, owner.counter);
		}
#else
		/* assume new struct */
		struct rw_semaphore *sem = (void *)lock;

		owner_word = BPF_CORE_READ(sem, owner.counter);
#endif
	}

	if (!owner_word)
		return NULL;

	/* Clear the low three bits of the owner word to get the pointer */
	return (void *)(owner_word & ~7UL);
}
|
||||
|
||||
SEC("tp_btf/contention_begin")
|
||||
@ -173,11 +233,31 @@ int contention_begin(u64 *ctx)
|
||||
pelem->lock = (__u64)ctx[0];
|
||||
pelem->flags = (__u32)ctx[1];
|
||||
|
||||
if (aggr_mode == LOCK_AGGR_CALLER) {
|
||||
if (needs_callstack) {
|
||||
pelem->stack_id = bpf_get_stackid(ctx, &stacks,
|
||||
BPF_F_FAST_STACK_CMP | stack_skip);
|
||||
if (pelem->stack_id < 0)
|
||||
lost++;
|
||||
} else if (aggr_mode == LOCK_AGGR_TASK) {
|
||||
struct task_struct *task;
|
||||
|
||||
if (lock_owner) {
|
||||
task = get_lock_owner(pelem->lock, pelem->flags);
|
||||
|
||||
/* The flags is not used anymore. Pass the owner pid. */
|
||||
if (task)
|
||||
pelem->flags = BPF_CORE_READ(task, pid);
|
||||
else
|
||||
pelem->flags = -1U;
|
||||
|
||||
} else {
|
||||
task = bpf_get_current_task_btf();
|
||||
}
|
||||
|
||||
if (task) {
|
||||
if (update_task_data(task) < 0 && lock_owner)
|
||||
pelem->flags = -1U;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
@ -188,7 +268,7 @@ int contention_end(u64 *ctx)
|
||||
{
|
||||
__u32 pid;
|
||||
struct tstamp_data *pelem;
|
||||
struct contention_key key;
|
||||
struct contention_key key = {};
|
||||
struct contention_data *data;
|
||||
__u64 duration;
|
||||
|
||||
@ -204,14 +284,20 @@ int contention_end(u64 *ctx)
|
||||
|
||||
switch (aggr_mode) {
|
||||
case LOCK_AGGR_CALLER:
|
||||
key.aggr_key = pelem->stack_id;
|
||||
key.stack_id = pelem->stack_id;
|
||||
break;
|
||||
case LOCK_AGGR_TASK:
|
||||
key.aggr_key = pid;
|
||||
update_task_data(pid);
|
||||
if (lock_owner)
|
||||
key.pid = pelem->flags;
|
||||
else
|
||||
key.pid = pid;
|
||||
if (needs_callstack)
|
||||
key.stack_id = pelem->stack_id;
|
||||
break;
|
||||
case LOCK_AGGR_ADDR:
|
||||
key.aggr_key = pelem->lock;
|
||||
key.lock_addr = pelem->lock;
|
||||
if (needs_callstack)
|
||||
key.stack_id = pelem->stack_id;
|
||||
break;
|
||||
default:
|
||||
/* should not happen */
|
||||
|
@ -4,7 +4,9 @@
|
||||
#define UTIL_BPF_SKEL_LOCK_DATA_H
|
||||
|
||||
struct contention_key {
|
||||
u64 aggr_key; /* can be stack_id, pid or lock addr */
|
||||
u32 stack_id;
|
||||
u32 pid;
|
||||
u64 lock_addr;
|
||||
};
|
||||
|
||||
#define TASK_COMM_LEN 16
|
||||
|
@ -212,3 +212,18 @@ int branch_type_str(struct branch_type_stat *st, char *bf, int size)
|
||||
|
||||
return printed;
|
||||
}
|
||||
|
||||
const char *branch_spec_desc(int spec)
|
||||
{
|
||||
const char *branch_spec_outcomes[PERF_BR_SPEC_MAX] = {
|
||||
"N/A",
|
||||
"SPEC_WRONG_PATH",
|
||||
"NON_SPEC_CORRECT_PATH",
|
||||
"SPEC_CORRECT_PATH",
|
||||
};
|
||||
|
||||
if (spec >= 0 && spec < PERF_BR_SPEC_MAX)
|
||||
return branch_spec_outcomes[spec];
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
@ -89,4 +89,6 @@ const char *get_branch_type(struct branch_entry *e);
|
||||
void branch_type_stat_display(FILE *fp, struct branch_type_stat *st);
|
||||
int branch_type_str(struct branch_type_stat *st, char *bf, int bfsize);
|
||||
|
||||
const char *branch_spec_desc(int spec);
|
||||
|
||||
#endif /* _PERF_BRANCH_H */
|
||||
|
@ -6,16 +6,31 @@
|
||||
|
||||
int __pure cacheline_size(void);
|
||||
|
||||
static inline u64 cl_address(u64 address)
|
||||
|
||||
/*
|
||||
* Some architectures have 'Adjacent Cacheline Prefetch' feature,
|
||||
* which performs like the cacheline size being doubled.
|
||||
*/
|
||||
static inline u64 cl_address(u64 address, bool double_cl)
|
||||
{
|
||||
u64 size = cacheline_size();
|
||||
|
||||
if (double_cl)
|
||||
size *= 2;
|
||||
|
||||
/* return the cacheline of the address */
|
||||
return (address & ~(cacheline_size() - 1));
|
||||
return (address & ~(size - 1));
|
||||
}
|
||||
|
||||
static inline u64 cl_offset(u64 address)
|
||||
static inline u64 cl_offset(u64 address, bool double_cl)
|
||||
{
|
||||
/* return the cacheline of the address */
|
||||
return (address & (cacheline_size() - 1));
|
||||
u64 size = cacheline_size();
|
||||
|
||||
if (double_cl)
|
||||
size *= 2;
|
||||
|
||||
/* return the offset inside cacheline */
|
||||
return (address & (size - 1));
|
||||
}
|
||||
|
||||
#endif // PERF_CACHELINE_H
|
||||
|
@ -481,7 +481,6 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str,
|
||||
nr_cgroups++;
|
||||
|
||||
if (metric_events) {
|
||||
perf_stat__collect_metric_expr(tmp_list);
|
||||
if (metricgroup__copy_metric_events(tmp_list, cgrp,
|
||||
metric_events,
|
||||
&orig_metric_events) < 0)
|
||||
|
@ -422,8 +422,6 @@ void numa_topology__delete(struct numa_topology *tp)
|
||||
static int load_hybrid_node(struct hybrid_topology_node *node,
|
||||
struct perf_pmu *pmu)
|
||||
{
|
||||
const char *sysfs;
|
||||
char path[PATH_MAX];
|
||||
char *buf = NULL, *p;
|
||||
FILE *fp;
|
||||
size_t len = 0;
|
||||
@ -432,12 +430,7 @@ static int load_hybrid_node(struct hybrid_topology_node *node,
|
||||
if (!node->pmu_name)
|
||||
return -1;
|
||||
|
||||
sysfs = sysfs__mountpoint();
|
||||
if (!sysfs)
|
||||
goto err;
|
||||
|
||||
snprintf(path, PATH_MAX, CPUS_TEMPLATE_CPU, sysfs, pmu->name);
|
||||
fp = fopen(path, "r");
|
||||
fp = perf_pmu__open_file(pmu, "cpus");
|
||||
if (!fp)
|
||||
goto err;
|
||||
|
||||
|
@ -36,7 +36,22 @@ static const char * const cs_etmv4_priv_fmts[] = {
|
||||
[CS_ETMV4_TRCIDR2] = " TRCIDR2 %llx\n",
|
||||
[CS_ETMV4_TRCIDR8] = " TRCIDR8 %llx\n",
|
||||
[CS_ETMV4_TRCAUTHSTATUS] = " TRCAUTHSTATUS %llx\n",
|
||||
[CS_ETE_TRCDEVARCH] = " TRCDEVARCH %llx\n"
|
||||
[CS_ETMV4_TS_SOURCE] = " TS_SOURCE %lld\n",
|
||||
};
|
||||
|
||||
static const char * const cs_ete_priv_fmts[] = {
|
||||
[CS_ETM_MAGIC] = " Magic number %llx\n",
|
||||
[CS_ETM_CPU] = " CPU %lld\n",
|
||||
[CS_ETM_NR_TRC_PARAMS] = " NR_TRC_PARAMS %llx\n",
|
||||
[CS_ETE_TRCCONFIGR] = " TRCCONFIGR %llx\n",
|
||||
[CS_ETE_TRCTRACEIDR] = " TRCTRACEIDR %llx\n",
|
||||
[CS_ETE_TRCIDR0] = " TRCIDR0 %llx\n",
|
||||
[CS_ETE_TRCIDR1] = " TRCIDR1 %llx\n",
|
||||
[CS_ETE_TRCIDR2] = " TRCIDR2 %llx\n",
|
||||
[CS_ETE_TRCIDR8] = " TRCIDR8 %llx\n",
|
||||
[CS_ETE_TRCAUTHSTATUS] = " TRCAUTHSTATUS %llx\n",
|
||||
[CS_ETE_TRCDEVARCH] = " TRCDEVARCH %llx\n",
|
||||
[CS_ETE_TS_SOURCE] = " TS_SOURCE %lld\n",
|
||||
};
|
||||
|
||||
static const char * const param_unk_fmt =
|
||||
@ -96,18 +111,21 @@ static int cs_etm__print_cpu_metadata_v1(u64 *val, int *offset)
|
||||
else
|
||||
fprintf(stdout, cs_etm_priv_fmts[j], val[i]);
|
||||
}
|
||||
} else if (magic == __perf_cs_etmv4_magic || magic == __perf_cs_ete_magic) {
|
||||
/*
|
||||
* ETE and ETMv4 can be printed in the same block because the number of parameters
|
||||
* is saved and they share the list of parameter names. ETE is also only supported
|
||||
* in V1 files.
|
||||
*/
|
||||
} else if (magic == __perf_cs_etmv4_magic) {
|
||||
for (j = 0; j < total_params; j++, i++) {
|
||||
/* if newer record - could be excess params */
|
||||
if (j >= CS_ETMV4_PRIV_MAX)
|
||||
fprintf(stdout, param_unk_fmt, j, val[i]);
|
||||
else
|
||||
fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]);
|
||||
}
|
||||
} else if (magic == __perf_cs_ete_magic) {
|
||||
for (j = 0; j < total_params; j++, i++) {
|
||||
/* if newer record - could be excess params */
|
||||
if (j >= CS_ETE_PRIV_MAX)
|
||||
fprintf(stdout, param_unk_fmt, j, val[i]);
|
||||
else
|
||||
fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]);
|
||||
fprintf(stdout, cs_ete_priv_fmts[j], val[i]);
|
||||
}
|
||||
} else {
|
||||
/* failure - note bad magic value and error out */
|
||||
|
@ -30,6 +30,15 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Assume a maximum of 0.1ns elapsed per instruction. This would be the
|
||||
* case with a theoretical 10GHz core executing 1 instruction per cycle.
|
||||
* Used to estimate the sample time for synthesized instructions because
|
||||
* Coresight only emits a timestamp for a range of instructions rather
|
||||
* than per instruction.
|
||||
*/
|
||||
const u32 INSTR_PER_NS = 10;
|
||||
|
||||
struct cs_etm_decoder {
|
||||
void *data;
|
||||
void (*packet_printer)(const char *msg);
|
||||
@ -112,6 +121,20 @@ int cs_etm_decoder__get_packet(struct cs_etm_packet_queue *packet_queue,
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Calculate the number of nanoseconds elapsed.
|
||||
*
|
||||
* instr_count is updated in place with the remainder of the instructions
|
||||
* which didn't make up a whole nanosecond.
|
||||
*/
|
||||
static u32 cs_etm_decoder__dec_instr_count_to_ns(u32 *instr_count)
|
||||
{
|
||||
const u32 instr_copy = *instr_count;
|
||||
|
||||
*instr_count %= INSTR_PER_NS;
|
||||
return instr_copy / INSTR_PER_NS;
|
||||
}
|
||||
|
||||
static int cs_etm_decoder__gen_etmv3_config(struct cs_etm_trace_params *params,
|
||||
ocsd_etmv3_cfg *config)
|
||||
{
|
||||
@ -260,15 +283,17 @@ cs_etm_decoder__do_soft_timestamp(struct cs_etm_queue *etmq,
|
||||
struct cs_etm_packet_queue *packet_queue,
|
||||
const uint8_t trace_chan_id)
|
||||
{
|
||||
u64 estimated_ts;
|
||||
|
||||
/* No timestamp packet has been received, nothing to do */
|
||||
if (!packet_queue->cs_timestamp)
|
||||
if (!packet_queue->next_cs_timestamp)
|
||||
return OCSD_RESP_CONT;
|
||||
|
||||
packet_queue->cs_timestamp = packet_queue->next_cs_timestamp;
|
||||
estimated_ts = packet_queue->cs_timestamp +
|
||||
cs_etm_decoder__dec_instr_count_to_ns(&packet_queue->instr_count);
|
||||
|
||||
/* Estimate the timestamp for the next range packet */
|
||||
packet_queue->next_cs_timestamp += packet_queue->instr_count;
|
||||
packet_queue->instr_count = 0;
|
||||
/* Estimated TS can never be higher than the next real one in the trace */
|
||||
packet_queue->cs_timestamp = min(packet_queue->next_cs_timestamp, estimated_ts);
|
||||
|
||||
/* Tell the front end which traceid_queue needs attention */
|
||||
cs_etm__etmq_set_traceid_queue_timestamp(etmq, trace_chan_id);
|
||||
@ -283,24 +308,37 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
|
||||
const ocsd_trc_index_t indx)
|
||||
{
|
||||
struct cs_etm_packet_queue *packet_queue;
|
||||
u64 converted_timestamp;
|
||||
u64 estimated_first_ts;
|
||||
|
||||
/* First get the packet queue for this traceID */
|
||||
packet_queue = cs_etm__etmq_get_packet_queue(etmq, trace_chan_id);
|
||||
if (!packet_queue)
|
||||
return OCSD_RESP_FATAL_SYS_ERR;
|
||||
|
||||
/*
|
||||
* Coresight timestamps are raw timer values which need to be scaled to ns. Assume
|
||||
* 0 is a bad value so don't try to convert it.
|
||||
*/
|
||||
converted_timestamp = elem->timestamp ?
|
||||
cs_etm__convert_sample_time(etmq, elem->timestamp) : 0;
|
||||
|
||||
/*
|
||||
* We've seen a timestamp packet before - simply record the new value.
|
||||
* Function do_soft_timestamp() will report the value to the front end,
|
||||
* hence asking the decoder to keep decoding rather than stopping.
|
||||
*/
|
||||
if (packet_queue->cs_timestamp) {
|
||||
packet_queue->next_cs_timestamp = elem->timestamp;
|
||||
if (packet_queue->next_cs_timestamp) {
|
||||
/*
|
||||
* What was next is now where new ranges start from, overwriting
|
||||
* any previous estimate in cs_timestamp
|
||||
*/
|
||||
packet_queue->cs_timestamp = packet_queue->next_cs_timestamp;
|
||||
packet_queue->next_cs_timestamp = converted_timestamp;
|
||||
return OCSD_RESP_CONT;
|
||||
}
|
||||
|
||||
|
||||
if (!elem->timestamp) {
|
||||
if (!converted_timestamp) {
|
||||
/*
|
||||
* Zero timestamps can be seen due to misconfiguration or hardware bugs.
|
||||
* Warn once, and don't try to subtract instr_count as it would result in an
|
||||
@ -312,7 +350,7 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
|
||||
". Decoding may be improved by prepending 'Z' to your current --itrace arguments.\n",
|
||||
indx);
|
||||
|
||||
} else if (packet_queue->instr_count > elem->timestamp) {
|
||||
} else if (packet_queue->instr_count / INSTR_PER_NS > converted_timestamp) {
|
||||
/*
|
||||
* Sanity check that the elem->timestamp - packet_queue->instr_count would not
|
||||
* result in an underflow. Warn and clamp at 0 if it would.
|
||||
@ -325,11 +363,14 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
|
||||
* or a discontinuity. Since timestamps packets are generated *after*
|
||||
* range packets have been generated, we need to estimate the time at
|
||||
* which instructions started by subtracting the number of instructions
|
||||
* executed to the timestamp.
|
||||
* executed to the timestamp. Don't estimate earlier than the last used
|
||||
* timestamp though.
|
||||
*/
|
||||
packet_queue->cs_timestamp = elem->timestamp - packet_queue->instr_count;
|
||||
estimated_first_ts = converted_timestamp -
|
||||
(packet_queue->instr_count / INSTR_PER_NS);
|
||||
packet_queue->cs_timestamp = max(packet_queue->cs_timestamp, estimated_first_ts);
|
||||
}
|
||||
packet_queue->next_cs_timestamp = elem->timestamp;
|
||||
packet_queue->next_cs_timestamp = converted_timestamp;
|
||||
packet_queue->instr_count = 0;
|
||||
|
||||
/* Tell the front end which traceid_queue needs attention */
|
||||
@ -342,7 +383,6 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
|
||||
static void
|
||||
cs_etm_decoder__reset_timestamp(struct cs_etm_packet_queue *packet_queue)
|
||||
{
|
||||
packet_queue->cs_timestamp = 0;
|
||||
packet_queue->next_cs_timestamp = 0;
|
||||
packet_queue->instr_count = 0;
|
||||
}
|
||||
@ -604,6 +644,9 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
|
||||
case OCSD_GEN_TRC_ELEM_CUSTOM:
|
||||
case OCSD_GEN_TRC_ELEM_SYNC_MARKER:
|
||||
case OCSD_GEN_TRC_ELEM_MEMTRANS:
|
||||
#if (OCSD_VER_NUM >= 0x010400)
|
||||
case OCSD_GEN_TRC_ELEM_INSTRUMENTATION:
|
||||
#endif
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -35,6 +35,7 @@
|
||||
#include "tool.h"
|
||||
#include "thread.h"
|
||||
#include "thread-stack.h"
|
||||
#include "tsc.h"
|
||||
#include <tools/libc_compat.h>
|
||||
#include "util/synthetic-events.h"
|
||||
|
||||
@ -46,10 +47,12 @@ struct cs_etm_auxtrace {
|
||||
struct perf_session *session;
|
||||
struct machine *machine;
|
||||
struct thread *unknown_thread;
|
||||
struct perf_tsc_conversion tc;
|
||||
|
||||
u8 timeless_decoding;
|
||||
u8 snapshot_mode;
|
||||
u8 data_queued;
|
||||
u8 has_virtual_ts; /* Virtual/Kernel timestamps in the trace. */
|
||||
|
||||
int num_cpu;
|
||||
u64 latest_kernel_timestamp;
|
||||
@ -464,12 +467,12 @@ static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params,
|
||||
u64 **metadata = etm->metadata;
|
||||
|
||||
t_params[idx].protocol = CS_ETM_PROTO_ETE;
|
||||
t_params[idx].ete.reg_idr0 = metadata[idx][CS_ETMV4_TRCIDR0];
|
||||
t_params[idx].ete.reg_idr1 = metadata[idx][CS_ETMV4_TRCIDR1];
|
||||
t_params[idx].ete.reg_idr2 = metadata[idx][CS_ETMV4_TRCIDR2];
|
||||
t_params[idx].ete.reg_idr8 = metadata[idx][CS_ETMV4_TRCIDR8];
|
||||
t_params[idx].ete.reg_configr = metadata[idx][CS_ETMV4_TRCCONFIGR];
|
||||
t_params[idx].ete.reg_traceidr = metadata[idx][CS_ETMV4_TRCTRACEIDR];
|
||||
t_params[idx].ete.reg_idr0 = metadata[idx][CS_ETE_TRCIDR0];
|
||||
t_params[idx].ete.reg_idr1 = metadata[idx][CS_ETE_TRCIDR1];
|
||||
t_params[idx].ete.reg_idr2 = metadata[idx][CS_ETE_TRCIDR2];
|
||||
t_params[idx].ete.reg_idr8 = metadata[idx][CS_ETE_TRCIDR8];
|
||||
t_params[idx].ete.reg_configr = metadata[idx][CS_ETE_TRCCONFIGR];
|
||||
t_params[idx].ete.reg_traceidr = metadata[idx][CS_ETE_TRCTRACEIDR];
|
||||
t_params[idx].ete.reg_devarch = metadata[idx][CS_ETE_TRCDEVARCH];
|
||||
}
|
||||
|
||||
@ -1161,6 +1164,30 @@ static void cs_etm__copy_insn(struct cs_etm_queue *etmq,
|
||||
sample->insn_len, (void *)sample->insn);
|
||||
}
|
||||
|
||||
/*
 * Convert a timestamp taken from the trace (@cs_timestamp) into the value
 * used for sample times.
 *
 * When the auxtrace is flagged as carrying virtual timestamps
 * (etm->has_virtual_ts), the value is passed through tsc_to_perf_time() with
 * the conversion parameters stored in etm->tc. Otherwise the input is
 * returned unchanged.
 */
u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp)
|
||||
{
|
||||
struct cs_etm_auxtrace *etm = etmq->etm;
|
||||
|
||||
if (etm->has_virtual_ts)
|
||||
return tsc_to_perf_time(cs_timestamp, &etm->tc);
|
||||
else
|
||||
return cs_timestamp;
|
||||
}
|
||||
|
||||
/*
 * Choose the time value to store in a synthesized sample for @tidq.
 *
 * Returns, in order of precedence:
 *   - 0 when the session is decoded timelessly (etm->timeless_decoding);
 *   - the packet queue's cs_timestamp when the trace carries virtual
 *     timestamps (etm->has_virtual_ts);
 *   - etm->latest_kernel_timestamp otherwise.
 */
static inline u64 cs_etm__resolve_sample_time(struct cs_etm_queue *etmq,
|
||||
struct cs_etm_traceid_queue *tidq)
|
||||
{
|
||||
struct cs_etm_auxtrace *etm = etmq->etm;
|
||||
struct cs_etm_packet_queue *packet_queue = &tidq->packet_queue;
|
||||
|
||||
if (etm->timeless_decoding)
|
||||
return 0;
|
||||
else if (etm->has_virtual_ts)
|
||||
return packet_queue->cs_timestamp;
|
||||
else
|
||||
return etm->latest_kernel_timestamp;
|
||||
}
|
||||
|
||||
static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
|
||||
struct cs_etm_traceid_queue *tidq,
|
||||
u64 addr, u64 period)
|
||||
@ -1174,8 +1201,9 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
|
||||
event->sample.header.misc = cs_etm__cpu_mode(etmq, addr);
|
||||
event->sample.header.size = sizeof(struct perf_event_header);
|
||||
|
||||
if (!etm->timeless_decoding)
|
||||
sample.time = etm->latest_kernel_timestamp;
|
||||
/* Set time field based on etm auxtrace config. */
|
||||
sample.time = cs_etm__resolve_sample_time(etmq, tidq);
|
||||
|
||||
sample.ip = addr;
|
||||
sample.pid = tidq->pid;
|
||||
sample.tid = tidq->tid;
|
||||
@ -1232,8 +1260,9 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
|
||||
event->sample.header.misc = cs_etm__cpu_mode(etmq, ip);
|
||||
event->sample.header.size = sizeof(struct perf_event_header);
|
||||
|
||||
if (!etm->timeless_decoding)
|
||||
sample.time = etm->latest_kernel_timestamp;
|
||||
/* Set time field based on etm auxtrace config. */
|
||||
sample.time = cs_etm__resolve_sample_time(etmq, tidq);
|
||||
|
||||
sample.ip = ip;
|
||||
sample.pid = tidq->pid;
|
||||
sample.tid = tidq->tid;
|
||||
@ -2746,12 +2775,42 @@ static int cs_etm__queue_aux_records(struct perf_session *session)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * NOTE(review): despite its name, this expression is true when the value
 * stored at metadata[j][CS_ETM_NR_TRC_PARAMS] is less than or equal to the
 * parameter's offset from CS_ETM_COMMON_BLK_MAX_V1 -- presumably meaning the
 * parameter is NOT present in the metadata recorded for entry j (confirm
 * against the metadata layout).
 *
 * Expands to a reference to a variable named "metadata", which must be in
 * scope at the point of use.
 */
#define HAS_PARAM(j, type, param) (metadata[(j)][CS_ETM_NR_TRC_PARAMS] <= \
|
||||
(CS_##type##_##param - CS_ETM_COMMON_BLK_MAX_V1))
|
||||
|
||||
/*
|
||||
* Return true only if every one of the first @num_cpu entries of @metadata reports
* ts_source == 1 (virtual timestamps). Return false as soon as one entry trips the
|
||||
* HAS_PARAM() test for TS_SOURCE, reports a different ts_source, or carries a magic
* number other than the ETMv4/ETE ones. This function does not print anything itself.
|
||||
*/
|
||||
static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu)
|
||||
{
|
||||
int j;
|
||||
|
||||
for (j = 0; j < num_cpu; j++) {
|
||||
switch (metadata[j][CS_ETM_MAGIC]) {
|
||||
case __perf_cs_etmv4_magic:
|
||||
/*
 * HAS_PARAM() uses a "<=" test, so it is true when the recorded parameter
 * count does not reach TS_SOURCE. It is evaluated first so that, via
 * short-circuiting, the TS_SOURCE slot is only read when that test fails
 * (presumably: when the slot was actually recorded).
 */
if (HAS_PARAM(j, ETMV4, TS_SOURCE) || metadata[j][CS_ETMV4_TS_SOURCE] != 1)
|
||||
return false;
|
||||
break;
|
||||
case __perf_cs_ete_magic:
|
||||
/* Same check as above, using the ETE index of TS_SOURCE. */
if (HAS_PARAM(j, ETE, TS_SOURCE) || metadata[j][CS_ETE_TS_SOURCE] != 1)
|
||||
return false;
|
||||
break;
|
||||
default:
|
||||
/* Unknown / unsupported magic number. */
|
||||
return false;
|
||||
}
|
||||
}
|
||||
/* No entry failed the checks (also the result when num_cpu <= 0). */
return true;
|
||||
}
|
||||
|
||||
int cs_etm__process_auxtrace_info_full(union perf_event *event,
|
||||
struct perf_session *session)
|
||||
{
|
||||
struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
|
||||
struct cs_etm_auxtrace *etm = NULL;
|
||||
struct int_node *inode;
|
||||
struct perf_record_time_conv *tc = &session->time_conv;
|
||||
int event_header_size = sizeof(struct perf_event_header);
|
||||
int total_size = auxtrace_info->header.size;
|
||||
int priv_size = 0;
|
||||
@ -2886,6 +2945,13 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
|
||||
etm->auxtrace_type = auxtrace_info->type;
|
||||
etm->timeless_decoding = cs_etm__is_timeless_decoding(etm);
|
||||
|
||||
/* Use virtual timestamps if all ETMs report ts_source = 1 */
|
||||
etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu);
|
||||
|
||||
if (!etm->has_virtual_ts)
|
||||
ui__warning("Virtual timestamps are not enabled, or not supported by the traced system.\n"
|
||||
"The time field of the samples will not be set accurately.\n\n");
|
||||
|
||||
etm->auxtrace.process_event = cs_etm__process_event;
|
||||
etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event;
|
||||
etm->auxtrace.flush_events = cs_etm__flush_events;
|
||||
@ -2915,6 +2981,15 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
|
||||
goto err_delete_thread;
|
||||
}
|
||||
|
||||
etm->tc.time_shift = tc->time_shift;
|
||||
etm->tc.time_mult = tc->time_mult;
|
||||
etm->tc.time_zero = tc->time_zero;
|
||||
if (event_contains(*tc, time_cycles)) {
|
||||
etm->tc.time_cycles = tc->time_cycles;
|
||||
etm->tc.time_mask = tc->time_mask;
|
||||
etm->tc.cap_user_time_zero = tc->cap_user_time_zero;
|
||||
etm->tc.cap_user_time_short = tc->cap_user_time_short;
|
||||
}
|
||||
err = cs_etm__synth_events(etm, session);
|
||||
if (err)
|
||||
goto err_delete_thread;
|
||||
|
@ -71,6 +71,7 @@ enum {
|
||||
CS_ETMV4_TRCIDR2,
|
||||
CS_ETMV4_TRCIDR8,
|
||||
CS_ETMV4_TRCAUTHSTATUS,
|
||||
CS_ETMV4_TS_SOURCE,
|
||||
CS_ETMV4_PRIV_MAX,
|
||||
};
|
||||
|
||||
@ -82,7 +83,17 @@ enum {
|
||||
* added in header V1
|
||||
*/
|
||||
enum {
|
||||
CS_ETE_TRCDEVARCH = CS_ETMV4_PRIV_MAX,
|
||||
/* Dynamic, configurable parameters */
|
||||
CS_ETE_TRCCONFIGR = CS_ETM_COMMON_BLK_MAX_V1,
|
||||
CS_ETE_TRCTRACEIDR,
|
||||
/* RO, taken from sysFS */
|
||||
CS_ETE_TRCIDR0,
|
||||
CS_ETE_TRCIDR1,
|
||||
CS_ETE_TRCIDR2,
|
||||
CS_ETE_TRCIDR8,
|
||||
CS_ETE_TRCAUTHSTATUS,
|
||||
CS_ETE_TRCDEVARCH,
|
||||
CS_ETE_TS_SOURCE,
|
||||
CS_ETE_PRIV_MAX
|
||||
};
|
||||
|
||||
@ -181,7 +192,7 @@ struct cs_etm_packet_queue {
|
||||
u32 head;
|
||||
u32 tail;
|
||||
u32 instr_count;
|
||||
u64 cs_timestamp;
|
||||
u64 cs_timestamp; /* Timestamp from trace data, converted to ns if possible */
|
||||
u64 next_cs_timestamp;
|
||||
struct cs_etm_packet packet_buffer[CS_ETM_PACKET_MAX_BUFFER];
|
||||
};
|
||||
@ -220,6 +231,7 @@ struct cs_etm_packet_queue
|
||||
*cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id);
|
||||
int cs_etm__process_auxtrace_info_full(union perf_event *event __maybe_unused,
|
||||
struct perf_session *session __maybe_unused);
|
||||
u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp);
|
||||
#else
|
||||
static inline int
|
||||
cs_etm__process_auxtrace_info_full(union perf_event *event __maybe_unused,
|
||||
|
@ -322,10 +322,8 @@ static int add_tracepoint_field_value(struct ctf_writer *cw,
|
||||
offset = tmp_val;
|
||||
len = offset >> 16;
|
||||
offset &= 0xffff;
|
||||
#ifdef HAVE_LIBTRACEEVENT_TEP_FIELD_IS_RELATIVE
|
||||
if (flags & TEP_FIELD_IS_RELATIVE)
|
||||
if (tep_field_is_relative(flags))
|
||||
offset += fmtf->offset + fmtf->size;
|
||||
#endif
|
||||
}
|
||||
|
||||
if (flags & TEP_FIELD_IS_ARRAY) {
|
||||
|
@ -19,12 +19,19 @@
|
||||
#include "debug.h"
|
||||
#include "print_binary.h"
|
||||
#include "target.h"
|
||||
#include "trace-event.h"
|
||||
#include "ui/helpline.h"
|
||||
#include "ui/ui.h"
|
||||
#include "util/parse-sublevel-options.h"
|
||||
|
||||
#include <linux/ctype.h>
|
||||
|
||||
#ifdef HAVE_LIBTRACEEVENT
|
||||
#include <traceevent/event-parse.h>
|
||||
#else
|
||||
#define LIBTRACEEVENT_VERSION 0
|
||||
#endif
|
||||
|
||||
int verbose;
|
||||
int debug_peo_args;
|
||||
bool dump_trace = false, quiet = false;
|
||||
@ -228,6 +235,14 @@ int perf_debug_option(const char *str)
|
||||
/* Allow only verbose value in range (0, 10), otherwise set 0. */
|
||||
verbose = (verbose < 0) || (verbose > 10) ? 0 : verbose;
|
||||
|
||||
#if LIBTRACEEVENT_VERSION >= MAKE_LIBTRACEEVENT_VERSION(1, 3, 0)
|
||||
if (verbose == 1)
|
||||
tep_set_loglevel(TEP_LOG_INFO);
|
||||
else if (verbose == 2)
|
||||
tep_set_loglevel(TEP_LOG_DEBUG);
|
||||
else if (verbose >= 3)
|
||||
tep_set_loglevel(TEP_LOG_ALL);
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -285,8 +285,6 @@ void evsel__init(struct evsel *evsel,
|
||||
evsel->sample_size = __evsel__sample_size(attr->sample_type);
|
||||
evsel__calc_id_pos(evsel);
|
||||
evsel->cmdline_group_boundary = false;
|
||||
evsel->metric_expr = NULL;
|
||||
evsel->metric_name = NULL;
|
||||
evsel->metric_events = NULL;
|
||||
evsel->per_pkg_mask = NULL;
|
||||
evsel->collect_stat = false;
|
||||
@ -2319,7 +2317,10 @@ u64 evsel__bitfield_swap_branch_flags(u64 value)
|
||||
* abort:1 //transaction abort
|
||||
* cycles:16 //cycle count to last branch
|
||||
* type:4 //branch type
|
||||
* reserved:40
|
||||
* spec:2 //branch speculation info
|
||||
* new_type:4 //additional branch type
|
||||
* priv:3 //privilege level
|
||||
* reserved:31
|
||||
* }
|
||||
* }
|
||||
*
|
||||
@ -2335,7 +2336,10 @@ u64 evsel__bitfield_swap_branch_flags(u64 value)
|
||||
new_val |= bitfield_swap(value, 3, 1);
|
||||
new_val |= bitfield_swap(value, 4, 16);
|
||||
new_val |= bitfield_swap(value, 20, 4);
|
||||
new_val |= bitfield_swap(value, 24, 40);
|
||||
new_val |= bitfield_swap(value, 24, 2);
|
||||
new_val |= bitfield_swap(value, 26, 4);
|
||||
new_val |= bitfield_swap(value, 30, 3);
|
||||
new_val |= bitfield_swap(value, 33, 31);
|
||||
} else {
|
||||
new_val = bitfield_swap(value, 63, 1);
|
||||
new_val |= bitfield_swap(value, 62, 1);
|
||||
@ -2343,7 +2347,10 @@ u64 evsel__bitfield_swap_branch_flags(u64 value)
|
||||
new_val |= bitfield_swap(value, 60, 1);
|
||||
new_val |= bitfield_swap(value, 44, 16);
|
||||
new_val |= bitfield_swap(value, 40, 4);
|
||||
new_val |= bitfield_swap(value, 0, 40);
|
||||
new_val |= bitfield_swap(value, 38, 2);
|
||||
new_val |= bitfield_swap(value, 34, 4);
|
||||
new_val |= bitfield_swap(value, 31, 3);
|
||||
new_val |= bitfield_swap(value, 0, 31);
|
||||
}
|
||||
|
||||
return new_val;
|
||||
@ -2784,10 +2791,8 @@ void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char
|
||||
if (field->flags & TEP_FIELD_IS_DYNAMIC) {
|
||||
offset = *(int *)(sample->raw_data + field->offset);
|
||||
offset &= 0xffff;
|
||||
#ifdef HAVE_LIBTRACEEVENT_TEP_FIELD_IS_RELATIVE
|
||||
if (field->flags & TEP_FIELD_IS_RELATIVE)
|
||||
if (tep_field_is_relative(field->flags))
|
||||
offset += field->offset + field->size;
|
||||
#endif
|
||||
}
|
||||
|
||||
return sample->raw_data + offset;
|
||||
|
@ -105,8 +105,6 @@ struct evsel {
|
||||
* metric fields are similar, but needs more care as they can have
|
||||
* references to other metric (evsel).
|
||||
*/
|
||||
const char * metric_expr;
|
||||
const char * metric_name;
|
||||
struct evsel **metric_events;
|
||||
struct evsel *metric_leader;
|
||||
|
||||
|
@ -22,9 +22,9 @@ bool evswitch__discard(struct evswitch *evswitch, struct evsel *evsel);
|
||||
|
||||
#define OPTS_EVSWITCH(evswitch) \
|
||||
OPT_STRING(0, "switch-on", &(evswitch)->on_name, \
|
||||
"event", "Consider events after the ocurrence of this event"), \
|
||||
"event", "Consider events after the occurrence of this event"), \
|
||||
OPT_STRING(0, "switch-off", &(evswitch)->off_name, \
|
||||
"event", "Stop considering events after the ocurrence of this event"), \
|
||||
"event", "Stop considering events after the occurrence of this event"), \
|
||||
OPT_BOOLEAN(0, "show-on-off-events", &(evswitch)->show_on_off_events, \
|
||||
"Show the on/off switch events, used with --switch-on and --switch-off")
|
||||
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include <linux/zalloc.h>
|
||||
#include <ctype.h>
|
||||
#include <math.h>
|
||||
#include "pmu.h"
|
||||
|
||||
#ifdef PARSER_DEBUG
|
||||
extern int expr_debug;
|
||||
@ -448,6 +449,10 @@ double expr__get_literal(const char *literal, const struct expr_scanner_ctx *ctx
|
||||
result = topology->core_cpus_lists;
|
||||
goto out;
|
||||
}
|
||||
if (!strcmp("#slots", literal)) {
|
||||
result = perf_pmu__cpu_slots_per_cycle();
|
||||
goto out;
|
||||
}
|
||||
|
||||
pr_err("Unrecognized literal '%s'", literal);
|
||||
out:
|
||||
|
@ -9,6 +9,7 @@ struct expr_scanner_ctx {
|
||||
char *user_requested_cpu_list;
|
||||
int runtime;
|
||||
bool system_wide;
|
||||
bool is_test;
|
||||
};
|
||||
|
||||
struct expr_parse_ctx {
|
||||
|
@ -87,9 +87,11 @@ static int literal(yyscan_t scanner, const struct expr_scanner_ctx *sctx)
|
||||
YYSTYPE *yylval = expr_get_lval(scanner);
|
||||
|
||||
yylval->num = expr__get_literal(expr_get_text(scanner), sctx);
|
||||
if (isnan(yylval->num))
|
||||
return EXPR_ERROR;
|
||||
|
||||
if (isnan(yylval->num)) {
|
||||
if (!sctx->is_test)
|
||||
return EXPR_ERROR;
|
||||
yylval->num = 1;
|
||||
}
|
||||
return LITERAL;
|
||||
}
|
||||
%}
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user