mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-08 15:04:45 +00:00
ea15483e7c
Add 'simd' sort field to visualize SIMD ops in 'perf report'. Rows are labeled with the SIMD ISA, and the type of predicate (if any): - [p] partial predicate - [e] empty predicate (no elements in the vector being used). Example with Arm SPE and SVE (Scalable Vector Extension): `#include <arm_sve.h> double src[1025], dst[1025]; int main(void) { svfloat64_t vc = svdup_f64(1); for(;;) for(int i = 0; i < 1025; i += svcntd()) { svbool_t pg = svwhilelt_b64(i, 1025); svfloat64_t vsrc = svld1(pg, &src[i]); svfloat64_t vdst = svadd_x(pg, vsrc, vc); svst1(pg, &dst[i], vdst); } return 0; }` ... compiled using "gcc-11 -march=armv8-a+sve -O3". Profiling on a platform that implements FEAT_SVE and FEAT_SPEv1p1: `$ perf record -e arm_spe_0// -- ./a.out` `$ perf report --itrace=i1i -s overhead,pid,simd,sym` `Overhead Pid:Command Simd Symbol` `........ ................ ....... ......................` `53.76% 10758:program [.] main` `46.14% 10758:program [.] SVE [.] main` `0.09% 10758:program [p] SVE [.] main` The report shows that 0.09% of the sampled SVE operations use partial predicates, because the sizes of the src and dst arrays are not multiples of the vector register length. Signed-off-by: German Gomez <german.gomez@arm.com> Acked-by: Ian Rogers <irogers@google.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Anshuman.Khandual@arm.com Cc: Ingo Molnar <mingo@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: John Garry <john.g.garry@oracle.com> Cc: Leo Yan <leo.yan@linaro.org> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Mike Leach <mike.leach@linaro.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Will Deacon <will@kernel.org> Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20230320151509.1137462-2-james.clark@arm.com Signed-off-by: James Clark <james.clark@arm.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> |
||
---|---|---|
.. | ||
android.txt | ||
arm-coresight.txt | ||
asciidoc.conf | ||
asciidoctor-extensions.rb | ||
build-docdep.perl | ||
build-xed.txt | ||
Build.txt | ||
callchain-overhead-calculation.txt | ||
cat-texi.perl | ||
db-export.txt | ||
examples.txt | ||
guest-files.txt | ||
guestmount.txt | ||
intel-bts.txt | ||
intel-hybrid.txt | ||
intel-pt.txt | ||
itrace.txt | ||
jit-interface.txt | ||
jitdump-specification.txt | ||
Makefile | ||
manpage-1.72.xsl | ||
manpage-base.xsl | ||
manpage-bold-literal.xsl | ||
manpage-normal.xsl | ||
manpage-suppress-sp.xsl | ||
perf-annotate.txt | ||
perf-archive.txt | ||
perf-arm-spe.txt | ||
perf-bench.txt | ||
perf-buildid-cache.txt | ||
perf-buildid-list.txt | ||
perf-c2c.txt | ||
perf-config.txt | ||
perf-daemon.txt | ||
perf-data.txt | ||
perf-diff.txt | ||
perf-dlfilter.txt | ||
perf-evlist.txt | ||
perf-ftrace.txt | ||
perf-help.txt | ||
perf-inject.txt | ||
perf-intel-pt.txt | ||
perf-iostat.txt | ||
perf-kallsyms.txt | ||
perf-kmem.txt | ||
perf-kvm.txt | ||
perf-kwork.txt | ||
perf-list.txt | ||
perf-lock.txt | ||
perf-mem.txt | ||
perf-probe.txt | ||
perf-record.txt | ||
perf-report.txt | ||
perf-sched.txt | ||
perf-script-perl.txt | ||
perf-script-python.txt | ||
perf-script.txt | ||
perf-stat.txt | ||
perf-test.txt | ||
perf-timechart.txt | ||
perf-top.txt | ||
perf-trace.txt | ||
perf-version.txt | ||
perf.data-directory-format.txt | ||
perf.data-file-format.txt | ||
perf.txt | ||
perfconfig.example | ||
security.txt | ||
tips.txt | ||
topdown.txt |