2009-04-20 15:37:32 +02:00
|
|
|
/*
|
2009-06-02 23:37:05 +02:00
|
|
|
* builtin-stat.c
|
|
|
|
*
|
|
|
|
* Builtin stat command: Give a precise performance counters summary
|
|
|
|
* overview about any workload, CPU or specific PID.
|
|
|
|
*
|
|
|
|
* Sample output:
|
2009-04-20 15:37:32 +02:00
|
|
|
|
2009-06-02 23:37:05 +02:00
|
|
|
$ perf stat ~/hackbench 10
|
|
|
|
Time: 0.104
|
2009-04-20 15:37:32 +02:00
|
|
|
|
2009-06-02 23:37:05 +02:00
|
|
|
Performance counter stats for '/home/mingo/hackbench':
|
2009-04-20 15:37:32 +02:00
|
|
|
|
2009-06-02 23:37:05 +02:00
|
|
|
1255.538611 task clock ticks # 10.143 CPU utilization factor
|
|
|
|
54011 context switches # 0.043 M/sec
|
|
|
|
385 CPU migrations # 0.000 M/sec
|
|
|
|
17755 pagefaults # 0.014 M/sec
|
|
|
|
3808323185 CPU cycles # 3033.219 M/sec
|
|
|
|
1575111190 instructions # 1254.530 M/sec
|
|
|
|
17367895 cache references # 13.833 M/sec
|
|
|
|
7674421 cache misses # 6.112 M/sec
|
2009-04-20 15:37:32 +02:00
|
|
|
|
2009-06-02 23:37:05 +02:00
|
|
|
Wall-clock time elapsed: 123.786620 msecs
|
2009-04-20 15:37:32 +02:00
|
|
|
|
2009-05-26 09:17:18 +02:00
|
|
|
*
|
|
|
|
* Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
|
|
|
|
*
|
|
|
|
* Improvements and fixes by:
|
|
|
|
*
|
|
|
|
* Arjan van de Ven <arjan@linux.intel.com>
|
|
|
|
* Yanmin Zhang <yanmin.zhang@intel.com>
|
|
|
|
* Wu Fengguang <fengguang.wu@intel.com>
|
|
|
|
* Mike Galbraith <efault@gmx.de>
|
|
|
|
* Paul Mackerras <paulus@samba.org>
|
2009-06-27 03:02:07 +05:30
|
|
|
* Jaswinder Singh Rajput <jaswinder@kernel.org>
|
2009-05-26 09:17:18 +02:00
|
|
|
*
|
|
|
|
* Released under the GPL v2. (and only v2, not any later version)
|
2009-04-20 15:37:32 +02:00
|
|
|
*/
|
|
|
|
|
2009-05-23 18:28:58 +02:00
|
|
|
#include "perf.h"
|
2009-05-27 09:10:38 +02:00
|
|
|
#include "builtin.h"
|
2009-04-27 08:02:14 +02:00
|
|
|
#include "util/util.h"
|
2009-05-26 09:17:18 +02:00
|
|
|
#include "util/parse-options.h"
|
|
|
|
#include "util/parse-events.h"
|
2009-08-16 22:05:48 +02:00
|
|
|
#include "util/event.h"
|
2011-01-03 16:39:04 -02:00
|
|
|
#include "util/evsel.h"
|
2009-08-16 22:05:48 +02:00
|
|
|
#include "util/debug.h"
|
2009-12-31 16:05:50 +08:00
|
|
|
#include "util/header.h"
|
perf tools: Fix sparse CPU numbering related bugs
At present, the perf subcommands that do system-wide monitoring
(perf stat, perf record and perf top) don't work properly unless
the online cpus are numbered 0, 1, ..., N-1. These tools ask
for the number of online cpus with sysconf(_SC_NPROCESSORS_ONLN)
and then try to create events for cpus 0, 1, ..., N-1.
This creates problems for systems where the online cpus are
numbered sparsely. For example, a POWER6 system in
single-threaded mode (i.e. only running 1 hardware thread per
core) will have only even-numbered cpus online.
This fixes the problem by reading the /sys/devices/system/cpu/online
file to find out which cpus are online. The code that does that is in
tools/perf/util/cpumap.[ch], and consists of a read_cpu_map()
function that sets up a cpumap[] array and returns the number of
online cpus. If /sys/devices/system/cpu/online can't be read or
can't be parsed successfully, it falls back to using sysconf to
ask how many cpus are online and sets up an identity map in cpumap[].
The perf record, perf stat and perf top code then calls
read_cpu_map() in the system-wide monitoring case (instead of
sysconf) and uses cpumap[] to get the cpu numbers to pass to
perf_event_open.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: Anton Blanchard <anton@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
LKML-Reference: <20100310093609.GA3959@brick.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-03-10 20:36:09 +11:00
|
|
|
#include "util/cpumap.h"
|
2010-03-18 11:36:05 -03:00
|
|
|
#include "util/thread.h"
|
2009-04-20 15:37:32 +02:00
|
|
|
|
|
|
|
#include <sys/prctl.h>
|
2009-06-13 14:57:28 +02:00
|
|
|
#include <math.h>
|
perf stat: add perf stat -B to pretty print large numbers
It is hard to read very large numbers so provide an option to perf stat
to separate thousands using a separator. The patch leverages the locale
support of stdio. You need to set your LC_NUMERIC appropriately, for
instance LC_NUMERIC=en_US.UTF8. You need to pass -B to activate this
feature. This way existing scripts parsing the output do not need to be
changed. Here is an example.
$ perf stat noploop 2
noploop for 2 seconds
Performance counter stats for 'noploop 2':
1998.347031 task-clock-msecs # 0.998 CPUs
61 context-switches # 0.000 M/sec
0 CPU-migrations # 0.000 M/sec
118 page-faults # 0.000 M/sec
4,138,410,900 cycles # 2070.917 M/sec (scaled from 70.01%)
2,062,650,268 instructions # 0.498 IPC (scaled from 70.01%)
2,057,653,466 branches # 1029.678 M/sec (scaled from 70.01%)
40,267 branch-misses # 0.002 % (scaled from 30.04%)
2,055,961,348 cache-references # 1028.831 M/sec (scaled from 30.03%)
53,725 cache-misses # 0.027 M/sec (scaled from 30.02%)
2.001393933 seconds time elapsed
$ perf stat -B noploop 2
noploop for 2 seconds
Performance counter stats for 'noploop 2':
1998.297883 task-clock-msecs # 0.998 CPUs
59 context-switches # 0.000 M/sec
0 CPU-migrations # 0.000 M/sec
119 page-faults # 0.000 M/sec
4,131,380,160 cycles # 2067.450 M/sec (scaled from 70.01%)
2,059,096,507 instructions # 0.498 IPC (scaled from 70.01%)
2,054,681,303 branches # 1028.216 M/sec (scaled from 70.01%)
25,650 branch-misses # 0.001 % (scaled from 30.05%)
2,056,283,014 cache-references # 1029.017 M/sec (scaled from 30.03%)
47,097 cache-misses # 0.024 M/sec (scaled from 30.02%)
2.001391016 seconds time elapsed
Cc: David S. Miller <davem@davemloft.net>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tom Zanussi <tzanussi@gmail.com>
LKML-Reference: <4bf28fe8.914ed80a.01ca.fffff5f5@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-05-18 15:00:01 +02:00
|
|
|
#include <locale.h>
|
2009-05-05 17:50:27 +02:00
|
|
|
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Committer note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitly enabled or disabled -B,
add code to disable big_num if the user didn't explicitly set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-01 18:49:05 +02:00
|
|
|
#define DEFAULT_SEPARATOR " "
|
|
|
|
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 12:02:48 +02:00
|
|
|
static struct perf_event_attr default_attrs[] = {
|
2009-04-20 15:37:32 +02:00
|
|
|
|
perf stat: Re-align the default_attrs[] array
Clean up the array definition to be vertically aligned.
No functional effects.
Cc: Tim Blechmann <tim@klingt.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4ADC3975.8050109@klingt.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
tools/perf/builtin-stat.c | 2 ++
1 files changed, 2 insertions(+), 0 deletions(-)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index c373683..95a55ea 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -59,6 +59,8 @@ static struct perf_event_attr default_attrs[] = {
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES},
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES },
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS},
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },
};
2009-10-19 13:27:08 +02:00
|
|
|
{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
|
|
|
|
{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES },
|
|
|
|
{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS },
|
|
|
|
{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },
|
2009-06-11 14:06:28 +02:00
|
|
|
|
perf stat: Re-align the default_attrs[] array
Clean up the array definition to be vertically aligned.
No functional effects.
Cc: Tim Blechmann <tim@klingt.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4ADC3975.8050109@klingt.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
tools/perf/builtin-stat.c | 2 ++
1 files changed, 2 insertions(+), 0 deletions(-)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index c373683..95a55ea 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -59,6 +59,8 @@ static struct perf_event_attr default_attrs[] = {
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES},
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES },
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS},
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },
};
2009-10-19 13:27:08 +02:00
|
|
|
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
|
|
|
|
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
|
|
|
|
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
|
|
|
|
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },
|
perf stat: Count branches first
Count branches first, cache-misses second. The reason is that
on x86 branches are not counted by all counters on all CPUs.
Before:
Performance counter stats for 'ls':
0.756653 task-clock-msecs # 0.802 CPUs
0 context-switches # 0.000 M/sec
0 CPU-migrations # 0.000 M/sec
250 page-faults # 0.330 M/sec
2375725 cycles # 3139.781 M/sec
1628129 instructions # 0.685 IPC
19643 cache-references # 25.960 M/sec
4608 cache-misses # 6.090 M/sec
342532 branches # 452.694 M/sec
<not counted> branch-misses
0.000943356 seconds time elapsed
After:
Performance counter stats for 'ls':
1.056734 task-clock-msecs # 0.859 CPUs
0 context-switches # 0.000 M/sec
0 CPU-migrations # 0.000 M/sec
259 page-faults # 0.245 M/sec
3345932 cycles # 3166.295 M/sec
3074090 instructions # 0.919 IPC
616928 branches # 583.806 M/sec
39279 branch-misses # 6.367 %
21312 cache-references # 20.168 M/sec
3661 cache-misses # 3.464 M/sec
0.001230551 seconds time elapsed
(also prettify the printout of branch misses, in case it's
getting scaled.)
Cc: Tim Blechmann <tim@klingt.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4ADC3975.8050109@klingt.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
tools/perf/builtin-stat.c | 2 ++
1 files changed, 2 insertions(+), 0 deletions(-)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index c373683..95a55ea 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -59,6 +59,8 @@ static struct perf_event_attr default_attrs[] = {
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES},
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES },
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS},
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },
};
---
tools/perf/builtin-stat.c | 20 ++++++++++----------
1 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 95a55ea..90e0a26 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -50,17 +50,17 @@
static struct perf_event_attr default_attrs[] = {
- { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
- { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES},
- { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS },
- { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },
-
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES},
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES },
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS},
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },
+ { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
+ { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES },
+ { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS },
+ { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },
+
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES },
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES },
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },
};
2009-10-19 13:33:03 +02:00
|
|
|
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES },
|
|
|
|
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES },
|
2009-06-11 14:06:28 +02:00
|
|
|
|
2009-04-20 15:37:32 +02:00
|
|
|
};
|
2009-05-26 09:17:18 +02:00
|
|
|
|
2010-04-13 18:37:33 +10:00
|
|
|
static bool system_wide = false;
|
2010-05-28 12:00:01 +02:00
|
|
|
static int nr_cpus = 0;
|
2009-06-24 18:19:34 +05:30
|
|
|
static int run_idx = 0;
|
2009-04-20 15:37:32 +02:00
|
|
|
|
2009-06-24 18:19:34 +05:30
|
|
|
static int run_count = 1;
|
2010-05-12 10:40:01 +02:00
|
|
|
static bool no_inherit = false;
|
2010-04-13 18:37:33 +10:00
|
|
|
static bool scale = true;
|
2010-11-16 11:05:01 +02:00
|
|
|
static bool no_aggr = false;
|
2009-10-04 01:35:01 +01:00
|
|
|
static pid_t target_pid = -1;
|
2010-03-18 11:36:05 -03:00
|
|
|
static pid_t target_tid = -1;
|
|
|
|
static pid_t *all_tids = NULL;
|
|
|
|
static int thread_num = 0;
|
2009-10-04 01:35:01 +01:00
|
|
|
static pid_t child_pid = -1;
|
2010-04-13 18:37:33 +10:00
|
|
|
static bool null_run = false;
|
2010-12-01 17:53:27 -02:00
|
|
|
static bool big_num = true;
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Committer note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitly enabled or disabled -B,
add code to disable big_num if the user didn't explicitly set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-01 18:49:05 +02:00
|
|
|
static int big_num_opt = -1;
|
2010-05-28 12:00:01 +02:00
|
|
|
static const char *cpu_list;
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Committer note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitly enabled or disabled -B,
add code to disable big_num if the user didn't explicitly set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-01 18:49:05 +02:00
|
|
|
static const char *csv_sep = NULL;
|
|
|
|
static bool csv_output = false;
|
perf stat: add perf stat -B to pretty print large numbers
It is hard to read very large numbers so provide an option to perf stat
to separate thousands using a separator. The patch leverages the locale
support of stdio. You need to set your LC_NUMERIC appropriately, for
instance LC_NUMERIC=en_US.UTF8. You need to pass -B to activate this
feature. This way existing scripts parsing the output do not need to be
changed. Here is an example.
$ perf stat noploop 2
noploop for 2 seconds
Performance counter stats for 'noploop 2':
1998.347031 task-clock-msecs # 0.998 CPUs
61 context-switches # 0.000 M/sec
0 CPU-migrations # 0.000 M/sec
118 page-faults # 0.000 M/sec
4,138,410,900 cycles # 2070.917 M/sec (scaled from 70.01%)
2,062,650,268 instructions # 0.498 IPC (scaled from 70.01%)
2,057,653,466 branches # 1029.678 M/sec (scaled from 70.01%)
40,267 branch-misses # 0.002 % (scaled from 30.04%)
2,055,961,348 cache-references # 1028.831 M/sec (scaled from 30.03%)
53,725 cache-misses # 0.027 M/sec (scaled from 30.02%)
2.001393933 seconds time elapsed
$ perf stat -B noploop 2
noploop for 2 seconds
Performance counter stats for 'noploop 2':
1998.297883 task-clock-msecs # 0.998 CPUs
59 context-switches # 0.000 M/sec
0 CPU-migrations # 0.000 M/sec
119 page-faults # 0.000 M/sec
4,131,380,160 cycles # 2067.450 M/sec (scaled from 70.01%)
2,059,096,507 instructions # 0.498 IPC (scaled from 70.01%)
2,054,681,303 branches # 1028.216 M/sec (scaled from 70.01%)
25,650 branch-misses # 0.001 % (scaled from 30.05%)
2,056,283,014 cache-references # 1029.017 M/sec (scaled from 30.03%)
47,097 cache-misses # 0.024 M/sec (scaled from 30.02%)
2.001391016 seconds time elapsed
Cc: David S. Miller <davem@davemloft.net>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tom Zanussi <tzanussi@gmail.com>
LKML-Reference: <4bf28fe8.914ed80a.01ca.fffff5f5@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-05-18 15:00:01 +02:00
|
|
|
|
2009-12-31 16:05:50 +08:00
|
|
|
static volatile int done = 0;
|
|
|
|
|
2009-09-04 15:36:12 +02:00
|
|
|
/*
 * Running accumulator for one series of samples, maintained by
 * update_stats() and consumed by avg_stats()/stddev_stats().
 */
struct stats {
	double n;	/* number of samples folded in so far */
	double mean;	/* running mean of those samples */
	double M2;	/* running sum of squared deviations from the mean */
};
|
2009-06-13 14:57:28 +02:00
|
|
|
|
2011-01-03 16:39:04 -02:00
|
|
|
struct perf_stat {
|
|
|
|
struct stats res_stats[3];
|
|
|
|
};
|
|
|
|
|
2011-01-03 17:45:52 -02:00
|
|
|
static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
|
2011-01-03 16:39:04 -02:00
|
|
|
{
|
2011-01-03 17:45:52 -02:00
|
|
|
evsel->priv = zalloc(sizeof(struct perf_stat));
|
2011-01-03 16:39:04 -02:00
|
|
|
return evsel->priv == NULL ? -ENOMEM : 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
|
|
|
|
{
|
|
|
|
free(evsel->priv);
|
|
|
|
evsel->priv = NULL;
|
|
|
|
}
|
|
|
|
|
2009-09-04 15:36:08 +02:00
|
|
|
/*
 * Fold one more sample @val into @stats in a single pass: bump the sample
 * count, then update the running mean and the running sum of squared
 * deviations (M2) from it.
 */
static void update_stats(struct stats *stats, u64 val)
{
	const double sample = val;
	/* Deviation from the mean *before* this sample is accounted for. */
	const double delta = sample - stats->mean;

	stats->n += 1;
	stats->mean += delta / stats->n;
	/* Second factor deliberately uses the already-updated mean. */
	stats->M2 += delta * (sample - stats->mean);
}
|
|
|
|
|
2009-09-04 15:36:12 +02:00
|
|
|
static double avg_stats(struct stats *stats)
|
|
|
|
{
|
2009-09-04 17:26:26 +02:00
|
|
|
return stats->mean;
|
2009-09-04 15:36:12 +02:00
|
|
|
}
|
2009-06-13 14:57:28 +02:00
|
|
|
|
2009-09-04 15:36:12 +02:00
|
|
|
/*
|
2009-09-04 17:03:13 +02:00
|
|
|
* http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
|
|
|
|
*
|
2009-09-04 17:26:26 +02:00
|
|
|
* (\Sum n_i^2) - ((\Sum n_i)^2)/n
|
|
|
|
* s^2 = -------------------------------
|
|
|
|
* n - 1
|
2009-09-04 17:03:13 +02:00
|
|
|
*
|
|
|
|
* http://en.wikipedia.org/wiki/Stddev
|
|
|
|
*
|
|
|
|
* The std dev of the mean is related to the std dev by:
|
|
|
|
*
|
|
|
|
* s
|
|
|
|
* s_mean = -------
|
|
|
|
* sqrt(n)
|
|
|
|
*
|
2009-09-04 15:36:12 +02:00
|
|
|
*/
|
|
|
|
static double stddev_stats(struct stats *stats)
|
|
|
|
{
|
2009-09-04 17:26:26 +02:00
|
|
|
double variance = stats->M2 / (stats->n - 1);
|
|
|
|
double variance_mean = variance / stats->n;
|
2009-06-13 14:57:28 +02:00
|
|
|
|
2009-09-04 17:03:13 +02:00
|
|
|
return sqrt(variance_mean);
|
2009-09-04 15:36:12 +02:00
|
|
|
}
|
2009-06-13 14:57:28 +02:00
|
|
|
|
2010-11-16 11:05:01 +02:00
|
|
|
struct stats runtime_nsecs_stats[MAX_NR_CPUS];
|
|
|
|
struct stats runtime_cycles_stats[MAX_NR_CPUS];
|
|
|
|
struct stats runtime_branches_stats[MAX_NR_CPUS];
|
2009-09-04 15:36:12 +02:00
|
|
|
struct stats walltime_nsecs_stats;
|
2009-05-29 09:10:54 +02:00
|
|
|
|
2011-01-03 17:48:12 -02:00
|
|
|
static int create_perf_stat_counter(struct perf_evsel *evsel)
|
2009-04-20 15:37:32 +02:00
|
|
|
{
|
2011-01-03 16:39:04 -02:00
|
|
|
struct perf_event_attr *attr = &evsel->attr;
|
2009-05-05 17:50:27 +02:00
|
|
|
|
2009-04-20 15:37:32 +02:00
|
|
|
if (scale)
|
2009-06-06 09:58:57 +02:00
|
|
|
attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
|
|
|
|
PERF_FORMAT_TOTAL_TIME_RUNNING;
|
2009-04-20 15:37:32 +02:00
|
|
|
|
2011-01-03 17:48:12 -02:00
|
|
|
if (system_wide)
|
|
|
|
return perf_evsel__open_per_cpu(evsel, nr_cpus, cpumap);
|
|
|
|
|
|
|
|
attr->inherit = !no_inherit;
|
|
|
|
if (target_pid == -1 && target_tid == -1) {
|
|
|
|
attr->disabled = 1;
|
|
|
|
attr->enable_on_exec = 1;
|
2009-04-20 15:37:32 +02:00
|
|
|
}
|
2010-03-22 13:10:28 -03:00
|
|
|
|
2011-01-03 17:48:12 -02:00
|
|
|
return perf_evsel__open_per_thread(evsel, thread_num, all_tids);
|
2009-04-20 15:37:32 +02:00
|
|
|
}
|
|
|
|
|
2009-05-29 09:10:54 +02:00
|
|
|
/*
|
|
|
|
* Does the counter have nsecs as a unit?
|
|
|
|
*/
|
2011-01-03 16:49:44 -02:00
|
|
|
static inline int nsec_counter(struct perf_evsel *evsel)
|
2009-05-29 09:10:54 +02:00
|
|
|
{
|
2011-01-03 16:49:44 -02:00
|
|
|
if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
|
|
|
|
perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
|
2009-05-29 09:10:54 +02:00
|
|
|
return 1;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2009-05-29 09:10:54 +02:00
|
|
|
* Read out the results of a single counter:
|
2010-11-16 11:05:01 +02:00
|
|
|
* aggregate counts across CPUs in system-wide mode
|
2009-05-29 09:10:54 +02:00
|
|
|
*/
|
2011-01-03 17:45:52 -02:00
|
|
|
static int read_counter_aggr(struct perf_evsel *counter)
|
2009-05-29 09:10:54 +02:00
|
|
|
{
|
2011-01-03 16:39:04 -02:00
|
|
|
struct perf_stat *ps = counter->priv;
|
2011-01-03 17:45:52 -02:00
|
|
|
u64 *count = counter->counts->aggr.values;
|
|
|
|
int i;
|
2009-05-29 09:10:54 +02:00
|
|
|
|
2011-01-03 17:45:52 -02:00
|
|
|
if (__perf_evsel__read(counter, nr_cpus, thread_num, scale) < 0)
|
|
|
|
return -1;
|
2009-09-04 15:36:08 +02:00
|
|
|
|
|
|
|
for (i = 0; i < 3; i++)
|
2011-01-03 16:39:04 -02:00
|
|
|
update_stats(&ps->res_stats[i], count[i]);
|
2009-09-04 15:36:08 +02:00
|
|
|
|
|
|
|
if (verbose) {
|
|
|
|
fprintf(stderr, "%s: %Ld %Ld %Ld\n", event_name(counter),
|
|
|
|
count[0], count[1], count[2]);
|
|
|
|
}
|
|
|
|
|
2009-05-29 09:10:54 +02:00
|
|
|
/*
|
|
|
|
* Save the full runtime - to allow normalization during printout:
|
|
|
|
*/
|
2011-01-03 16:49:44 -02:00
|
|
|
if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
|
2010-11-16 11:05:01 +02:00
|
|
|
update_stats(&runtime_nsecs_stats[0], count[0]);
|
2011-01-03 16:49:44 -02:00
|
|
|
if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
|
2010-11-16 11:05:01 +02:00
|
|
|
update_stats(&runtime_cycles_stats[0], count[0]);
|
2011-01-03 16:49:44 -02:00
|
|
|
if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
|
2010-11-16 11:05:01 +02:00
|
|
|
update_stats(&runtime_branches_stats[0], count[0]);
|
2011-01-03 17:45:52 -02:00
|
|
|
|
|
|
|
return 0;
|
2010-11-16 11:05:01 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Read out the results of a single counter:
|
|
|
|
* do not aggregate counts across CPUs in system-wide mode
|
|
|
|
*/
|
2011-01-03 17:45:52 -02:00
|
|
|
static int read_counter(struct perf_evsel *counter)
|
2010-11-16 11:05:01 +02:00
|
|
|
{
|
2011-01-03 17:45:52 -02:00
|
|
|
u64 *count;
|
2010-11-16 11:05:01 +02:00
|
|
|
int cpu;
|
|
|
|
|
|
|
|
for (cpu = 0; cpu < nr_cpus; cpu++) {
|
2011-01-03 17:45:52 -02:00
|
|
|
if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0)
|
|
|
|
return -1;
|
2010-11-16 11:05:01 +02:00
|
|
|
|
2011-01-03 17:45:52 -02:00
|
|
|
count = counter->counts->cpu[cpu].values;
|
2010-11-16 11:05:01 +02:00
|
|
|
|
2011-01-03 16:49:44 -02:00
|
|
|
if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
|
2010-11-16 11:05:01 +02:00
|
|
|
update_stats(&runtime_nsecs_stats[cpu], count[0]);
|
2011-01-03 16:49:44 -02:00
|
|
|
if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
|
2010-11-16 11:05:01 +02:00
|
|
|
update_stats(&runtime_cycles_stats[cpu], count[0]);
|
2011-01-03 16:49:44 -02:00
|
|
|
if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
|
2010-11-16 11:05:01 +02:00
|
|
|
update_stats(&runtime_branches_stats[cpu], count[0]);
|
|
|
|
}
|
2011-01-03 17:45:52 -02:00
|
|
|
|
|
|
|
return 0;
|
2009-05-29 09:10:54 +02:00
|
|
|
}
|
|
|
|
|
2009-07-01 12:37:06 +02:00
|
|
|
/*
 * Run one measurement pass.
 *
 * When argc > 0 the workload in argv is forked; the child is held back
 * on go_pipe until every counter on evsel_list has been opened.
 * Without a workload the function sleeps until 'done' is set. After
 * the wait the elapsed time is recorded in walltime_nsecs_stats and all
 * counters are read and closed (per CPU when no_aggr is set, aggregated
 * otherwise).
 *
 * Returns WEXITSTATUS() of the status collected by wait(), which is 0
 * when nothing was forked. Exits the process if the pipes or the fork
 * fail, and calls die() if a counter cannot be opened.
 */
static int run_perf_stat(int argc __used, const char **argv)
{
	unsigned long long t0, t1;
	struct perf_evsel *counter;
	int status = 0;
	int child_ready_pipe[2], go_pipe[2];
	const bool forks = (argc > 0);
	char buf;

	if (!system_wide)
		nr_cpus = 1;

	if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
		perror("failed to create pipes");
		exit(1);
	}

	if (forks) {
		if ((child_pid = fork()) < 0) {
			/*
			 * Without a child there is nothing to measure, and
			 * child_pid (-1) must not end up in all_tids[].
			 */
			perror("failed to fork");
			exit(1);
		}

		if (!child_pid) {
			close(child_ready_pipe[0]);
			close(go_pipe[1]);
			fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

			/*
			 * Do a dummy execvp to get the PLT entry resolved,
			 * so we avoid the resolver overhead on the real
			 * execvp call.
			 */
			execvp("", (char **)argv);

			/*
			 * Tell the parent we're ready to go
			 */
			close(child_ready_pipe[1]);

			/*
			 * Wait until the parent tells us to go.
			 */
			if (read(go_pipe[0], &buf, 1) == -1)
				perror("unable to read pipe");

			execvp(argv[0], (char **)argv);

			/* Only reached when the exec itself failed. */
			perror(argv[0]);
			exit(-1);
		}

		if (target_tid == -1 && target_pid == -1 && !system_wide)
			all_tids[0] = child_pid;

		/*
		 * Wait for the child to be ready to exec.
		 */
		close(child_ready_pipe[1]);
		close(go_pipe[0]);
		if (read(child_ready_pipe[0], &buf, 1) == -1)
			perror("unable to read pipe");
		close(child_ready_pipe[0]);
	}

	list_for_each_entry(counter, &evsel_list, node) {
		if (create_perf_stat_counter(counter) < 0) {
			/*
			 * errno holds positive error numbers, so it must be
			 * compared against EPERM/EACCES, not their negations.
			 */
			if (errno == EPERM || errno == EACCES) {
				error("You may not have permission to collect %sstats.\n"
				      "\t Consider tweaking"
				      " /proc/sys/kernel/perf_event_paranoid or running as root.",
				      system_wide ? "system-wide " : "");
			} else {
				error("open_counter returned with %d (%s). "
				      "/bin/dmesg may provide additional information.\n",
				      errno, strerror(errno));
			}
			if (child_pid != -1)
				kill(child_pid, SIGTERM);
			die("Not all events could be opened.\n");
			return -1;
		}
	}

	/*
	 * Enable counters and exec the command:
	 */
	t0 = rdclock();

	if (forks) {
		/*
		 * Closing the write end ends the child's blocking read()
		 * on go_pipe, letting it proceed to the real execvp().
		 */
		close(go_pipe[1]);
		wait(&status);
	} else {
		while(!done) sleep(1);
	}

	t1 = rdclock();

	update_stats(&walltime_nsecs_stats, t1 - t0);

	if (no_aggr) {
		list_for_each_entry(counter, &evsel_list, node) {
			read_counter(counter);
			perf_evsel__close_fd(counter, nr_cpus, 1);
		}
	} else {
		list_for_each_entry(counter, &evsel_list, node) {
			read_counter_aggr(counter);
			perf_evsel__close_fd(counter, nr_cpus, thread_num);
		}
	}

	return WEXITSTATUS(status);
}
|
|
|
|
|
2011-01-03 16:39:04 -02:00
|
|
|
static void print_noise(struct perf_evsel *evsel, double avg)
|
2009-06-13 14:57:28 +02:00
|
|
|
{
|
2011-01-03 16:39:04 -02:00
|
|
|
struct perf_stat *ps;
|
|
|
|
|
2009-09-04 18:23:38 +02:00
|
|
|
if (run_count == 1)
|
|
|
|
return;
|
|
|
|
|
2011-01-03 16:39:04 -02:00
|
|
|
ps = evsel->priv;
|
2009-09-04 18:23:38 +02:00
|
|
|
fprintf(stderr, " ( +- %7.3f%% )",
|
2011-01-03 16:39:04 -02:00
|
|
|
100 * stddev_stats(&ps->res_stats[0]) / avg);
|
2009-06-13 14:57:28 +02:00
|
|
|
}
|
|
|
|
|
2011-01-03 16:49:44 -02:00
|
|
|
static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
|
2009-06-13 13:35:00 +02:00
|
|
|
{
|
2009-09-04 15:36:12 +02:00
|
|
|
double msecs = avg / 1e6;
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Commiter note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitely enabled or disabled -B,
add code to disable big_num if the user didn't explicitely set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-01 18:49:05 +02:00
|
|
|
char cpustr[16] = { '\0', };
|
|
|
|
const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-24s";
|
2009-06-13 13:35:00 +02:00
|
|
|
|
2010-11-16 11:05:01 +02:00
|
|
|
if (no_aggr)
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Commiter note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitely enabled or disabled -B,
add code to disable big_num if the user didn't explicitely set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-01 18:49:05 +02:00
|
|
|
sprintf(cpustr, "CPU%*d%s",
|
|
|
|
csv_output ? 0 : -4,
|
|
|
|
cpumap[cpu], csv_sep);
|
|
|
|
|
2011-01-03 16:49:44 -02:00
|
|
|
fprintf(stderr, fmt, cpustr, msecs, csv_sep, event_name(evsel));
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Commiter note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitely enabled or disabled -B,
add code to disable big_num if the user didn't explicitely set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-01 18:49:05 +02:00
|
|
|
|
|
|
|
if (csv_output)
|
|
|
|
return;
|
2009-06-13 13:35:00 +02:00
|
|
|
|
2011-01-03 16:49:44 -02:00
|
|
|
if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
|
2009-09-04 15:36:12 +02:00
|
|
|
fprintf(stderr, " # %10.3f CPUs ",
|
|
|
|
avg / avg_stats(&walltime_nsecs_stats));
|
2009-06-13 13:35:00 +02:00
|
|
|
}
|
|
|
|
|
2011-01-03 16:49:44 -02:00
|
|
|
static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
|
2009-06-13 13:35:00 +02:00
|
|
|
{
|
2009-09-22 14:53:51 +02:00
|
|
|
double total, ratio = 0.0;
|
2010-11-16 11:05:01 +02:00
|
|
|
char cpustr[16] = { '\0', };
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Commiter note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitely enabled or disabled -B,
add code to disable big_num if the user didn't explicitely set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-01 18:49:05 +02:00
|
|
|
const char *fmt;
|
|
|
|
|
|
|
|
if (csv_output)
|
|
|
|
fmt = "%s%.0f%s%s";
|
|
|
|
else if (big_num)
|
|
|
|
fmt = "%s%'18.0f%s%-24s";
|
|
|
|
else
|
|
|
|
fmt = "%s%18.0f%s%-24s";
|
2010-11-16 11:05:01 +02:00
|
|
|
|
|
|
|
if (no_aggr)
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Commiter note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitely enabled or disabled -B,
add code to disable big_num if the user didn't explicitely set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-01 18:49:05 +02:00
|
|
|
sprintf(cpustr, "CPU%*d%s",
|
|
|
|
csv_output ? 0 : -4,
|
|
|
|
cpumap[cpu], csv_sep);
|
2010-11-16 11:05:01 +02:00
|
|
|
else
|
|
|
|
cpu = 0;
|
2009-09-22 14:53:51 +02:00
|
|
|
|
2011-01-03 16:49:44 -02:00
|
|
|
fprintf(stderr, fmt, cpustr, avg, csv_sep, event_name(evsel));
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Commiter note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitely enabled or disabled -B,
add code to disable big_num if the user didn't explicitely set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-01 18:49:05 +02:00
|
|
|
|
|
|
|
if (csv_output)
|
|
|
|
return;
|
2009-06-13 13:35:00 +02:00
|
|
|
|
2011-01-03 16:49:44 -02:00
|
|
|
if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
|
2010-11-16 11:05:01 +02:00
|
|
|
total = avg_stats(&runtime_cycles_stats[cpu]);
|
2009-09-22 14:53:51 +02:00
|
|
|
|
|
|
|
if (total)
|
|
|
|
ratio = avg / total;
|
|
|
|
|
|
|
|
fprintf(stderr, " # %10.3f IPC ", ratio);
|
2011-01-03 16:49:44 -02:00
|
|
|
} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
|
2010-11-16 11:05:01 +02:00
|
|
|
runtime_branches_stats[cpu].n != 0) {
|
|
|
|
total = avg_stats(&runtime_branches_stats[cpu]);
|
2009-10-18 22:29:23 +11:00
|
|
|
|
|
|
|
if (total)
|
|
|
|
ratio = avg * 100 / total;
|
|
|
|
|
perf stat: Count branches first
Count branches first, cache-misses second. The reason is that
on x86 branches are not counted by all counters on all CPUs.
Before:
Performance counter stats for 'ls':
0.756653 task-clock-msecs # 0.802 CPUs
0 context-switches # 0.000 M/sec
0 CPU-migrations # 0.000 M/sec
250 page-faults # 0.330 M/sec
2375725 cycles # 3139.781 M/sec
1628129 instructions # 0.685 IPC
19643 cache-references # 25.960 M/sec
4608 cache-misses # 6.090 M/sec
342532 branches # 452.694 M/sec
<not counted> branch-misses
0.000943356 seconds time elapsed
After:
Performance counter stats for 'ls':
1.056734 task-clock-msecs # 0.859 CPUs
0 context-switches # 0.000 M/sec
0 CPU-migrations # 0.000 M/sec
259 page-faults # 0.245 M/sec
3345932 cycles # 3166.295 M/sec
3074090 instructions # 0.919 IPC
616928 branches # 583.806 M/sec
39279 branch-misses # 6.367 %
21312 cache-references # 20.168 M/sec
3661 cache-misses # 3.464 M/sec
0.001230551 seconds time elapsed
(also prettify the printout of branch misses, in case it's
getting scaled.)
Cc: Tim Blechmann <tim@klingt.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4ADC3975.8050109@klingt.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
tools/perf/builtin-stat.c | 2 ++
1 files changed, 2 insertions(+), 0 deletions(-)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index c373683..95a55ea 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -59,6 +59,8 @@ static struct perf_event_attr default_attrs[] = {
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES},
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES },
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS},
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },
};
---
tools/perf/builtin-stat.c | 20 ++++++++++----------
1 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 95a55ea..90e0a26 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -50,17 +50,17 @@
static struct perf_event_attr default_attrs[] = {
- { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
- { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES},
- { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS },
- { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },
-
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES},
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES },
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS},
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },
+ { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
+ { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES },
+ { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS },
+ { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },
+
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES },
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES },
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },
};
2009-10-19 13:33:03 +02:00
|
|
|
fprintf(stderr, " # %10.3f %% ", ratio);
|
2009-10-18 22:29:23 +11:00
|
|
|
|
2010-11-16 11:05:01 +02:00
|
|
|
} else if (runtime_nsecs_stats[cpu].n != 0) {
|
|
|
|
total = avg_stats(&runtime_nsecs_stats[cpu]);
|
2009-09-22 14:53:51 +02:00
|
|
|
|
|
|
|
if (total)
|
|
|
|
ratio = 1000.0 * avg / total;
|
|
|
|
|
|
|
|
fprintf(stderr, " # %10.3f M/sec", ratio);
|
2009-06-13 13:35:00 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-05-29 09:10:54 +02:00
|
|
|
/*
|
|
|
|
* Print out the results of a single counter:
|
2010-11-16 11:05:01 +02:00
|
|
|
* aggregated counts in system-wide mode
|
2009-05-29 09:10:54 +02:00
|
|
|
*/
|
2011-01-03 16:39:04 -02:00
|
|
|
static void print_counter_aggr(struct perf_evsel *counter)
|
2009-05-29 09:10:54 +02:00
|
|
|
{
|
2011-01-03 16:39:04 -02:00
|
|
|
struct perf_stat *ps = counter->priv;
|
|
|
|
double avg = avg_stats(&ps->res_stats[0]);
|
2011-01-03 17:45:52 -02:00
|
|
|
int scaled = counter->counts->scaled;
|
2009-05-29 09:10:54 +02:00
|
|
|
|
|
|
|
if (scaled == -1) {
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Commiter note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitely enabled or disabled -B,
add code to disable big_num if the user didn't explicitely set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-01 18:49:05 +02:00
|
|
|
fprintf(stderr, "%*s%s%-24s\n",
|
|
|
|
csv_output ? 0 : 18,
|
|
|
|
"<not counted>", csv_sep, event_name(counter));
|
2009-05-29 09:10:54 +02:00
|
|
|
return;
|
|
|
|
}
|
2009-05-29 09:10:54 +02:00
|
|
|
|
2009-06-13 13:35:00 +02:00
|
|
|
if (nsec_counter(counter))
|
2010-11-16 11:05:01 +02:00
|
|
|
nsec_printout(-1, counter, avg);
|
2009-06-13 13:35:00 +02:00
|
|
|
else
|
2010-11-16 11:05:01 +02:00
|
|
|
abs_printout(-1, counter, avg);
|
2009-09-04 18:23:38 +02:00
|
|
|
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Commiter note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitely enabled or disabled -B,
add code to disable big_num if the user didn't explicitely set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-01 18:49:05 +02:00
|
|
|
if (csv_output) {
|
|
|
|
fputc('\n', stderr);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2009-09-04 18:23:38 +02:00
|
|
|
print_noise(counter, avg);
|
2009-09-04 15:36:12 +02:00
|
|
|
|
|
|
|
if (scaled) {
|
|
|
|
double avg_enabled, avg_running;
|
|
|
|
|
2011-01-03 16:39:04 -02:00
|
|
|
avg_enabled = avg_stats(&ps->res_stats[1]);
|
|
|
|
avg_running = avg_stats(&ps->res_stats[2]);
|
2009-05-30 12:38:51 +02:00
|
|
|
|
2009-06-29 21:50:54 +02:00
|
|
|
fprintf(stderr, " (scaled from %.2f%%)",
|
2009-09-04 15:36:12 +02:00
|
|
|
100 * avg_running / avg_enabled);
|
|
|
|
}
|
2009-06-13 13:35:00 +02:00
|
|
|
|
2009-05-29 09:10:54 +02:00
|
|
|
fprintf(stderr, "\n");
|
|
|
|
}
|
|
|
|
|
2010-11-16 11:05:01 +02:00
|
|
|
/*
|
|
|
|
* Print out the results of a single counter:
|
|
|
|
* does not use aggregated count in system-wide
|
|
|
|
*/
|
2011-01-03 16:39:04 -02:00
|
|
|
static void print_counter(struct perf_evsel *counter)
|
2010-11-16 11:05:01 +02:00
|
|
|
{
|
|
|
|
u64 ena, run, val;
|
|
|
|
int cpu;
|
|
|
|
|
|
|
|
for (cpu = 0; cpu < nr_cpus; cpu++) {
|
2011-01-03 17:45:52 -02:00
|
|
|
val = counter->counts->cpu[cpu].val;
|
|
|
|
ena = counter->counts->cpu[cpu].ena;
|
|
|
|
run = counter->counts->cpu[cpu].run;
|
2010-11-16 11:05:01 +02:00
|
|
|
if (run == 0 || ena == 0) {
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Commiter note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitely enabled or disabled -B,
add code to disable big_num if the user didn't explicitely set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-01 18:49:05 +02:00
|
|
|
fprintf(stderr, "CPU%*d%s%*s%s%-24s",
|
|
|
|
csv_output ? 0 : -4,
|
|
|
|
cpumap[cpu], csv_sep,
|
|
|
|
csv_output ? 0 : 18,
|
|
|
|
"<not counted>", csv_sep,
|
|
|
|
event_name(counter));
|
2010-11-16 11:05:01 +02:00
|
|
|
|
|
|
|
fprintf(stderr, "\n");
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (nsec_counter(counter))
|
|
|
|
nsec_printout(cpu, counter, val);
|
|
|
|
else
|
|
|
|
abs_printout(cpu, counter, val);
|
|
|
|
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Commiter note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitely enabled or disabled -B,
add code to disable big_num if the user didn't explicitely set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-01 18:49:05 +02:00
|
|
|
if (!csv_output) {
|
|
|
|
print_noise(counter, 1.0);
|
2010-11-16 11:05:01 +02:00
|
|
|
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Commiter note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitely enabled or disabled -B,
add code to disable big_num if the user didn't explicitely set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-01 18:49:05 +02:00
|
|
|
if (run != ena) {
|
|
|
|
fprintf(stderr, " (scaled from %.2f%%)",
|
2010-11-16 11:05:01 +02:00
|
|
|
100.0 * run / ena);
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Commiter note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitely enabled or disabled -B,
add code to disable big_num if the user didn't explicitely set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-01 18:49:05 +02:00
|
|
|
}
|
2010-11-16 11:05:01 +02:00
|
|
|
}
|
|
|
|
fprintf(stderr, "\n");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-06-13 14:57:28 +02:00
|
|
|
static void print_stat(int argc, const char **argv)
|
|
|
|
{
|
2011-01-03 16:39:04 -02:00
|
|
|
struct perf_evsel *counter;
|
|
|
|
int i;
|
2009-06-13 14:57:28 +02:00
|
|
|
|
2009-04-20 15:37:32 +02:00
|
|
|
fflush(stdout);
|
|
|
|
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Commiter note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitely enabled or disabled -B,
add code to disable big_num if the user didn't explicitely set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-01 18:49:05 +02:00
|
|
|
if (!csv_output) {
|
|
|
|
fprintf(stderr, "\n");
|
|
|
|
fprintf(stderr, " Performance counter stats for ");
|
|
|
|
if(target_pid == -1 && target_tid == -1) {
|
|
|
|
fprintf(stderr, "\'%s", argv[0]);
|
|
|
|
for (i = 1; i < argc; i++)
|
|
|
|
fprintf(stderr, " %s", argv[i]);
|
|
|
|
} else if (target_pid != -1)
|
|
|
|
fprintf(stderr, "process id \'%d", target_pid);
|
|
|
|
else
|
|
|
|
fprintf(stderr, "thread id \'%d", target_tid);
|
2009-06-03 19:36:07 +02:00
|
|
|
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Commiter note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitely enabled or disabled -B,
add code to disable big_num if the user didn't explicitely set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-01 18:49:05 +02:00
|
|
|
fprintf(stderr, "\'");
|
|
|
|
if (run_count > 1)
|
|
|
|
fprintf(stderr, " (%d runs)", run_count);
|
|
|
|
fprintf(stderr, ":\n\n");
|
|
|
|
}
|
2009-05-29 09:10:54 +02:00
|
|
|
|
2010-11-16 11:05:01 +02:00
|
|
|
if (no_aggr) {
|
2011-01-03 16:39:04 -02:00
|
|
|
list_for_each_entry(counter, &evsel_list, node)
|
2010-11-16 11:05:01 +02:00
|
|
|
print_counter(counter);
|
|
|
|
} else {
|
2011-01-03 16:39:04 -02:00
|
|
|
list_for_each_entry(counter, &evsel_list, node)
|
2010-11-16 11:05:01 +02:00
|
|
|
print_counter_aggr(counter);
|
|
|
|
}
|
2009-04-20 15:37:32 +02:00
|
|
|
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Commiter note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitely enabled or disabled -B,
add code to disable big_num if the user didn't explicitely set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-01 18:49:05 +02:00
|
|
|
if (!csv_output) {
|
|
|
|
fprintf(stderr, "\n");
|
|
|
|
fprintf(stderr, " %18.9f seconds time elapsed",
|
|
|
|
avg_stats(&walltime_nsecs_stats)/1e9);
|
|
|
|
if (run_count > 1) {
|
|
|
|
fprintf(stderr, " ( +- %7.3f%% )",
|
2009-09-04 15:36:12 +02:00
|
|
|
100*stddev_stats(&walltime_nsecs_stats) /
|
|
|
|
avg_stats(&walltime_nsecs_stats));
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Commiter note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitely enabled or disabled -B,
add code to disable big_num if the user didn't explicitely set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-01 18:49:05 +02:00
|
|
|
}
|
|
|
|
fprintf(stderr, "\n\n");
|
2009-06-27 06:24:32 +02:00
|
|
|
}
|
2009-04-20 15:37:32 +02:00
|
|
|
}
|
|
|
|
|
2009-06-10 15:55:59 +02:00
|
|
|
/*
 * Number of the last signal seen by skip_signal(), or -1 while none has
 * been received; sig_atexit() re-raises it with the default disposition.
 */
static volatile int signr = -1;
|
|
|
|
|
2009-05-26 09:17:18 +02:00
|
|
|
static void skip_signal(int signo)
|
2009-04-20 15:37:32 +02:00
|
|
|
{
|
2010-03-18 11:36:03 -03:00
|
|
|
if(child_pid == -1)
|
2009-12-31 16:05:50 +08:00
|
|
|
done = 1;
|
|
|
|
|
2009-06-10 15:55:59 +02:00
|
|
|
signr = signo;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void sig_atexit(void)
|
|
|
|
{
|
2009-10-04 01:35:01 +01:00
|
|
|
if (child_pid != -1)
|
|
|
|
kill(child_pid, SIGTERM);
|
|
|
|
|
2009-06-10 15:55:59 +02:00
|
|
|
if (signr == -1)
|
|
|
|
return;
|
|
|
|
|
|
|
|
signal(signr, SIG_DFL);
|
|
|
|
kill(getpid(), signr);
|
2009-05-26 09:17:18 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Usage synopsis for 'perf stat'; the list is NULL-terminated. */
static const char * const stat_usage[] = {
	"perf stat [<options>] [<command>]", NULL,
};
|
|
|
|
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Committer note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitly enabled or disabled -B,
add code to disable big_num if the user didn't explicitly set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-01 18:49:05 +02:00
|
|
|
/*
 * Option callback for -B/--big-num (see the options[] table).
 *
 * Records in big_num_opt whether the option was given in its plain form (1)
 * or in its negated form (0), so that cmd_stat() can tell an explicit user
 * choice apart from the built-in default.
 *
 * @opt:   unused.
 * @s:     unused.
 * @unset: non-zero when the option was negated.
 *
 * Always returns 0.
 */
static int stat__set_big_num(const struct option *opt __used,
			     const char *s __used, int unset)
{
	if (unset)
		big_num_opt = 0;
	else
		big_num_opt = 1;

	return 0;
}
|
|
|
|
|
2009-05-26 09:17:18 +02:00
|
|
|
static const struct option options[] = {
|
|
|
|
OPT_CALLBACK('e', "event", NULL, "event",
|
2009-06-06 12:24:17 +02:00
|
|
|
"event selector. use 'perf list' to list available events",
|
|
|
|
parse_events),
|
2010-05-12 10:40:01 +02:00
|
|
|
OPT_BOOLEAN('i', "no-inherit", &no_inherit,
|
|
|
|
"child tasks do not inherit counters"),
|
2009-05-26 09:17:18 +02:00
|
|
|
OPT_INTEGER('p', "pid", &target_pid,
|
2010-03-18 11:36:05 -03:00
|
|
|
"stat events on existing process id"),
|
|
|
|
OPT_INTEGER('t', "tid", &target_tid,
|
|
|
|
"stat events on existing thread id"),
|
2009-05-26 09:17:18 +02:00
|
|
|
OPT_BOOLEAN('a', "all-cpus", &system_wide,
|
2009-06-24 18:19:34 +05:30
|
|
|
"system-wide collection from all CPUs"),
|
2009-08-07 10:18:39 +02:00
|
|
|
OPT_BOOLEAN('c', "scale", &scale,
|
2009-06-24 18:19:34 +05:30
|
|
|
"scale/normalize counters"),
|
2010-04-13 18:37:33 +10:00
|
|
|
OPT_INCR('v', "verbose", &verbose,
|
2009-06-07 17:06:46 +02:00
|
|
|
"be more verbose (show counter open errors, etc)"),
|
2009-06-13 14:57:28 +02:00
|
|
|
OPT_INTEGER('r', "repeat", &run_count,
|
|
|
|
"repeat command and print average + stddev (max: 100)"),
|
2009-06-27 06:10:30 +02:00
|
|
|
OPT_BOOLEAN('n', "null", &null_run,
|
|
|
|
"null run - dont start any counters"),
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Commiter note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitely enabled or disabled -B,
add code to disable big_num if the user didn't explicitely set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-01 18:49:05 +02:00
|
|
|
OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
|
|
|
|
"print large numbers with thousands\' separators",
|
|
|
|
stat__set_big_num),
|
2010-05-28 12:00:01 +02:00
|
|
|
OPT_STRING('C', "cpu", &cpu_list, "cpu",
|
|
|
|
"list of cpus to monitor in system-wide"),
|
2010-11-16 11:05:01 +02:00
|
|
|
OPT_BOOLEAN('A', "no-aggr", &no_aggr,
|
|
|
|
"disable CPU count aggregation"),
|
perf stat: Add csv-style output
This patch adds an option (-x/--field-separator) to print counts using a
CSV-style output. The user can pass a custom separator. This makes it very easy
to import counts directly into your favorite spreadsheet without having to
write scripts.
Example:
$ perf stat --field-separator=, -a -- sleep 1
4009.961740,task-clock-msecs
13,context-switches
2,CPU-migrations
189,page-faults
9596385684,cycles
3493659441,instructions
872897069,branches
41562,branch-misses
22424,cache-references
1289,cache-misses
Works also in non-aggregated mode:
$ perf stat -x , -a -A -- sleep 1
CPU0,1002.526168,task-clock-msecs
CPU1,1002.528365,task-clock-msecs
CPU2,1002.523360,task-clock-msecs
CPU3,1002.519878,task-clock-msecs
CPU0,1,context-switches
CPU1,5,context-switches
CPU2,5,context-switches
CPU3,6,context-switches
CPU0,0,CPU-migrations
CPU1,1,CPU-migrations
CPU2,0,CPU-migrations
CPU3,1,CPU-migrations
CPU0,2,page-faults
CPU1,6,page-faults
CPU2,9,page-faults
CPU3,174,page-faults
CPU0,2399439771,cycles
CPU1,2380369063,cycles
CPU2,2399142710,cycles
CPU3,2373161192,cycles
CPU0,872900618,instructions
CPU1,873030960,instructions
CPU2,872714525,instructions
CPU3,874460580,instructions
CPU0,221556839,branches
CPU1,218134342,branches
CPU2,218161730,branches
CPU3,218284093,branches
CPU0,18556,branch-misses
CPU1,1449,branch-misses
CPU2,3447,branch-misses
CPU3,12714,branch-misses
CPU0,8330,cache-references
CPU1,313844,cache-references
CPU2,47993728,cache-references
CPU3,826481,cache-references
CPU0,272,cache-misses
CPU1,5360,cache-misses
CPU2,1342193,cache-misses
CPU3,13992,cache-misses
This second version adds the ability to name a separator and uses
field-separator as the long option to be consistent with perf report.
Commiter note: Since we enabled --big-num by default in 201e0b0 and -x can't be
used with it, we need to notice if the user explicitely enabled or disabled -B,
add code to disable big_num if the user didn't explicitely set --big_num when
-x is used.
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederik Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <4cf68aa7.0fedd80a.5294.1203@mx.google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2010-12-01 18:49:05 +02:00
|
|
|
OPT_STRING('x', "field-separator", &csv_sep, "separator",
|
|
|
|
"print counts with custom separator"),
|
2009-05-26 09:17:18 +02:00
|
|
|
OPT_END()
|
|
|
|
};
|
|
|
|
|
2009-07-01 12:37:06 +02:00
|
|
|
/*
 * cmd_stat - entry point of the 'perf stat' builtin.
 *
 * Parses the command line against options[], validates the option
 * combination, installs the default event list when no event was selected,
 * allocates the per-event state for the chosen CPUs/threads, calls
 * run_perf_stat() run_count times and finally prints the results.
 *
 * @argc:   number of entries in @argv; replaced by the return value of
 *          parse_options() before being passed on.
 * @argv:   argument vector, handed to run_perf_stat() and print_stat().
 * @prefix: unused.
 *
 * Returns the status of the last run_perf_stat() call, or -ENOMEM when an
 * allocation fails before the first run.
 */
int cmd_stat(int argc, const char **argv, const char *prefix __used)
{
	struct perf_evsel *pos;
	int status = -ENOMEM;

	/* Pick up the user's locale (needed for thousands' separators). */
	setlocale(LC_ALL, "");

	argc = parse_options(argc, argv, options, stat_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);

	/* A separator given with -x selects CSV output. */
	if (csv_sep)
		csv_output = true;
	else
		csv_sep = DEFAULT_SEPARATOR;

	/*
	 * let the spreadsheet do the pretty-printing
	 */
	if (csv_output) {
		/* User explicitly passed -B? */
		if (big_num_opt == 1) {
			fprintf(stderr, "-B option not supported with -x\n");
			usage_with_options(stat_usage, options);
		} else {
			/* Nope, so disable big number formatting */
			big_num = false;
		}
	} else if (big_num_opt == 0) {
		/* User passed --no-big-num */
		big_num = false;
	}

	/* Need something to measure: a command, a pid or a tid. */
	if (!argc && target_pid == -1 && target_tid == -1)
		usage_with_options(stat_usage, options);
	if (run_count <= 0)
		usage_with_options(stat_usage, options);

	/* no_aggr is for system-wide only */
	if (no_aggr && !system_wide)
		usage_with_options(stat_usage, options);

	/* Set attrs and nr_counters if no event is selected and !null_run */
	if (!null_run && !nr_counters) {
		size_t c;

		nr_counters = ARRAY_SIZE(default_attrs);

		for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) {
			/*
			 * NOTE(review): every default event is created with
			 * the same third argument (nr_counters). If that
			 * parameter is a per-event index, 'c' is probably
			 * what was meant - confirm against perf_evsel__new().
			 */
			pos = perf_evsel__new(default_attrs[c].type,
					      default_attrs[c].config,
					      nr_counters);
			if (pos == NULL)
				goto out;
			/*
			 * NOTE(review): list_add() presumably inserts at the
			 * head, which would leave the defaults in reverse
			 * default_attrs[] order - confirm this is intended.
			 */
			list_add(&pos->node, &evsel_list);
		}
	}

	if (system_wide)
		nr_cpus = read_cpu_map(cpu_list);
	else
		nr_cpus = 1;

	if (nr_cpus < 1)
		usage_with_options(stat_usage, options);

	if (target_pid != -1) {
		target_tid = target_pid;
		thread_num = find_all_tid(target_pid, &all_tids);
		if (thread_num <= 0) {
			fprintf(stderr, "Can't find all threads of pid %d\n",
				target_pid);
			usage_with_options(stat_usage, options);
		}
	} else {
		/* Single target: a one-element tid array. */
		all_tids = malloc(sizeof(*all_tids));
		if (!all_tids)
			goto out;	/* status is still -ENOMEM here */

		all_tids[0] = target_tid;
		thread_num = 1;
	}

	/*
	 * Allocate the per-event state. On failure, jump to out_free_fd
	 * with status still -ENOMEM.
	 * NOTE(review): despite its name, that label only releases the
	 * stat_priv data; counts and fds are not released here.
	 */
	list_for_each_entry(pos, &evsel_list, node) {
		if (perf_evsel__alloc_stat_priv(pos) < 0 ||
		    perf_evsel__alloc_counts(pos, nr_cpus) < 0 ||
		    perf_evsel__alloc_fd(pos, nr_cpus, thread_num) < 0)
			goto out_free_fd;
	}

	/*
	 * We don't want to block the signals - that would cause
	 * child tasks to inherit that and Ctrl-C would not work.
	 * What we want is for Ctrl-C to work in the exec()-ed
	 * task, but being ignored by perf stat itself:
	 */
	atexit(sig_atexit);
	signal(SIGINT,  skip_signal);
	signal(SIGALRM, skip_signal);
	signal(SIGABRT, skip_signal);

	status = 0;
	for (run_idx = 0; run_idx < run_count; run_idx++) {
		if (run_count != 1 && verbose)
			fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx + 1);
		/* Only the status of the last run is kept. */
		status = run_perf_stat(argc, argv);
	}

	if (status != -1)
		print_stat(argc, argv);
out_free_fd:
	list_for_each_entry(pos, &evsel_list, node)
		perf_evsel__free_stat_priv(pos);
out:
	return status;
}
|