2008-12-03 10:39:53 +01:00
|
|
|
/*
|
|
|
|
* Performance counter x86 architecture code
|
|
|
|
*
|
2009-04-29 14:52:50 +02:00
|
|
|
* Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
|
|
|
|
* Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
|
|
|
|
* Copyright (C) 2009 Jaswinder Singh Rajput
|
|
|
|
* Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
|
|
|
|
* Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
|
2009-07-21 15:56:48 +02:00
|
|
|
* Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
|
2008-12-03 10:39:53 +01:00
|
|
|
*
|
|
|
|
* For licencing details see kernel-base/COPYING
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/perf_counter.h>
|
|
|
|
#include <linux/capability.h>
|
|
|
|
#include <linux/notifier.h>
|
|
|
|
#include <linux/hardirq.h>
|
|
|
|
#include <linux/kprobes.h>
|
2008-12-09 21:43:39 +01:00
|
|
|
#include <linux/module.h>
|
2008-12-03 10:39:53 +01:00
|
|
|
#include <linux/kdebug.h>
|
|
|
|
#include <linux/sched.h>
|
2009-03-30 19:07:15 +02:00
|
|
|
#include <linux/uaccess.h>
|
2009-06-15 13:07:24 +02:00
|
|
|
#include <linux/highmem.h>
|
2009-07-21 15:56:48 +02:00
|
|
|
#include <linux/cpu.h>
|
2008-12-03 10:39:53 +01:00
|
|
|
|
|
|
|
#include <asm/apic.h>
|
2009-03-30 19:07:15 +02:00
|
|
|
#include <asm/stacktrace.h>
|
2009-03-30 19:07:16 +02:00
|
|
|
#include <asm/nmi.h>
|
2008-12-03 10:39:53 +01:00
|
|
|
|
2008-12-17 13:09:20 +01:00
|
|
|
/* NOTE(review): presumably a bitmask of the usable counters; it is not referenced in the visible part of the file - confirm. */
static u64 perf_counter_mask __read_mostly;
|
2008-12-17 10:51:15 +01:00
|
|
|
|
2009-07-21 15:56:48 +02:00
|
|
|
/* The maximal number of PEBS counters: */
#define MAX_PEBS_COUNTERS	4

/* The size of a BTS record in bytes: */
#define BTS_RECORD_SIZE		24

/* The size of a per-cpu BTS buffer in bytes (room for 1024 records): */
#define BTS_BUFFER_SIZE		(BTS_RECORD_SIZE * 1024)

/* The BTS overflow threshold in bytes from the end of the buffer (64 records): */
#define BTS_OVFL_TH		(BTS_RECORD_SIZE * 64)
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Bits in the debugctlmsr controlling branch tracing.
 */
#define X86_DEBUGCTL_TR			(1 << 6)
#define X86_DEBUGCTL_BTS		(1 << 7)
#define X86_DEBUGCTL_BTINT		(1 << 8)
#define X86_DEBUGCTL_BTS_OFF_OS		(1 << 9)
#define X86_DEBUGCTL_BTS_OFF_USR	(1 << 10)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* A debug store configuration.
|
|
|
|
*
|
|
|
|
* We only support architectures that use 64bit fields.
|
|
|
|
*/
|
|
|
|
struct debug_store {
|
|
|
|
u64 bts_buffer_base;
|
|
|
|
u64 bts_index;
|
|
|
|
u64 bts_absolute_maximum;
|
|
|
|
u64 bts_interrupt_threshold;
|
|
|
|
u64 pebs_buffer_base;
|
|
|
|
u64 pebs_index;
|
|
|
|
u64 pebs_absolute_maximum;
|
|
|
|
u64 pebs_interrupt_threshold;
|
|
|
|
u64 pebs_counter_reset[MAX_PEBS_COUNTERS];
|
|
|
|
};
|
|
|
|
|
2008-12-03 10:39:53 +01:00
|
|
|
struct cpu_hw_counters {
|
2008-12-17 13:09:20 +01:00
|
|
|
struct perf_counter *counters[X86_PMC_IDX_MAX];
|
2009-04-29 16:55:56 +02:00
|
|
|
unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
|
|
|
|
unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
|
2009-01-23 14:36:16 +01:00
|
|
|
unsigned long interrupts;
|
2009-03-05 18:08:27 +01:00
|
|
|
int enabled;
|
2009-07-21 15:56:48 +02:00
|
|
|
struct debug_store *ds;
|
2008-12-03 10:39:53 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
2009-04-29 12:47:04 +02:00
|
|
|
* struct x86_pmu - generic x86 pmu
|
2008-12-03 10:39:53 +01:00
|
|
|
*/
|
2009-04-29 12:47:04 +02:00
|
|
|
struct x86_pmu {
|
2009-04-29 12:47:13 +02:00
|
|
|
const char *name;
|
|
|
|
int version;
|
2009-06-03 13:12:55 +08:00
|
|
|
int (*handle_irq)(struct pt_regs *);
|
2009-05-13 16:21:38 +02:00
|
|
|
void (*disable_all)(void);
|
|
|
|
void (*enable_all)(void);
|
2009-04-29 12:47:18 +02:00
|
|
|
void (*enable)(struct hw_perf_counter *, int);
|
2009-04-29 12:47:19 +02:00
|
|
|
void (*disable)(struct hw_perf_counter *, int);
|
2009-02-28 18:37:49 +05:30
|
|
|
unsigned eventsel;
|
|
|
|
unsigned perfctr;
|
2009-03-05 18:08:27 +01:00
|
|
|
u64 (*event_map)(int);
|
|
|
|
u64 (*raw_event)(u64);
|
2009-02-28 18:37:49 +05:30
|
|
|
int max_events;
|
2009-04-29 12:47:12 +02:00
|
|
|
int num_counters;
|
|
|
|
int num_counters_fixed;
|
|
|
|
int counter_bits;
|
|
|
|
u64 counter_mask;
|
2009-08-11 10:40:08 +02:00
|
|
|
int apic;
|
2009-04-29 12:47:23 +02:00
|
|
|
u64 max_period;
|
2009-05-13 16:21:38 +02:00
|
|
|
u64 intel_ctrl;
|
2009-07-21 15:56:48 +02:00
|
|
|
void (*enable_bts)(u64 config);
|
|
|
|
void (*disable_bts)(void);
|
2009-02-27 18:09:09 +05:30
|
|
|
};
|
|
|
|
|
2009-04-29 12:47:11 +02:00
|
|
|
/* The PMU description used by this file. NOTE(review): presumably filled in by init code outside the visible part - confirm. */
static struct x86_pmu x86_pmu __read_mostly;
|
2009-02-27 18:09:09 +05:30
|
|
|
|
2009-03-05 18:08:27 +01:00
|
|
|
/* Per-CPU counter bookkeeping; each CPU's copy starts out with .enabled set. */
static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
	.enabled = 1,
};
|
2008-12-03 10:39:53 +01:00
|
|
|
|
2009-07-08 17:46:14 -04:00
|
|
|
/*
|
|
|
|
* Not sure about some of these
|
|
|
|
*/
|
|
|
|
static const u64 p6_perfmon_event_map[] =
|
|
|
|
{
|
|
|
|
[PERF_COUNT_HW_CPU_CYCLES] = 0x0079,
|
|
|
|
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
|
2009-08-11 10:26:33 +02:00
|
|
|
[PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e,
|
|
|
|
[PERF_COUNT_HW_CACHE_MISSES] = 0x012e,
|
2009-07-08 17:46:14 -04:00
|
|
|
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
|
|
|
|
[PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
|
|
|
|
[PERF_COUNT_HW_BUS_CYCLES] = 0x0062,
|
|
|
|
};
|
|
|
|
|
|
|
|
static u64 p6_pmu_event_map(int event)
|
|
|
|
{
|
|
|
|
return p6_perfmon_event_map[event];
|
|
|
|
}
|
|
|
|
|
2009-07-08 10:21:41 +02:00
|
|
|
/*
 * Counter setting that is specified not to count anything.
 * We use this to effectively disable a counter.
 *
 * L2_RQSTS with 0 MESI unit mask.
 */
#define P6_NOP_COUNTER			0x0000002EULL
|
|
|
|
|
2009-07-08 17:46:14 -04:00
|
|
|
/*
 * Fields of a P6 event-select value that a raw event is allowed to set.
 */
#define P6_EVNTSEL_EVENT_MASK		0x000000FFULL
#define P6_EVNTSEL_UNIT_MASK		0x0000FF00ULL
#define P6_EVNTSEL_EDGE_MASK		0x00040000ULL
#define P6_EVNTSEL_INV_MASK		0x00800000ULL
#define P6_EVNTSEL_COUNTER_MASK		0xFF000000ULL

#define P6_EVNTSEL_MASK			\
	(P6_EVNTSEL_EVENT_MASK |	\
	 P6_EVNTSEL_UNIT_MASK  |	\
	 P6_EVNTSEL_EDGE_MASK  |	\
	 P6_EVNTSEL_INV_MASK   |	\
	 P6_EVNTSEL_COUNTER_MASK)

/*
 * Reduce a raw event to the bits covered by P6_EVNTSEL_MASK; every
 * other bit of @event is cleared in the returned value.
 */
static u64 p6_pmu_raw_event(u64 event)
{
	return event & P6_EVNTSEL_MASK;
}
|
|
|
|
|
|
|
|
|
2009-02-27 18:09:09 +05:30
|
|
|
/*
|
|
|
|
* Intel PerfMon v3. Used on Core2 and later.
|
|
|
|
*/
|
2009-03-05 18:08:27 +01:00
|
|
|
static const u64 intel_perfmon_event_map[] =
|
2008-12-03 10:39:53 +01:00
|
|
|
{
|
2009-06-11 14:06:28 +02:00
|
|
|
[PERF_COUNT_HW_CPU_CYCLES] = 0x003c,
|
|
|
|
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
|
|
|
|
[PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e,
|
|
|
|
[PERF_COUNT_HW_CACHE_MISSES] = 0x412e,
|
|
|
|
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
|
|
|
|
[PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
|
|
|
|
[PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
|
2008-12-03 10:39:53 +01:00
|
|
|
};
|
|
|
|
|
2009-04-29 12:47:04 +02:00
|
|
|
static u64 intel_pmu_event_map(int event)
|
2009-02-27 18:09:09 +05:30
|
|
|
{
|
|
|
|
return intel_perfmon_event_map[event];
|
|
|
|
}
|
2008-12-03 10:39:53 +01:00
|
|
|
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
/*
|
|
|
|
* Generalized hw caching related event table, filled
|
|
|
|
* in on a per model basis. A value of 0 means
|
|
|
|
* 'not supported', -1 means 'event makes no sense on
|
|
|
|
* this CPU', any other value means the raw event
|
|
|
|
* ID.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#define C(x) PERF_COUNT_HW_CACHE_##x
|
|
|
|
|
|
|
|
static u64 __read_mostly hw_cache_event_ids
|
|
|
|
[PERF_COUNT_HW_CACHE_MAX]
|
|
|
|
[PERF_COUNT_HW_CACHE_OP_MAX]
|
|
|
|
[PERF_COUNT_HW_CACHE_RESULT_MAX];
|
|
|
|
|
|
|
|
static const u64 nehalem_hw_cache_event_ids
|
|
|
|
[PERF_COUNT_HW_CACHE_MAX]
|
|
|
|
[PERF_COUNT_HW_CACHE_OP_MAX]
|
|
|
|
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
|
|
|
|
{
|
|
|
|
[ C(L1D) ] = {
|
|
|
|
[ C(OP_READ) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
|
|
|
|
[ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
|
|
|
|
},
|
|
|
|
[ C(OP_WRITE) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
|
|
|
|
[ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
|
|
|
|
},
|
|
|
|
[ C(OP_PREFETCH) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
|
|
|
|
[ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
|
|
|
|
},
|
|
|
|
},
|
|
|
|
[ C(L1I ) ] = {
|
|
|
|
[ C(OP_READ) ] = {
|
2009-06-09 21:15:53 +08:00
|
|
|
[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
[ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
|
|
|
|
},
|
|
|
|
[ C(OP_WRITE) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = -1,
|
|
|
|
[ C(RESULT_MISS) ] = -1,
|
|
|
|
},
|
|
|
|
[ C(OP_PREFETCH) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = 0x0,
|
|
|
|
[ C(RESULT_MISS) ] = 0x0,
|
|
|
|
},
|
|
|
|
},
|
2009-06-11 14:19:11 +02:00
|
|
|
[ C(LL ) ] = {
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
[ C(OP_READ) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
|
|
|
|
[ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */
|
|
|
|
},
|
|
|
|
[ C(OP_WRITE) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
|
|
|
|
[ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */
|
|
|
|
},
|
|
|
|
[ C(OP_PREFETCH) ] = {
|
2009-06-11 14:19:11 +02:00
|
|
|
[ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
|
|
|
|
[ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
},
|
|
|
|
},
|
|
|
|
[ C(DTLB) ] = {
|
|
|
|
[ C(OP_READ) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
|
|
|
|
[ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
|
|
|
|
},
|
|
|
|
[ C(OP_WRITE) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
|
|
|
|
[ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
|
|
|
|
},
|
|
|
|
[ C(OP_PREFETCH) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = 0x0,
|
|
|
|
[ C(RESULT_MISS) ] = 0x0,
|
|
|
|
},
|
|
|
|
},
|
|
|
|
[ C(ITLB) ] = {
|
|
|
|
[ C(OP_READ) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
|
2009-06-09 21:15:53 +08:00
|
|
|
[ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
},
|
|
|
|
[ C(OP_WRITE) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = -1,
|
|
|
|
[ C(RESULT_MISS) ] = -1,
|
|
|
|
},
|
|
|
|
[ C(OP_PREFETCH) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = -1,
|
|
|
|
[ C(RESULT_MISS) ] = -1,
|
|
|
|
},
|
|
|
|
},
|
|
|
|
[ C(BPU ) ] = {
|
|
|
|
[ C(OP_READ) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
|
|
|
|
[ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
|
|
|
|
},
|
|
|
|
[ C(OP_WRITE) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = -1,
|
|
|
|
[ C(RESULT_MISS) ] = -1,
|
|
|
|
},
|
|
|
|
[ C(OP_PREFETCH) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = -1,
|
|
|
|
[ C(RESULT_MISS) ] = -1,
|
|
|
|
},
|
|
|
|
},
|
|
|
|
};
|
|
|
|
|
|
|
|
static const u64 core2_hw_cache_event_ids
|
|
|
|
[PERF_COUNT_HW_CACHE_MAX]
|
|
|
|
[PERF_COUNT_HW_CACHE_OP_MAX]
|
|
|
|
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
|
|
|
|
{
|
2009-06-08 07:42:04 +02:00
|
|
|
[ C(L1D) ] = {
|
|
|
|
[ C(OP_READ) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
|
|
|
|
[ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
|
|
|
|
},
|
|
|
|
[ C(OP_WRITE) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
|
|
|
|
[ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
|
|
|
|
},
|
|
|
|
[ C(OP_PREFETCH) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */
|
|
|
|
[ C(RESULT_MISS) ] = 0,
|
|
|
|
},
|
|
|
|
},
|
|
|
|
[ C(L1I ) ] = {
|
|
|
|
[ C(OP_READ) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */
|
|
|
|
[ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */
|
|
|
|
},
|
|
|
|
[ C(OP_WRITE) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = -1,
|
|
|
|
[ C(RESULT_MISS) ] = -1,
|
|
|
|
},
|
|
|
|
[ C(OP_PREFETCH) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = 0,
|
|
|
|
[ C(RESULT_MISS) ] = 0,
|
|
|
|
},
|
|
|
|
},
|
2009-06-11 14:19:11 +02:00
|
|
|
[ C(LL ) ] = {
|
2009-06-08 07:42:04 +02:00
|
|
|
[ C(OP_READ) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
|
|
|
|
[ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
|
|
|
|
},
|
|
|
|
[ C(OP_WRITE) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
|
|
|
|
[ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
|
|
|
|
},
|
|
|
|
[ C(OP_PREFETCH) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = 0,
|
|
|
|
[ C(RESULT_MISS) ] = 0,
|
|
|
|
},
|
|
|
|
},
|
|
|
|
[ C(DTLB) ] = {
|
|
|
|
[ C(OP_READ) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
|
|
|
|
[ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */
|
|
|
|
},
|
|
|
|
[ C(OP_WRITE) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
|
|
|
|
[ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */
|
|
|
|
},
|
|
|
|
[ C(OP_PREFETCH) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = 0,
|
|
|
|
[ C(RESULT_MISS) ] = 0,
|
|
|
|
},
|
|
|
|
},
|
|
|
|
[ C(ITLB) ] = {
|
|
|
|
[ C(OP_READ) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
|
|
|
|
[ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */
|
|
|
|
},
|
|
|
|
[ C(OP_WRITE) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = -1,
|
|
|
|
[ C(RESULT_MISS) ] = -1,
|
|
|
|
},
|
|
|
|
[ C(OP_PREFETCH) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = -1,
|
|
|
|
[ C(RESULT_MISS) ] = -1,
|
|
|
|
},
|
|
|
|
},
|
|
|
|
[ C(BPU ) ] = {
|
|
|
|
[ C(OP_READ) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
|
|
|
|
[ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
|
|
|
|
},
|
|
|
|
[ C(OP_WRITE) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = -1,
|
|
|
|
[ C(RESULT_MISS) ] = -1,
|
|
|
|
},
|
|
|
|
[ C(OP_PREFETCH) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = -1,
|
|
|
|
[ C(RESULT_MISS) ] = -1,
|
|
|
|
},
|
|
|
|
},
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
static const u64 atom_hw_cache_event_ids
|
|
|
|
[PERF_COUNT_HW_CACHE_MAX]
|
|
|
|
[PERF_COUNT_HW_CACHE_OP_MAX]
|
|
|
|
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
|
|
|
|
{
|
2009-06-08 09:30:41 +02:00
|
|
|
[ C(L1D) ] = {
|
|
|
|
[ C(OP_READ) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */
|
|
|
|
[ C(RESULT_MISS) ] = 0,
|
|
|
|
},
|
|
|
|
[ C(OP_WRITE) ] = {
|
2009-06-09 21:15:53 +08:00
|
|
|
[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */
|
2009-06-08 09:30:41 +02:00
|
|
|
[ C(RESULT_MISS) ] = 0,
|
|
|
|
},
|
|
|
|
[ C(OP_PREFETCH) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = 0x0,
|
|
|
|
[ C(RESULT_MISS) ] = 0,
|
|
|
|
},
|
|
|
|
},
|
|
|
|
[ C(L1I ) ] = {
|
|
|
|
[ C(OP_READ) ] = {
|
2009-06-09 21:15:53 +08:00
|
|
|
[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
|
|
|
|
[ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
|
2009-06-08 09:30:41 +02:00
|
|
|
},
|
|
|
|
[ C(OP_WRITE) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = -1,
|
|
|
|
[ C(RESULT_MISS) ] = -1,
|
|
|
|
},
|
|
|
|
[ C(OP_PREFETCH) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = 0,
|
|
|
|
[ C(RESULT_MISS) ] = 0,
|
|
|
|
},
|
|
|
|
},
|
2009-06-11 14:19:11 +02:00
|
|
|
[ C(LL ) ] = {
|
2009-06-08 09:30:41 +02:00
|
|
|
[ C(OP_READ) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
|
|
|
|
[ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
|
|
|
|
},
|
|
|
|
[ C(OP_WRITE) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
|
|
|
|
[ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
|
|
|
|
},
|
|
|
|
[ C(OP_PREFETCH) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = 0,
|
|
|
|
[ C(RESULT_MISS) ] = 0,
|
|
|
|
},
|
|
|
|
},
|
|
|
|
[ C(DTLB) ] = {
|
|
|
|
[ C(OP_READ) ] = {
|
2009-06-09 21:15:53 +08:00
|
|
|
[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */
|
2009-06-08 09:30:41 +02:00
|
|
|
[ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */
|
|
|
|
},
|
|
|
|
[ C(OP_WRITE) ] = {
|
2009-06-09 21:15:53 +08:00
|
|
|
[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */
|
2009-06-08 09:30:41 +02:00
|
|
|
[ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */
|
|
|
|
},
|
|
|
|
[ C(OP_PREFETCH) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = 0,
|
|
|
|
[ C(RESULT_MISS) ] = 0,
|
|
|
|
},
|
|
|
|
},
|
|
|
|
[ C(ITLB) ] = {
|
|
|
|
[ C(OP_READ) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
|
|
|
|
[ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */
|
|
|
|
},
|
|
|
|
[ C(OP_WRITE) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = -1,
|
|
|
|
[ C(RESULT_MISS) ] = -1,
|
|
|
|
},
|
|
|
|
[ C(OP_PREFETCH) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = -1,
|
|
|
|
[ C(RESULT_MISS) ] = -1,
|
|
|
|
},
|
|
|
|
},
|
|
|
|
[ C(BPU ) ] = {
|
|
|
|
[ C(OP_READ) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
|
|
|
|
[ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
|
|
|
|
},
|
|
|
|
[ C(OP_WRITE) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = -1,
|
|
|
|
[ C(RESULT_MISS) ] = -1,
|
|
|
|
},
|
|
|
|
[ C(OP_PREFETCH) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = -1,
|
|
|
|
[ C(RESULT_MISS) ] = -1,
|
|
|
|
},
|
|
|
|
},
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
};
|
|
|
|
|
2009-04-29 12:47:04 +02:00
|
|
|
/*
 * Fields of an Intel event-select value that a raw event is allowed to set.
 */
#define CORE_EVNTSEL_EVENT_MASK		0x000000FFULL
#define CORE_EVNTSEL_UNIT_MASK		0x0000FF00ULL
#define CORE_EVNTSEL_EDGE_MASK		0x00040000ULL
#define CORE_EVNTSEL_INV_MASK		0x00800000ULL
#define CORE_EVNTSEL_COUNTER_MASK	0xFF000000ULL

#define CORE_EVNTSEL_MASK		\
	(CORE_EVNTSEL_EVENT_MASK |	\
	 CORE_EVNTSEL_UNIT_MASK  |	\
	 CORE_EVNTSEL_EDGE_MASK  |	\
	 CORE_EVNTSEL_INV_MASK   |	\
	 CORE_EVNTSEL_COUNTER_MASK)

/*
 * Reduce a raw event to the bits covered by CORE_EVNTSEL_MASK; every
 * other bit of @event is cleared in the returned value.
 */
static u64 intel_pmu_raw_event(u64 event)
{
	return event & CORE_EVNTSEL_MASK;
}
|
|
|
|
|
2009-06-13 01:06:21 +05:30
|
|
|
static const u64 amd_hw_cache_event_ids
|
2009-06-08 22:33:10 +02:00
|
|
|
[PERF_COUNT_HW_CACHE_MAX]
|
|
|
|
[PERF_COUNT_HW_CACHE_OP_MAX]
|
|
|
|
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
|
|
|
|
{
|
|
|
|
[ C(L1D) ] = {
|
|
|
|
[ C(OP_READ) ] = {
|
2009-06-13 01:06:21 +05:30
|
|
|
[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
|
|
|
|
[ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */
|
2009-06-08 22:33:10 +02:00
|
|
|
},
|
|
|
|
[ C(OP_WRITE) ] = {
|
2009-06-20 13:19:25 +05:30
|
|
|
[ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
|
2009-06-08 22:33:10 +02:00
|
|
|
[ C(RESULT_MISS) ] = 0,
|
|
|
|
},
|
|
|
|
[ C(OP_PREFETCH) ] = {
|
2009-06-13 01:06:21 +05:30
|
|
|
[ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */
|
|
|
|
[ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */
|
2009-06-08 22:33:10 +02:00
|
|
|
},
|
|
|
|
},
|
|
|
|
[ C(L1I ) ] = {
|
|
|
|
[ C(OP_READ) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */
|
|
|
|
[ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */
|
|
|
|
},
|
|
|
|
[ C(OP_WRITE) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = -1,
|
|
|
|
[ C(RESULT_MISS) ] = -1,
|
|
|
|
},
|
|
|
|
[ C(OP_PREFETCH) ] = {
|
2009-06-13 01:06:21 +05:30
|
|
|
[ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
|
2009-06-08 22:33:10 +02:00
|
|
|
[ C(RESULT_MISS) ] = 0,
|
|
|
|
},
|
|
|
|
},
|
2009-06-11 14:19:11 +02:00
|
|
|
[ C(LL ) ] = {
|
2009-06-08 22:33:10 +02:00
|
|
|
[ C(OP_READ) ] = {
|
2009-06-13 01:06:21 +05:30
|
|
|
[ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
|
|
|
|
[ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */
|
2009-06-08 22:33:10 +02:00
|
|
|
},
|
|
|
|
[ C(OP_WRITE) ] = {
|
2009-06-13 01:06:21 +05:30
|
|
|
[ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */
|
2009-06-08 22:33:10 +02:00
|
|
|
[ C(RESULT_MISS) ] = 0,
|
|
|
|
},
|
|
|
|
[ C(OP_PREFETCH) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = 0,
|
|
|
|
[ C(RESULT_MISS) ] = 0,
|
|
|
|
},
|
|
|
|
},
|
|
|
|
[ C(DTLB) ] = {
|
|
|
|
[ C(OP_READ) ] = {
|
2009-06-13 01:06:21 +05:30
|
|
|
[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
|
|
|
|
[ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */
|
2009-06-08 22:33:10 +02:00
|
|
|
},
|
|
|
|
[ C(OP_WRITE) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = 0,
|
|
|
|
[ C(RESULT_MISS) ] = 0,
|
|
|
|
},
|
|
|
|
[ C(OP_PREFETCH) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = 0,
|
|
|
|
[ C(RESULT_MISS) ] = 0,
|
|
|
|
},
|
|
|
|
},
|
|
|
|
[ C(ITLB) ] = {
|
|
|
|
[ C(OP_READ) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes */
|
|
|
|
[ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */
|
|
|
|
},
|
|
|
|
[ C(OP_WRITE) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = -1,
|
|
|
|
[ C(RESULT_MISS) ] = -1,
|
|
|
|
},
|
|
|
|
[ C(OP_PREFETCH) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = -1,
|
|
|
|
[ C(RESULT_MISS) ] = -1,
|
|
|
|
},
|
|
|
|
},
|
|
|
|
[ C(BPU ) ] = {
|
|
|
|
[ C(OP_READ) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */
|
|
|
|
[ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */
|
|
|
|
},
|
|
|
|
[ C(OP_WRITE) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = -1,
|
|
|
|
[ C(RESULT_MISS) ] = -1,
|
|
|
|
},
|
|
|
|
[ C(OP_PREFETCH) ] = {
|
|
|
|
[ C(RESULT_ACCESS) ] = -1,
|
|
|
|
[ C(RESULT_MISS) ] = -1,
|
|
|
|
},
|
|
|
|
},
|
|
|
|
};
|
|
|
|
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 20:15:14 +05:30
|
|
|
/*
|
|
|
|
* AMD Performance Monitor K7 and later.
|
|
|
|
*/
|
2009-03-05 18:08:27 +01:00
|
|
|
static const u64 amd_perfmon_event_map[] =
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 20:15:14 +05:30
|
|
|
{
|
2009-06-11 14:06:28 +02:00
|
|
|
[PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
|
|
|
|
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
|
|
|
|
[PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080,
|
|
|
|
[PERF_COUNT_HW_CACHE_MISSES] = 0x0081,
|
|
|
|
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
|
|
|
|
[PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 20:15:14 +05:30
|
|
|
};
|
|
|
|
|
2009-04-29 12:47:04 +02:00
|
|
|
static u64 amd_pmu_event_map(int event)
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 20:15:14 +05:30
|
|
|
{
|
|
|
|
return amd_perfmon_event_map[event];
|
|
|
|
}
|
|
|
|
|
2009-04-29 12:47:04 +02:00
|
|
|
/*
 * Sanitize a raw (user supplied) event configuration for AMD counters.
 *
 * Only the fields a raw event is allowed to choose are kept: event
 * select, unit mask, edge detect, invert and the counter mask.  Every
 * other bit is cleared from the returned value.
 *
 * The event select field is split: bits 7:0 hold EventSelect[7:0] and
 * bits 35:32 hold EventSelect[11:8], hence the 0xF (not 0x7) in the
 * upper nibble of K7_EVNTSEL_EVENT_MASK.
 */
static u64 amd_pmu_raw_event(u64 event)
{
#define K7_EVNTSEL_EVENT_MASK	0xF000000FFULL
#define K7_EVNTSEL_UNIT_MASK	0x00000FF00ULL
#define K7_EVNTSEL_EDGE_MASK	0x000040000ULL
#define K7_EVNTSEL_INV_MASK	0x000800000ULL
#define K7_EVNTSEL_COUNTER_MASK	0x0FF000000ULL

#define K7_EVNTSEL_MASK			\
	(K7_EVNTSEL_EVENT_MASK |	\
	 K7_EVNTSEL_UNIT_MASK  |	\
	 K7_EVNTSEL_EDGE_MASK  |	\
	 K7_EVNTSEL_INV_MASK   |	\
	 K7_EVNTSEL_COUNTER_MASK)

	return event & K7_EVNTSEL_MASK;
}
|
|
|
|
|
2008-12-13 09:00:03 +01:00
|
|
|
/*
|
|
|
|
* Propagate counter elapsed time into the generic counter.
|
|
|
|
* Can only be executed on the CPU where the counter is active.
|
|
|
|
* Returns the delta events processed.
|
|
|
|
*/
|
2009-04-29 12:47:22 +02:00
|
|
|
static u64
|
2008-12-13 09:00:03 +01:00
|
|
|
x86_perf_counter_update(struct perf_counter *counter,
|
|
|
|
struct hw_perf_counter *hwc, int idx)
|
|
|
|
{
|
2009-05-13 09:45:19 +02:00
|
|
|
int shift = 64 - x86_pmu.counter_bits;
|
|
|
|
u64 prev_raw_count, new_raw_count;
|
|
|
|
s64 delta;
|
2008-12-13 09:00:03 +01:00
|
|
|
|
2009-07-21 15:56:48 +02:00
|
|
|
if (idx == X86_PMC_IDX_FIXED_BTS)
|
|
|
|
return 0;
|
|
|
|
|
2008-12-13 09:00:03 +01:00
|
|
|
/*
|
|
|
|
* Careful: an NMI might modify the previous counter value.
|
|
|
|
*
|
|
|
|
* Our tactic to handle this is to first atomically read and
|
|
|
|
* exchange a new raw count - then add that new-prev delta
|
|
|
|
* count to the generic counter atomically:
|
|
|
|
*/
|
|
|
|
again:
|
|
|
|
prev_raw_count = atomic64_read(&hwc->prev_count);
|
|
|
|
rdmsrl(hwc->counter_base + idx, new_raw_count);
|
|
|
|
|
|
|
|
if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
|
|
|
|
new_raw_count) != prev_raw_count)
|
|
|
|
goto again;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Now we have the new raw value and have updated the prev
|
|
|
|
* timestamp already. We can now calculate the elapsed delta
|
|
|
|
* (counter-)time and add that to the generic counter.
|
|
|
|
*
|
|
|
|
* Careful, not all hw sign-extends above the physical width
|
2009-05-13 09:45:19 +02:00
|
|
|
* of the count.
|
2008-12-13 09:00:03 +01:00
|
|
|
*/
|
2009-05-13 09:45:19 +02:00
|
|
|
delta = (new_raw_count << shift) - (prev_raw_count << shift);
|
|
|
|
delta >>= shift;
|
2008-12-13 09:00:03 +01:00
|
|
|
|
|
|
|
atomic64_add(delta, &counter->count);
|
|
|
|
atomic64_sub(delta, &hwc->period_left);
|
2009-04-29 12:47:22 +02:00
|
|
|
|
|
|
|
return new_raw_count;
|
2008-12-13 09:00:03 +01:00
|
|
|
}
|
|
|
|
|
2009-05-04 18:47:44 +02:00
|
|
|
/* Count of counters currently holding the PMC/BTS hardware reservation: */
static atomic_t active_counters;

/* Taken around reserving and releasing the PMC/BTS hardware: */
static DEFINE_MUTEX(pmc_reserve_mutex);
|
|
|
|
|
|
|
|
static bool reserve_pmc_hardware(void)
|
|
|
|
{
|
2009-08-11 10:40:08 +02:00
|
|
|
#ifdef CONFIG_X86_LOCAL_APIC
|
2009-03-30 19:07:16 +02:00
|
|
|
int i;
|
|
|
|
|
|
|
|
if (nmi_watchdog == NMI_LOCAL_APIC)
|
|
|
|
disable_lapic_nmi_watchdog();
|
|
|
|
|
2009-04-29 12:47:12 +02:00
|
|
|
for (i = 0; i < x86_pmu.num_counters; i++) {
|
2009-04-29 12:47:11 +02:00
|
|
|
if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
|
2009-03-30 19:07:16 +02:00
|
|
|
goto perfctr_fail;
|
|
|
|
}
|
|
|
|
|
2009-04-29 12:47:12 +02:00
|
|
|
for (i = 0; i < x86_pmu.num_counters; i++) {
|
2009-04-29 12:47:11 +02:00
|
|
|
if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
|
2009-03-30 19:07:16 +02:00
|
|
|
goto eventsel_fail;
|
|
|
|
}
|
2009-08-11 10:40:08 +02:00
|
|
|
#endif
|
2009-03-30 19:07:16 +02:00
|
|
|
|
|
|
|
return true;
|
|
|
|
|
2009-08-11 10:40:08 +02:00
|
|
|
#ifdef CONFIG_X86_LOCAL_APIC
|
2009-03-30 19:07:16 +02:00
|
|
|
eventsel_fail:
|
|
|
|
for (i--; i >= 0; i--)
|
2009-04-29 12:47:11 +02:00
|
|
|
release_evntsel_nmi(x86_pmu.eventsel + i);
|
2009-03-30 19:07:16 +02:00
|
|
|
|
2009-04-29 12:47:12 +02:00
|
|
|
i = x86_pmu.num_counters;
|
2009-03-30 19:07:16 +02:00
|
|
|
|
|
|
|
perfctr_fail:
|
|
|
|
for (i--; i >= 0; i--)
|
2009-04-29 12:47:11 +02:00
|
|
|
release_perfctr_nmi(x86_pmu.perfctr + i);
|
2009-03-30 19:07:16 +02:00
|
|
|
|
|
|
|
if (nmi_watchdog == NMI_LOCAL_APIC)
|
|
|
|
enable_lapic_nmi_watchdog();
|
|
|
|
|
|
|
|
return false;
|
2009-08-11 10:40:08 +02:00
|
|
|
#endif
|
2009-03-30 19:07:16 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Give back every counter and event select MSR of the PMU and turn the
 * lapic NMI watchdog back on if it is the configured watchdog.
 * Does nothing without CONFIG_X86_LOCAL_APIC.
 */
static void release_pmc_hardware(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
	int nr = 0;

	while (nr < x86_pmu.num_counters) {
		release_perfctr_nmi(x86_pmu.perfctr + nr);
		release_evntsel_nmi(x86_pmu.eventsel + nr);
		nr++;
	}

	if (nmi_watchdog == NMI_LOCAL_APIC)
		enable_lapic_nmi_watchdog();
#endif
}
|
|
|
|
|
2009-07-21 15:56:48 +02:00
|
|
|
static inline bool bts_available(void)
|
|
|
|
{
|
|
|
|
return x86_pmu.enable_bts != NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void init_debug_store_on_cpu(int cpu)
|
|
|
|
{
|
|
|
|
struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds;
|
|
|
|
|
|
|
|
if (!ds)
|
|
|
|
return;
|
|
|
|
|
|
|
|
wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
|
2009-09-02 16:04:47 +02:00
|
|
|
(u32)((u64)(unsigned long)ds),
|
|
|
|
(u32)((u64)(unsigned long)ds >> 32));
|
2009-07-21 15:56:48 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline void fini_debug_store_on_cpu(int cpu)
|
|
|
|
{
|
|
|
|
if (!per_cpu(cpu_hw_counters, cpu).ds)
|
|
|
|
return;
|
|
|
|
|
|
|
|
wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void release_bts_hardware(void)
|
|
|
|
{
|
|
|
|
int cpu;
|
|
|
|
|
|
|
|
if (!bts_available())
|
|
|
|
return;
|
|
|
|
|
|
|
|
get_online_cpus();
|
|
|
|
|
|
|
|
for_each_online_cpu(cpu)
|
|
|
|
fini_debug_store_on_cpu(cpu);
|
|
|
|
|
|
|
|
for_each_possible_cpu(cpu) {
|
|
|
|
struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds;
|
|
|
|
|
|
|
|
if (!ds)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
per_cpu(cpu_hw_counters, cpu).ds = NULL;
|
|
|
|
|
2009-09-02 16:04:47 +02:00
|
|
|
kfree((void *)(unsigned long)ds->bts_buffer_base);
|
2009-07-21 15:56:48 +02:00
|
|
|
kfree(ds);
|
|
|
|
}
|
|
|
|
|
|
|
|
put_online_cpus();
|
|
|
|
}
|
|
|
|
|
|
|
|
static int reserve_bts_hardware(void)
|
|
|
|
{
|
|
|
|
int cpu, err = 0;
|
|
|
|
|
|
|
|
if (!bts_available())
|
2009-09-02 16:04:46 +02:00
|
|
|
return 0;
|
2009-07-21 15:56:48 +02:00
|
|
|
|
|
|
|
get_online_cpus();
|
|
|
|
|
|
|
|
for_each_possible_cpu(cpu) {
|
|
|
|
struct debug_store *ds;
|
|
|
|
void *buffer;
|
|
|
|
|
|
|
|
err = -ENOMEM;
|
|
|
|
buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
|
|
|
|
if (unlikely(!buffer))
|
|
|
|
break;
|
|
|
|
|
|
|
|
ds = kzalloc(sizeof(*ds), GFP_KERNEL);
|
|
|
|
if (unlikely(!ds)) {
|
|
|
|
kfree(buffer);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2009-09-02 16:04:47 +02:00
|
|
|
ds->bts_buffer_base = (u64)(unsigned long)buffer;
|
2009-07-21 15:56:48 +02:00
|
|
|
ds->bts_index = ds->bts_buffer_base;
|
|
|
|
ds->bts_absolute_maximum =
|
|
|
|
ds->bts_buffer_base + BTS_BUFFER_SIZE;
|
|
|
|
ds->bts_interrupt_threshold =
|
|
|
|
ds->bts_absolute_maximum - BTS_OVFL_TH;
|
|
|
|
|
|
|
|
per_cpu(cpu_hw_counters, cpu).ds = ds;
|
|
|
|
err = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (err)
|
|
|
|
release_bts_hardware();
|
|
|
|
else {
|
|
|
|
for_each_online_cpu(cpu)
|
|
|
|
init_debug_store_on_cpu(cpu);
|
|
|
|
}
|
|
|
|
|
|
|
|
put_online_cpus();
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2009-03-30 19:07:16 +02:00
|
|
|
static void hw_perf_counter_destroy(struct perf_counter *counter)
|
|
|
|
{
|
2009-05-04 18:47:44 +02:00
|
|
|
if (atomic_dec_and_mutex_lock(&active_counters, &pmc_reserve_mutex)) {
|
2009-03-30 19:07:16 +02:00
|
|
|
release_pmc_hardware();
|
2009-07-21 15:56:48 +02:00
|
|
|
release_bts_hardware();
|
2009-03-30 19:07:16 +02:00
|
|
|
mutex_unlock(&pmc_reserve_mutex);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-04-29 12:47:20 +02:00
|
|
|
static inline int x86_pmu_initialized(void)
|
|
|
|
{
|
|
|
|
return x86_pmu.handle_irq != NULL;
|
|
|
|
}
|
|
|
|
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
static inline int
|
|
|
|
set_ext_hw_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr)
|
|
|
|
{
|
|
|
|
unsigned int cache_type, cache_op, cache_result;
|
|
|
|
u64 config, val;
|
|
|
|
|
|
|
|
config = attr->config;
|
|
|
|
|
|
|
|
cache_type = (config >> 0) & 0xff;
|
|
|
|
if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
cache_op = (config >> 8) & 0xff;
|
|
|
|
if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
cache_result = (config >> 16) & 0xff;
|
|
|
|
if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
val = hw_cache_event_ids[cache_type][cache_op][cache_result];
|
|
|
|
|
|
|
|
if (val == 0)
|
|
|
|
return -ENOENT;
|
|
|
|
|
|
|
|
if (val == -1)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
hwc->config |= val;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2009-07-21 15:56:48 +02:00
|
|
|
/*
 * Turn on branch trace store in the debug control MSR.
 *
 * TR, BTS and BTINT are always set.  The BTS_OFF_OS / BTS_OFF_USR bits
 * are set when config lacks the matching ARCH_PERFMON_EVENTSEL_OS /
 * ARCH_PERFMON_EVENTSEL_USR bit.
 */
static void intel_pmu_enable_bts(u64 config)
{
	unsigned long debugctlmsr = get_debugctlmsr();

	debugctlmsr |= X86_DEBUGCTL_TR |
		       X86_DEBUGCTL_BTS |
		       X86_DEBUGCTL_BTINT;

	if (!(config & ARCH_PERFMON_EVENTSEL_OS))
		debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;

	if (!(config & ARCH_PERFMON_EVENTSEL_USR))
		debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;

	update_debugctlmsr(debugctlmsr);
}
|
|
|
|
|
|
|
|
static void intel_pmu_disable_bts(void)
|
|
|
|
{
|
|
|
|
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
|
|
|
|
unsigned long debugctlmsr;
|
|
|
|
|
|
|
|
if (!cpuc->ds)
|
|
|
|
return;
|
|
|
|
|
|
|
|
debugctlmsr = get_debugctlmsr();
|
|
|
|
|
|
|
|
debugctlmsr &=
|
|
|
|
~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
|
|
|
|
X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);
|
|
|
|
|
|
|
|
update_debugctlmsr(debugctlmsr);
|
|
|
|
}
|
|
|
|
|
2008-12-03 10:39:53 +01:00
|
|
|
/*
|
2009-06-02 19:22:16 +02:00
|
|
|
* Setup the hardware configuration for a given attr_type
|
2008-12-03 10:39:53 +01:00
|
|
|
*/
|
2008-12-11 12:46:46 +01:00
|
|
|
static int __hw_perf_counter_init(struct perf_counter *counter)
|
2008-12-03 10:39:53 +01:00
|
|
|
{
|
2009-06-02 19:22:16 +02:00
|
|
|
struct perf_counter_attr *attr = &counter->attr;
|
2008-12-03 10:39:53 +01:00
|
|
|
struct hw_perf_counter *hwc = &counter->hw;
|
2009-07-08 10:21:41 +02:00
|
|
|
u64 config;
|
2009-03-30 19:07:16 +02:00
|
|
|
int err;
|
2008-12-03 10:39:53 +01:00
|
|
|
|
2009-04-29 12:47:20 +02:00
|
|
|
if (!x86_pmu_initialized())
|
|
|
|
return -ENODEV;
|
2008-12-03 10:39:53 +01:00
|
|
|
|
2009-03-30 19:07:16 +02:00
|
|
|
err = 0;
|
2009-05-04 18:47:44 +02:00
|
|
|
if (!atomic_inc_not_zero(&active_counters)) {
|
2009-03-30 19:07:16 +02:00
|
|
|
mutex_lock(&pmc_reserve_mutex);
|
2009-07-21 15:56:48 +02:00
|
|
|
if (atomic_read(&active_counters) == 0) {
|
|
|
|
if (!reserve_pmc_hardware())
|
|
|
|
err = -EBUSY;
|
|
|
|
else
|
2009-09-02 16:04:46 +02:00
|
|
|
err = reserve_bts_hardware();
|
2009-07-21 15:56:48 +02:00
|
|
|
}
|
|
|
|
if (!err)
|
2009-05-04 18:47:44 +02:00
|
|
|
atomic_inc(&active_counters);
|
2009-03-30 19:07:16 +02:00
|
|
|
mutex_unlock(&pmc_reserve_mutex);
|
|
|
|
}
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
2008-12-03 10:39:53 +01:00
|
|
|
/*
|
perf_counters: allow users to count user, kernel and/or hypervisor events
Impact: new perf_counter feature
This extends the perf_counter_hw_event struct with bits that specify
that events in user, kernel and/or hypervisor mode should not be
counted (i.e. should be excluded), and adds code to program the PMU
mode selection bits accordingly on x86 and powerpc.
For software counters, we don't currently have the infrastructure to
distinguish which mode an event occurs in, so we currently fail the
counter initialization if the setting of the hw_event.exclude_* bits
would require us to distinguish. Context switches and CPU migrations
are currently considered to occur in kernel mode.
On x86, this changes the previous policy that only root can count
kernel events. Now non-root users can count kernel events or exclude
them. Non-root users still can't use NMI events, though. On x86 we
don't appear to have any way to control whether hypervisor events are
counted or not, so hw_event.exclude_hv is ignored.
On powerpc, the selection of whether to count events in user, kernel
and/or hypervisor mode is PMU-wide, not per-counter, so this adds a
check that the hw_event.exclude_* settings are the same as other events
on the PMU. Counters being added to a group have to have the same
settings as the other hardware counters in the group. Counters and
groups can only be enabled in hw_perf_group_sched_in or power_perf_enable
if they have the same settings as any other counters already on the
PMU. If we are not running on a hypervisor, the exclude_hv setting
is ignored (by forcing it to 0) since we can't ever get any
hypervisor events.
Signed-off-by: Paul Mackerras <paulus@samba.org>
2009-02-11 14:35:35 +11:00
|
|
|
* Generate PMC IRQs:
|
2008-12-03 10:39:53 +01:00
|
|
|
* (keep 'enabled' bit clear for now)
|
|
|
|
*/
|
perf_counters: allow users to count user, kernel and/or hypervisor events
Impact: new perf_counter feature
This extends the perf_counter_hw_event struct with bits that specify
that events in user, kernel and/or hypervisor mode should not be
counted (i.e. should be excluded), and adds code to program the PMU
mode selection bits accordingly on x86 and powerpc.
For software counters, we don't currently have the infrastructure to
distinguish which mode an event occurs in, so we currently fail the
counter initialization if the setting of the hw_event.exclude_* bits
would require us to distinguish. Context switches and CPU migrations
are currently considered to occur in kernel mode.
On x86, this changes the previous policy that only root can count
kernel events. Now non-root users can count kernel events or exclude
them. Non-root users still can't use NMI events, though. On x86 we
don't appear to have any way to control whether hypervisor events are
counted or not, so hw_event.exclude_hv is ignored.
On powerpc, the selection of whether to count events in user, kernel
and/or hypervisor mode is PMU-wide, not per-counter, so this adds a
check that the hw_event.exclude_* settings are the same as other events
on the PMU. Counters being added to a group have to have the same
settings as the other hardware counters in the group. Counters and
groups can only be enabled in hw_perf_group_sched_in or power_perf_enable
if they have the same settings as any other counters already on the
PMU. If we are not running on a hypervisor, the exclude_hv setting
is ignored (by forcing it to 0) since we can't ever get any
hypervisor events.
Signed-off-by: Paul Mackerras <paulus@samba.org>
2009-02-11 14:35:35 +11:00
|
|
|
hwc->config = ARCH_PERFMON_EVENTSEL_INT;
|
2008-12-03 10:39:53 +01:00
|
|
|
|
|
|
|
/*
|
perf_counters: allow users to count user, kernel and/or hypervisor events
Impact: new perf_counter feature
This extends the perf_counter_hw_event struct with bits that specify
that events in user, kernel and/or hypervisor mode should not be
counted (i.e. should be excluded), and adds code to program the PMU
mode selection bits accordingly on x86 and powerpc.
For software counters, we don't currently have the infrastructure to
distinguish which mode an event occurs in, so we currently fail the
counter initialization if the setting of the hw_event.exclude_* bits
would require us to distinguish. Context switches and CPU migrations
are currently considered to occur in kernel mode.
On x86, this changes the previous policy that only root can count
kernel events. Now non-root users can count kernel events or exclude
them. Non-root users still can't use NMI events, though. On x86 we
don't appear to have any way to control whether hypervisor events are
counted or not, so hw_event.exclude_hv is ignored.
On powerpc, the selection of whether to count events in user, kernel
and/or hypervisor mode is PMU-wide, not per-counter, so this adds a
check that the hw_event.exclude_* settings are the same as other events
on the PMU. Counters being added to a group have to have the same
settings as the other hardware counters in the group. Counters and
groups can only be enabled in hw_perf_group_sched_in or power_perf_enable
if they have the same settings as any other counters already on the
PMU. If we are not running on a hypervisor, the exclude_hv setting
is ignored (by forcing it to 0) since we can't ever get any
hypervisor events.
Signed-off-by: Paul Mackerras <paulus@samba.org>
2009-02-11 14:35:35 +11:00
|
|
|
* Count user and OS events unless requested not to.
|
2008-12-03 10:39:53 +01:00
|
|
|
*/
|
2009-06-02 19:22:16 +02:00
|
|
|
if (!attr->exclude_user)
|
perf_counters: allow users to count user, kernel and/or hypervisor events
Impact: new perf_counter feature
This extends the perf_counter_hw_event struct with bits that specify
that events in user, kernel and/or hypervisor mode should not be
counted (i.e. should be excluded), and adds code to program the PMU
mode selection bits accordingly on x86 and powerpc.
For software counters, we don't currently have the infrastructure to
distinguish which mode an event occurs in, so we currently fail the
counter initialization if the setting of the hw_event.exclude_* bits
would require us to distinguish. Context switches and CPU migrations
are currently considered to occur in kernel mode.
On x86, this changes the previous policy that only root can count
kernel events. Now non-root users can count kernel events or exclude
them. Non-root users still can't use NMI events, though. On x86 we
don't appear to have any way to control whether hypervisor events are
counted or not, so hw_event.exclude_hv is ignored.
On powerpc, the selection of whether to count events in user, kernel
and/or hypervisor mode is PMU-wide, not per-counter, so this adds a
check that the hw_event.exclude_* settings are the same as other events
on the PMU. Counters being added to a group have to have the same
settings as the other hardware counters in the group. Counters and
groups can only be enabled in hw_perf_group_sched_in or power_perf_enable
if they have the same settings as any other counters already on the
PMU. If we are not running on a hypervisor, the exclude_hv setting
is ignored (by forcing it to 0) since we can't ever get any
hypervisor events.
Signed-off-by: Paul Mackerras <paulus@samba.org>
2009-02-11 14:35:35 +11:00
|
|
|
hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
|
2009-06-02 19:22:16 +02:00
|
|
|
if (!attr->exclude_kernel)
|
2008-12-03 10:39:53 +01:00
|
|
|
hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
|
perf_counters: allow users to count user, kernel and/or hypervisor events
Impact: new perf_counter feature
This extends the perf_counter_hw_event struct with bits that specify
that events in user, kernel and/or hypervisor mode should not be
counted (i.e. should be excluded), and adds code to program the PMU
mode selection bits accordingly on x86 and powerpc.
For software counters, we don't currently have the infrastructure to
distinguish which mode an event occurs in, so we currently fail the
counter initialization if the setting of the hw_event.exclude_* bits
would require us to distinguish. Context switches and CPU migrations
are currently considered to occur in kernel mode.
On x86, this changes the previous policy that only root can count
kernel events. Now non-root users can count kernel events or exclude
them. Non-root users still can't use NMI events, though. On x86 we
don't appear to have any way to control whether hypervisor events are
counted or not, so hw_event.exclude_hv is ignored.
On powerpc, the selection of whether to count events in user, kernel
and/or hypervisor mode is PMU-wide, not per-counter, so this adds a
check that the hw_event.exclude_* settings are the same as other events
on the PMU. Counters being added to a group have to have the same
settings as the other hardware counters in the group. Counters and
groups can only be enabled in hw_perf_group_sched_in or power_perf_enable
if they have the same settings as any other counters already on the
PMU. If we are not running on a hypervisor, the exclude_hv setting
is ignored (by forcing it to 0) since we can't ever get any
hypervisor events.
Signed-off-by: Paul Mackerras <paulus@samba.org>
2009-02-11 14:35:35 +11:00
|
|
|
|
2009-06-10 13:40:57 +02:00
|
|
|
if (!hwc->sample_period) {
|
2009-06-02 15:13:03 +02:00
|
|
|
hwc->sample_period = x86_pmu.max_period;
|
2009-06-10 21:34:59 +02:00
|
|
|
hwc->last_period = hwc->sample_period;
|
2009-06-10 13:40:57 +02:00
|
|
|
atomic64_set(&hwc->period_left, hwc->sample_period);
|
2009-08-11 10:40:08 +02:00
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* If we have a PMU initialized but no APIC
|
|
|
|
* interrupts, we cannot sample hardware
|
|
|
|
* counters (user-space has to fall back and
|
|
|
|
* sample via a hrtimer based software counter):
|
|
|
|
*/
|
|
|
|
if (!x86_pmu.apic)
|
|
|
|
return -EOPNOTSUPP;
|
2009-06-10 13:40:57 +02:00
|
|
|
}
|
2009-05-17 10:04:45 +02:00
|
|
|
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
counter->destroy = hw_perf_counter_destroy;
|
2008-12-03 10:39:53 +01:00
|
|
|
|
|
|
|
/*
|
2008-12-08 19:35:37 +01:00
|
|
|
* Raw event type provide the config in the event structure
|
2008-12-03 10:39:53 +01:00
|
|
|
*/
|
2009-06-06 09:58:57 +02:00
|
|
|
if (attr->type == PERF_TYPE_RAW) {
|
|
|
|
hwc->config |= x86_pmu.raw_event(attr->config);
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
return 0;
|
2008-12-03 10:39:53 +01:00
|
|
|
}
|
|
|
|
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
if (attr->type == PERF_TYPE_HW_CACHE)
|
|
|
|
return set_ext_hw_attr(hwc, attr);
|
|
|
|
|
|
|
|
if (attr->config >= x86_pmu.max_events)
|
|
|
|
return -EINVAL;
|
2009-07-08 10:21:41 +02:00
|
|
|
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
/*
|
|
|
|
* The generic map:
|
|
|
|
*/
|
2009-07-08 10:21:41 +02:00
|
|
|
config = x86_pmu.event_map(attr->config);
|
|
|
|
|
|
|
|
if (config == 0)
|
|
|
|
return -ENOENT;
|
|
|
|
|
|
|
|
if (config == -1LL)
|
|
|
|
return -EINVAL;
|
|
|
|
|
2009-09-02 16:04:46 +02:00
|
|
|
/*
|
|
|
|
* Branch tracing:
|
|
|
|
*/
|
|
|
|
if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
|
2009-09-02 16:04:48 +02:00
|
|
|
(hwc->sample_period == 1)) {
|
|
|
|
/* BTS is not supported by this architecture. */
|
|
|
|
if (!bts_available())
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
|
|
|
|
/* BTS is currently only allowed for user-mode. */
|
|
|
|
if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
}
|
2009-09-02 16:04:46 +02:00
|
|
|
|
2009-07-08 10:21:41 +02:00
|
|
|
hwc->config |= config;
|
2009-03-30 19:07:16 +02:00
|
|
|
|
2008-12-03 10:39:53 +01:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2009-07-08 17:46:14 -04:00
|
|
|
static void p6_pmu_disable_all(void)
|
|
|
|
{
|
|
|
|
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
|
2009-07-08 10:21:41 +02:00
|
|
|
u64 val;
|
2009-07-08 17:46:14 -04:00
|
|
|
|
|
|
|
if (!cpuc->enabled)
|
|
|
|
return;
|
|
|
|
|
|
|
|
cpuc->enabled = 0;
|
|
|
|
barrier();
|
|
|
|
|
|
|
|
/* p6 only has one enable register */
|
|
|
|
rdmsrl(MSR_P6_EVNTSEL0, val);
|
|
|
|
val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
|
|
|
|
wrmsrl(MSR_P6_EVNTSEL0, val);
|
|
|
|
}
|
|
|
|
|
2009-05-13 16:21:38 +02:00
|
|
|
static void intel_pmu_disable_all(void)
|
2008-12-09 21:43:39 +01:00
|
|
|
{
|
2009-07-21 15:56:48 +02:00
|
|
|
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
|
|
|
|
|
|
|
|
if (!cpuc->enabled)
|
|
|
|
return;
|
|
|
|
|
|
|
|
cpuc->enabled = 0;
|
|
|
|
barrier();
|
|
|
|
|
2008-12-17 13:09:20 +01:00
|
|
|
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
|
2009-07-21 15:56:48 +02:00
|
|
|
|
|
|
|
if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
|
|
|
|
intel_pmu_disable_bts();
|
2008-12-03 10:39:53 +01:00
|
|
|
}
|
2009-02-27 18:09:09 +05:30
|
|
|
|
2009-05-13 16:21:38 +02:00
|
|
|
static void amd_pmu_disable_all(void)
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 20:15:14 +05:30
|
|
|
{
|
2009-03-05 18:08:27 +01:00
|
|
|
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
|
2009-05-13 16:21:38 +02:00
|
|
|
int idx;
|
|
|
|
|
|
|
|
if (!cpuc->enabled)
|
|
|
|
return;
|
2009-03-05 18:08:27 +01:00
|
|
|
|
|
|
|
cpuc->enabled = 0;
|
2009-03-13 12:21:30 +01:00
|
|
|
/*
|
|
|
|
* ensure we write the disable before we start disabling the
|
2009-04-29 12:47:04 +02:00
|
|
|
* counters proper, so that amd_pmu_enable_counter() does the
|
|
|
|
* right thing.
|
2009-03-13 12:21:30 +01:00
|
|
|
*/
|
2009-03-05 18:08:27 +01:00
|
|
|
barrier();
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 20:15:14 +05:30
|
|
|
|
2009-04-29 12:47:12 +02:00
|
|
|
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
|
2009-03-05 18:08:27 +01:00
|
|
|
u64 val;
|
|
|
|
|
2009-04-29 16:55:56 +02:00
|
|
|
if (!test_bit(idx, cpuc->active_mask))
|
2009-04-29 12:47:01 +02:00
|
|
|
continue;
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 20:15:14 +05:30
|
|
|
rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
|
2009-04-29 12:47:01 +02:00
|
|
|
if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE))
|
|
|
|
continue;
|
|
|
|
val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
|
|
|
|
wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 20:15:14 +05:30
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-05-13 16:21:38 +02:00
|
|
|
void hw_perf_disable(void)
|
2009-02-27 18:09:09 +05:30
|
|
|
{
|
2009-04-29 12:47:20 +02:00
|
|
|
if (!x86_pmu_initialized())
|
2009-05-13 16:21:38 +02:00
|
|
|
return;
|
|
|
|
return x86_pmu.disable_all();
|
2009-02-27 18:09:09 +05:30
|
|
|
}
|
2008-12-03 10:39:53 +01:00
|
|
|
|
2009-07-08 17:46:14 -04:00
|
|
|
static void p6_pmu_enable_all(void)
|
|
|
|
{
|
|
|
|
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
|
|
|
|
unsigned long val;
|
|
|
|
|
|
|
|
if (cpuc->enabled)
|
|
|
|
return;
|
|
|
|
|
|
|
|
cpuc->enabled = 1;
|
|
|
|
barrier();
|
|
|
|
|
|
|
|
/* p6 only has one enable register */
|
|
|
|
rdmsrl(MSR_P6_EVNTSEL0, val);
|
|
|
|
val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
|
|
|
|
wrmsrl(MSR_P6_EVNTSEL0, val);
|
|
|
|
}
|
|
|
|
|
2009-05-13 16:21:38 +02:00
|
|
|
static void intel_pmu_enable_all(void)
|
2009-02-27 18:09:09 +05:30
|
|
|
{
|
2009-07-21 15:56:48 +02:00
|
|
|
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
|
|
|
|
|
|
|
|
if (cpuc->enabled)
|
|
|
|
return;
|
|
|
|
|
|
|
|
cpuc->enabled = 1;
|
|
|
|
barrier();
|
|
|
|
|
2009-05-13 16:21:38 +02:00
|
|
|
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
|
2009-07-21 15:56:48 +02:00
|
|
|
|
|
|
|
if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
|
|
|
|
struct perf_counter *counter =
|
|
|
|
cpuc->counters[X86_PMC_IDX_FIXED_BTS];
|
|
|
|
|
|
|
|
if (WARN_ON_ONCE(!counter))
|
|
|
|
return;
|
|
|
|
|
|
|
|
intel_pmu_enable_bts(counter->hw.config);
|
|
|
|
}
|
2009-02-27 18:09:09 +05:30
|
|
|
}
|
|
|
|
|
2009-05-13 16:21:38 +02:00
|
|
|
static void amd_pmu_enable_all(void)
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 20:15:14 +05:30
|
|
|
{
|
2009-03-05 18:08:27 +01:00
|
|
|
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 20:15:14 +05:30
|
|
|
int idx;
|
|
|
|
|
2009-05-13 16:21:38 +02:00
|
|
|
if (cpuc->enabled)
|
2009-03-05 18:08:27 +01:00
|
|
|
return;
|
|
|
|
|
2009-05-13 16:21:38 +02:00
|
|
|
cpuc->enabled = 1;
|
|
|
|
barrier();
|
|
|
|
|
2009-04-29 12:47:12 +02:00
|
|
|
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
|
2009-07-10 09:59:56 +02:00
|
|
|
struct perf_counter *counter = cpuc->counters[idx];
|
2009-04-29 12:47:01 +02:00
|
|
|
u64 val;
|
2009-03-05 18:08:27 +01:00
|
|
|
|
2009-04-29 16:55:56 +02:00
|
|
|
if (!test_bit(idx, cpuc->active_mask))
|
2009-04-29 12:47:01 +02:00
|
|
|
continue;
|
2009-07-10 09:59:56 +02:00
|
|
|
|
|
|
|
val = counter->hw.config;
|
2009-04-29 12:47:01 +02:00
|
|
|
val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
|
|
|
|
wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 20:15:14 +05:30
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-05-13 16:21:38 +02:00
|
|
|
void hw_perf_enable(void)
|
2008-12-13 09:00:03 +01:00
|
|
|
{
|
2009-04-29 12:47:20 +02:00
|
|
|
if (!x86_pmu_initialized())
|
2008-12-14 18:36:30 +01:00
|
|
|
return;
|
2009-05-13 16:21:38 +02:00
|
|
|
x86_pmu.enable_all();
|
2008-12-13 09:00:03 +01:00
|
|
|
}
|
|
|
|
|
2009-04-29 12:47:25 +02:00
|
|
|
static inline u64 intel_pmu_get_status(void)
|
2009-03-05 18:08:27 +01:00
|
|
|
{
|
|
|
|
u64 status;
|
|
|
|
|
2009-04-29 12:47:06 +02:00
|
|
|
rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
|
2009-03-05 18:08:27 +01:00
|
|
|
|
2009-04-29 12:47:06 +02:00
|
|
|
return status;
|
2009-03-05 18:08:27 +01:00
|
|
|
}
|
|
|
|
|
2009-04-29 12:47:07 +02:00
|
|
|
/* Write the given bit mask 'ack' to MSR_CORE_PERF_GLOBAL_OVF_CTRL. */
static inline void intel_pmu_ack_status(u64 ack)
{
	const u64 handled_bits = ack;

	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, handled_bits);
}
|
|
|
|
|
2009-04-29 12:47:18 +02:00
|
|
|
static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
|
2009-03-05 18:08:27 +01:00
|
|
|
{
|
2009-07-08 17:46:14 -04:00
|
|
|
(void)checking_wrmsrl(hwc->config_base + idx,
|
2009-04-29 12:47:18 +02:00
|
|
|
hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
|
2009-03-05 18:08:27 +01:00
|
|
|
}
|
|
|
|
|
2009-04-29 12:47:19 +02:00
|
|
|
static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
|
2009-03-05 18:08:27 +01:00
|
|
|
{
|
2009-07-08 17:46:14 -04:00
|
|
|
(void)checking_wrmsrl(hwc->config_base + idx, hwc->config);
|
2009-03-05 18:08:27 +01:00
|
|
|
}
|
|
|
|
|
2008-12-22 11:10:42 +01:00
|
|
|
static inline void
|
2009-04-29 12:47:19 +02:00
|
|
|
intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx)
|
2008-12-22 11:10:42 +01:00
|
|
|
{
|
|
|
|
int idx = __idx - X86_PMC_IDX_FIXED;
|
|
|
|
u64 ctrl_val, mask;
|
|
|
|
|
|
|
|
mask = 0xfULL << (idx * 4);
|
|
|
|
|
|
|
|
rdmsrl(hwc->config_base, ctrl_val);
|
|
|
|
ctrl_val &= ~mask;
|
2009-07-08 17:46:14 -04:00
|
|
|
(void)checking_wrmsrl(hwc->config_base, ctrl_val);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void
|
|
|
|
p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
|
|
|
|
{
|
|
|
|
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
|
2009-07-08 10:21:41 +02:00
|
|
|
u64 val = P6_NOP_COUNTER;
|
2009-07-08 17:46:14 -04:00
|
|
|
|
2009-07-08 10:21:41 +02:00
|
|
|
if (cpuc->enabled)
|
|
|
|
val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
|
2009-07-08 17:46:14 -04:00
|
|
|
|
|
|
|
(void)checking_wrmsrl(hwc->config_base + idx, val);
|
2008-12-22 11:10:42 +01:00
|
|
|
}
|
|
|
|
|
2008-12-09 11:40:46 +01:00
|
|
|
static inline void
|
2009-04-29 12:47:19 +02:00
|
|
|
intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
|
2008-12-09 11:40:46 +01:00
|
|
|
{
|
2009-07-21 15:56:48 +02:00
|
|
|
if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
|
|
|
|
intel_pmu_disable_bts();
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2009-04-29 12:47:19 +02:00
|
|
|
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
|
|
|
|
intel_pmu_disable_fixed(hwc, idx);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
x86_pmu_disable_counter(hwc, idx);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* AMD has no special cases: disabling is the generic event-select write. */
static inline void
amd_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
{
	x86_pmu_disable_counter(hwc, idx);
}
|
|
|
|
|
2008-12-22 11:10:42 +01:00
|
|
|
/*
 * Per-CPU, per-slot copy of the 'left' value most recently programmed by
 * x86_perf_counter_set_period(); read back by perf_counter_print_debug().
 */
static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]);
|
2008-12-03 10:39:53 +01:00
|
|
|
|
2008-12-13 09:00:03 +01:00
|
|
|
/*
|
|
|
|
* Set the next IRQ period, based on the hwc->period_left value.
|
|
|
|
* To be called with the counter disabled in hw:
|
|
|
|
*/
|
2009-06-02 16:08:20 +02:00
|
|
|
static int
|
2009-04-29 12:47:08 +02:00
|
|
|
x86_perf_counter_set_period(struct perf_counter *counter,
|
2008-12-13 09:00:03 +01:00
|
|
|
struct hw_perf_counter *hwc, int idx)
|
2008-12-03 10:39:53 +01:00
|
|
|
{
|
2008-12-22 11:10:42 +01:00
|
|
|
s64 left = atomic64_read(&hwc->period_left);
|
2009-06-02 16:08:20 +02:00
|
|
|
s64 period = hwc->sample_period;
|
|
|
|
int err, ret = 0;
|
2008-12-13 09:00:03 +01:00
|
|
|
|
2009-07-21 15:56:48 +02:00
|
|
|
if (idx == X86_PMC_IDX_FIXED_BTS)
|
|
|
|
return 0;
|
|
|
|
|
2008-12-13 09:00:03 +01:00
|
|
|
/*
|
|
|
|
* If we are way outside a reasoable range then just skip forward:
|
|
|
|
*/
|
|
|
|
if (unlikely(left <= -period)) {
|
|
|
|
left = period;
|
|
|
|
atomic64_set(&hwc->period_left, left);
|
2009-06-10 21:34:59 +02:00
|
|
|
hwc->last_period = period;
|
2009-06-02 16:08:20 +02:00
|
|
|
ret = 1;
|
2008-12-13 09:00:03 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
if (unlikely(left <= 0)) {
|
|
|
|
left += period;
|
|
|
|
atomic64_set(&hwc->period_left, left);
|
2009-06-10 21:34:59 +02:00
|
|
|
hwc->last_period = period;
|
2009-06-02 16:08:20 +02:00
|
|
|
ret = 1;
|
2008-12-13 09:00:03 +01:00
|
|
|
}
|
2009-05-15 08:25:22 +02:00
|
|
|
/*
|
|
|
|
* Quirk: certain CPUs dont like it if just 1 event is left:
|
|
|
|
*/
|
|
|
|
if (unlikely(left < 2))
|
|
|
|
left = 2;
|
2008-12-03 10:39:53 +01:00
|
|
|
|
2009-06-02 16:08:20 +02:00
|
|
|
if (left > x86_pmu.max_period)
|
|
|
|
left = x86_pmu.max_period;
|
|
|
|
|
2008-12-13 09:00:03 +01:00
|
|
|
per_cpu(prev_left[idx], smp_processor_id()) = left;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The hw counter starts counting from this counter offset,
|
|
|
|
* mark it to be able to extra future deltas:
|
|
|
|
*/
|
2008-12-22 11:10:42 +01:00
|
|
|
atomic64_set(&hwc->prev_count, (u64)-left);
|
2008-12-13 09:00:03 +01:00
|
|
|
|
2008-12-22 11:10:42 +01:00
|
|
|
err = checking_wrmsrl(hwc->counter_base + idx,
|
2009-04-29 12:47:12 +02:00
|
|
|
(u64)(-left) & x86_pmu.counter_mask);
|
2009-06-02 16:08:20 +02:00
|
|
|
|
2009-06-22 16:35:24 +02:00
|
|
|
perf_counter_update_userpage(counter);
|
|
|
|
|
2009-06-02 16:08:20 +02:00
|
|
|
return ret;
|
2008-12-22 11:10:42 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline void
|
2009-04-29 12:47:18 +02:00
|
|
|
intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx)
|
2008-12-22 11:10:42 +01:00
|
|
|
{
|
|
|
|
int idx = __idx - X86_PMC_IDX_FIXED;
|
|
|
|
u64 ctrl_val, bits, mask;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
/*
|
perf_counters: allow users to count user, kernel and/or hypervisor events
Impact: new perf_counter feature
This extends the perf_counter_hw_event struct with bits that specify
that events in user, kernel and/or hypervisor mode should not be
counted (i.e. should be excluded), and adds code to program the PMU
mode selection bits accordingly on x86 and powerpc.
For software counters, we don't currently have the infrastructure to
distinguish which mode an event occurs in, so we currently fail the
counter initialization if the setting of the hw_event.exclude_* bits
would require us to distinguish. Context switches and CPU migrations
are currently considered to occur in kernel mode.
On x86, this changes the previous policy that only root can count
kernel events. Now non-root users can count kernel events or exclude
them. Non-root users still can't use NMI events, though. On x86 we
don't appear to have any way to control whether hypervisor events are
counted or not, so hw_event.exclude_hv is ignored.
On powerpc, the selection of whether to count events in user, kernel
and/or hypervisor mode is PMU-wide, not per-counter, so this adds a
check that the hw_event.exclude_* settings are the same as other events
on the PMU. Counters being added to a group have to have the same
settings as the other hardware counters in the group. Counters and
groups can only be enabled in hw_perf_group_sched_in or power_perf_enable
if they have the same settings as any other counters already on the
PMU. If we are not running on a hypervisor, the exclude_hv setting
is ignored (by forcing it to 0) since we can't ever get any
hypervisor events.
Signed-off-by: Paul Mackerras <paulus@samba.org>
2009-02-11 14:35:35 +11:00
|
|
|
* Enable IRQ generation (0x8),
|
|
|
|
* and enable ring-3 counting (0x2) and ring-0 counting (0x1)
|
|
|
|
* if requested:
|
2008-12-22 11:10:42 +01:00
|
|
|
*/
|
perf_counters: allow users to count user, kernel and/or hypervisor events
Impact: new perf_counter feature
This extends the perf_counter_hw_event struct with bits that specify
that events in user, kernel and/or hypervisor mode should not be
counted (i.e. should be excluded), and adds code to program the PMU
mode selection bits accordingly on x86 and powerpc.
For software counters, we don't currently have the infrastructure to
distinguish which mode an event occurs in, so we currently fail the
counter initialization if the setting of the hw_event.exclude_* bits
would require us to distinguish. Context switches and CPU migrations
are currently considered to occur in kernel mode.
On x86, this changes the previous policy that only root can count
kernel events. Now non-root users can count kernel events or exclude
them. Non-root users still can't use NMI events, though. On x86 we
don't appear to have any way to control whether hypervisor events are
counted or not, so hw_event.exclude_hv is ignored.
On powerpc, the selection of whether to count events in user, kernel
and/or hypervisor mode is PMU-wide, not per-counter, so this adds a
check that the hw_event.exclude_* settings are the same as other events
on the PMU. Counters being added to a group have to have the same
settings as the other hardware counters in the group. Counters and
groups can only be enabled in hw_perf_group_sched_in or power_perf_enable
if they have the same settings as any other counters already on the
PMU. If we are not running on a hypervisor, the exclude_hv setting
is ignored (by forcing it to 0) since we can't ever get any
hypervisor events.
Signed-off-by: Paul Mackerras <paulus@samba.org>
2009-02-11 14:35:35 +11:00
|
|
|
bits = 0x8ULL;
|
|
|
|
if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
|
|
|
|
bits |= 0x2;
|
2008-12-22 11:10:42 +01:00
|
|
|
if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
|
|
|
|
bits |= 0x1;
|
|
|
|
bits <<= (idx * 4);
|
|
|
|
mask = 0xfULL << (idx * 4);
|
|
|
|
|
|
|
|
rdmsrl(hwc->config_base, ctrl_val);
|
|
|
|
ctrl_val &= ~mask;
|
|
|
|
ctrl_val |= bits;
|
|
|
|
err = checking_wrmsrl(hwc->config_base, ctrl_val);
|
2008-12-09 11:40:46 +01:00
|
|
|
}
|
|
|
|
|
2009-07-08 17:46:14 -04:00
|
|
|
static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
|
|
|
|
{
|
|
|
|
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
|
2009-07-10 09:59:56 +02:00
|
|
|
u64 val;
|
2009-07-08 17:46:14 -04:00
|
|
|
|
2009-07-10 09:59:56 +02:00
|
|
|
val = hwc->config;
|
2009-07-08 17:46:14 -04:00
|
|
|
if (cpuc->enabled)
|
2009-07-10 09:59:56 +02:00
|
|
|
val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
|
|
|
|
|
|
|
|
(void)checking_wrmsrl(hwc->config_base + idx, val);
|
2009-07-08 17:46:14 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2009-04-29 12:47:18 +02:00
|
|
|
static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
|
2008-12-09 11:40:46 +01:00
|
|
|
{
|
2009-07-21 15:56:48 +02:00
|
|
|
if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
|
|
|
|
if (!__get_cpu_var(cpu_hw_counters).enabled)
|
|
|
|
return;
|
|
|
|
|
|
|
|
intel_pmu_enable_bts(hwc->config);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2009-04-29 12:47:18 +02:00
|
|
|
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
|
|
|
|
intel_pmu_enable_fixed(hwc, idx);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
x86_pmu_enable_counter(hwc, idx);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
|
|
|
|
{
|
|
|
|
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
|
|
|
|
|
|
|
|
if (cpuc->enabled)
|
|
|
|
x86_pmu_enable_counter(hwc, idx);
|
2008-12-03 10:39:53 +01:00
|
|
|
}
|
|
|
|
|
2008-12-22 11:10:42 +01:00
|
|
|
static int
|
|
|
|
fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
|
2008-12-17 13:09:20 +01:00
|
|
|
{
|
2008-12-22 11:10:42 +01:00
|
|
|
unsigned int event;
|
|
|
|
|
2009-07-21 15:56:48 +02:00
|
|
|
event = hwc->config & ARCH_PERFMON_EVENT_MASK;
|
|
|
|
|
|
|
|
if (unlikely((event ==
|
|
|
|
x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
|
|
|
|
(hwc->sample_period == 1)))
|
|
|
|
return X86_PMC_IDX_FIXED_BTS;
|
|
|
|
|
2009-04-29 12:47:24 +02:00
|
|
|
if (!x86_pmu.num_counters_fixed)
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 20:15:14 +05:30
|
|
|
return -1;
|
|
|
|
|
2009-06-11 14:06:28 +02:00
|
|
|
if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
|
2008-12-22 11:10:42 +01:00
|
|
|
return X86_PMC_IDX_FIXED_INSTRUCTIONS;
|
2009-06-11 14:06:28 +02:00
|
|
|
if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
|
2008-12-22 11:10:42 +01:00
|
|
|
return X86_PMC_IDX_FIXED_CPU_CYCLES;
|
2009-06-11 14:06:28 +02:00
|
|
|
if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES)))
|
2008-12-22 11:10:42 +01:00
|
|
|
return X86_PMC_IDX_FIXED_BUS_CYCLES;
|
|
|
|
|
2008-12-17 13:09:20 +01:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2008-12-13 09:00:03 +01:00
|
|
|
/*
|
|
|
|
* Find a PMC slot for the freshly enabled / scheduled in counter:
|
|
|
|
*/
|
2009-04-29 12:47:03 +02:00
|
|
|
static int x86_pmu_enable(struct perf_counter *counter)
|
2008-12-03 10:39:53 +01:00
|
|
|
{
|
|
|
|
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
|
|
|
|
struct hw_perf_counter *hwc = &counter->hw;
|
2008-12-22 11:10:42 +01:00
|
|
|
int idx;
|
2008-12-03 10:39:53 +01:00
|
|
|
|
2008-12-22 11:10:42 +01:00
|
|
|
idx = fixed_mode_idx(counter, hwc);
|
2009-07-21 15:56:48 +02:00
|
|
|
if (idx == X86_PMC_IDX_FIXED_BTS) {
|
2009-09-02 16:04:46 +02:00
|
|
|
/* BTS is already occupied. */
|
2009-07-21 15:56:48 +02:00
|
|
|
if (test_and_set_bit(idx, cpuc->used_mask))
|
2009-09-02 16:04:46 +02:00
|
|
|
return -EAGAIN;
|
2009-07-21 15:56:48 +02:00
|
|
|
|
|
|
|
hwc->config_base = 0;
|
|
|
|
hwc->counter_base = 0;
|
|
|
|
hwc->idx = idx;
|
|
|
|
} else if (idx >= 0) {
|
2008-12-22 11:10:42 +01:00
|
|
|
/*
|
|
|
|
* Try to get the fixed counter, if that is already taken
|
|
|
|
* then try to get a generic counter:
|
|
|
|
*/
|
2009-04-29 16:55:56 +02:00
|
|
|
if (test_and_set_bit(idx, cpuc->used_mask))
|
2008-12-22 11:10:42 +01:00
|
|
|
goto try_generic;
|
2008-12-23 12:28:12 +01:00
|
|
|
|
2008-12-22 11:10:42 +01:00
|
|
|
hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
|
|
|
|
/*
|
|
|
|
* We set it so that counter_base + idx in wrmsr/rdmsr maps to
|
|
|
|
* MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
|
|
|
|
*/
|
|
|
|
hwc->counter_base =
|
|
|
|
MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
|
2008-12-03 10:39:53 +01:00
|
|
|
hwc->idx = idx;
|
2008-12-22 11:10:42 +01:00
|
|
|
} else {
|
|
|
|
idx = hwc->idx;
|
|
|
|
/* Try to get the previous generic counter again */
|
2009-04-29 16:55:56 +02:00
|
|
|
if (test_and_set_bit(idx, cpuc->used_mask)) {
|
2008-12-22 11:10:42 +01:00
|
|
|
try_generic:
|
2009-04-29 16:55:56 +02:00
|
|
|
idx = find_first_zero_bit(cpuc->used_mask,
|
2009-04-29 12:47:12 +02:00
|
|
|
x86_pmu.num_counters);
|
|
|
|
if (idx == x86_pmu.num_counters)
|
2008-12-22 11:10:42 +01:00
|
|
|
return -EAGAIN;
|
|
|
|
|
2009-04-29 16:55:56 +02:00
|
|
|
set_bit(idx, cpuc->used_mask);
|
2008-12-22 11:10:42 +01:00
|
|
|
hwc->idx = idx;
|
|
|
|
}
|
2009-04-29 12:47:11 +02:00
|
|
|
hwc->config_base = x86_pmu.eventsel;
|
|
|
|
hwc->counter_base = x86_pmu.perfctr;
|
2008-12-03 10:39:53 +01:00
|
|
|
}
|
|
|
|
|
2009-05-29 13:28:35 +08:00
|
|
|
perf_counters_lapic_init();
|
2009-05-25 21:41:28 +02:00
|
|
|
|
2009-04-29 12:47:19 +02:00
|
|
|
x86_pmu.disable(hwc, idx);
|
2008-12-03 10:39:53 +01:00
|
|
|
|
2008-12-17 13:09:20 +01:00
|
|
|
cpuc->counters[idx] = counter;
|
2009-04-29 16:55:56 +02:00
|
|
|
set_bit(idx, cpuc->active_mask);
|
2008-12-09 11:40:46 +01:00
|
|
|
|
2009-04-29 12:47:08 +02:00
|
|
|
x86_perf_counter_set_period(counter, hwc, idx);
|
2009-04-29 12:47:18 +02:00
|
|
|
x86_pmu.enable(hwc, idx);
|
2008-12-21 13:50:42 +01:00
|
|
|
|
2009-06-22 16:35:24 +02:00
|
|
|
perf_counter_update_userpage(counter);
|
|
|
|
|
2008-12-21 13:50:42 +01:00
|
|
|
return 0;
|
2008-12-03 10:39:53 +01:00
|
|
|
}
|
|
|
|
|
2009-05-25 17:39:05 +02:00
|
|
|
static void x86_pmu_unthrottle(struct perf_counter *counter)
|
|
|
|
{
|
|
|
|
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
|
|
|
|
struct hw_perf_counter *hwc = &counter->hw;
|
|
|
|
|
|
|
|
if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX ||
|
|
|
|
cpuc->counters[hwc->idx] != counter))
|
|
|
|
return;
|
|
|
|
|
|
|
|
x86_pmu.enable(hwc, hwc->idx);
|
|
|
|
}
|
|
|
|
|
2008-12-03 10:39:53 +01:00
|
|
|
void perf_counter_print_debug(void)
|
|
|
|
{
|
2008-12-22 11:10:42 +01:00
|
|
|
u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
|
2008-12-23 12:28:12 +01:00
|
|
|
struct cpu_hw_counters *cpuc;
|
2009-05-13 08:12:51 +02:00
|
|
|
unsigned long flags;
|
2008-12-09 12:18:18 +01:00
|
|
|
int cpu, idx;
|
|
|
|
|
2009-04-29 12:47:12 +02:00
|
|
|
if (!x86_pmu.num_counters)
|
2008-12-09 12:18:18 +01:00
|
|
|
return;
|
2008-12-03 10:39:53 +01:00
|
|
|
|
2009-05-13 08:12:51 +02:00
|
|
|
local_irq_save(flags);
|
2008-12-03 10:39:53 +01:00
|
|
|
|
|
|
|
cpu = smp_processor_id();
|
2008-12-23 12:28:12 +01:00
|
|
|
cpuc = &per_cpu(cpu_hw_counters, cpu);
|
2008-12-03 10:39:53 +01:00
|
|
|
|
2009-04-29 12:47:13 +02:00
|
|
|
if (x86_pmu.version >= 2) {
|
2009-02-28 18:45:39 +05:30
|
|
|
rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
|
|
|
|
rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
|
|
|
|
rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
|
|
|
|
rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
|
|
|
|
|
|
|
|
pr_info("\n");
|
|
|
|
pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl);
|
|
|
|
pr_info("CPU#%d: status: %016llx\n", cpu, status);
|
|
|
|
pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
|
|
|
|
pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 20:15:14 +05:30
|
|
|
}
|
2009-04-29 16:55:56 +02:00
|
|
|
pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask);
|
2008-12-03 10:39:53 +01:00
|
|
|
|
2009-04-29 12:47:12 +02:00
|
|
|
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
|
2009-04-29 12:47:11 +02:00
|
|
|
rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
|
|
|
|
rdmsrl(x86_pmu.perfctr + idx, pmc_count);
|
2008-12-03 10:39:53 +01:00
|
|
|
|
2008-12-13 09:00:03 +01:00
|
|
|
prev_left = per_cpu(prev_left[idx], cpu);
|
2008-12-03 10:39:53 +01:00
|
|
|
|
2009-02-28 18:45:39 +05:30
|
|
|
pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n",
|
2008-12-03 10:39:53 +01:00
|
|
|
cpu, idx, pmc_ctrl);
|
2009-02-28 18:45:39 +05:30
|
|
|
pr_info("CPU#%d: gen-PMC%d count: %016llx\n",
|
2008-12-03 10:39:53 +01:00
|
|
|
cpu, idx, pmc_count);
|
2009-02-28 18:45:39 +05:30
|
|
|
pr_info("CPU#%d: gen-PMC%d left: %016llx\n",
|
2008-12-13 09:00:03 +01:00
|
|
|
cpu, idx, prev_left);
|
2008-12-03 10:39:53 +01:00
|
|
|
}
|
2009-04-29 12:47:12 +02:00
|
|
|
for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
|
2008-12-22 11:10:42 +01:00
|
|
|
rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
|
|
|
|
|
2009-02-28 18:45:39 +05:30
|
|
|
pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
|
2008-12-22 11:10:42 +01:00
|
|
|
cpu, idx, pmc_count);
|
|
|
|
}
|
2009-05-13 08:12:51 +02:00
|
|
|
local_irq_restore(flags);
|
2008-12-03 10:39:53 +01:00
|
|
|
}
|
|
|
|
|
2009-07-21 15:56:48 +02:00
|
|
|
static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc,
|
|
|
|
struct perf_sample_data *data)
|
|
|
|
{
|
|
|
|
struct debug_store *ds = cpuc->ds;
|
|
|
|
struct bts_record {
|
|
|
|
u64 from;
|
|
|
|
u64 to;
|
|
|
|
u64 flags;
|
|
|
|
};
|
|
|
|
struct perf_counter *counter = cpuc->counters[X86_PMC_IDX_FIXED_BTS];
|
|
|
|
unsigned long orig_ip = data->regs->ip;
|
2009-09-02 16:04:47 +02:00
|
|
|
struct bts_record *at, *top;
|
2009-07-21 15:56:48 +02:00
|
|
|
|
|
|
|
if (!counter)
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (!ds)
|
|
|
|
return;
|
|
|
|
|
2009-09-02 16:04:47 +02:00
|
|
|
at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
|
|
|
|
top = (struct bts_record *)(unsigned long)ds->bts_index;
|
2009-07-21 15:56:48 +02:00
|
|
|
|
2009-09-02 16:04:47 +02:00
|
|
|
ds->bts_index = ds->bts_buffer_base;
|
|
|
|
|
|
|
|
for (; at < top; at++) {
|
|
|
|
data->regs->ip = at->from;
|
|
|
|
data->addr = at->to;
|
2009-07-21 15:56:48 +02:00
|
|
|
|
|
|
|
perf_counter_output(counter, 1, data);
|
|
|
|
}
|
|
|
|
|
|
|
|
data->regs->ip = orig_ip;
|
|
|
|
data->addr = 0;
|
|
|
|
|
|
|
|
/* There's new data available. */
|
|
|
|
counter->pending_kill = POLL_IN;
|
|
|
|
}
|
|
|
|
|
2009-04-29 12:47:03 +02:00
|
|
|
static void x86_pmu_disable(struct perf_counter *counter)
|
2008-12-03 10:39:53 +01:00
|
|
|
{
|
|
|
|
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
|
|
|
|
struct hw_perf_counter *hwc = &counter->hw;
|
2009-04-29 12:47:17 +02:00
|
|
|
int idx = hwc->idx;
|
2008-12-03 10:39:53 +01:00
|
|
|
|
2009-04-29 12:47:16 +02:00
|
|
|
/*
|
|
|
|
* Must be done before we disable, otherwise the nmi handler
|
|
|
|
* could reenable again:
|
|
|
|
*/
|
2009-04-29 16:55:56 +02:00
|
|
|
clear_bit(idx, cpuc->active_mask);
|
2009-04-29 12:47:19 +02:00
|
|
|
x86_pmu.disable(hwc, idx);
|
2008-12-03 10:39:53 +01:00
|
|
|
|
2008-12-22 11:10:42 +01:00
|
|
|
/*
|
|
|
|
* Make sure the cleared pointer becomes visible before we
|
|
|
|
* (potentially) free the counter:
|
|
|
|
*/
|
2009-04-29 12:47:02 +02:00
|
|
|
barrier();
|
2008-12-03 10:39:53 +01:00
|
|
|
|
2008-12-13 09:00:03 +01:00
|
|
|
/*
|
|
|
|
* Drain the remaining delta count out of a counter
|
|
|
|
* that we are disabling:
|
|
|
|
*/
|
|
|
|
x86_perf_counter_update(counter, hwc, idx);
|
2009-07-21 15:56:48 +02:00
|
|
|
|
|
|
|
/* Drain the remaining BTS records. */
|
|
|
|
if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
|
|
|
|
struct perf_sample_data data;
|
|
|
|
struct pt_regs regs;
|
|
|
|
|
|
|
|
data.regs = ®s;
|
|
|
|
intel_pmu_drain_bts_buffer(cpuc, &data);
|
|
|
|
}
|
2009-04-29 12:47:16 +02:00
|
|
|
cpuc->counters[idx] = NULL;
|
2009-04-29 16:55:56 +02:00
|
|
|
clear_bit(idx, cpuc->used_mask);
|
2009-06-22 16:35:24 +02:00
|
|
|
|
|
|
|
perf_counter_update_userpage(counter);
|
2008-12-03 10:39:53 +01:00
|
|
|
}
|
|
|
|
|
2008-12-09 11:40:46 +01:00
|
|
|
/*
|
2008-12-13 09:00:03 +01:00
|
|
|
* Save and restart an expired counter. Called by NMI contexts,
|
|
|
|
* so it has to be careful about preempting normal counter ops:
|
2008-12-09 11:40:46 +01:00
|
|
|
*/
|
2009-06-02 16:08:20 +02:00
|
|
|
static int intel_pmu_save_and_restart(struct perf_counter *counter)
|
2008-12-03 10:39:53 +01:00
|
|
|
{
|
|
|
|
struct hw_perf_counter *hwc = &counter->hw;
|
|
|
|
int idx = hwc->idx;
|
2009-06-02 16:08:20 +02:00
|
|
|
int ret;
|
2008-12-03 10:39:53 +01:00
|
|
|
|
2008-12-13 09:00:03 +01:00
|
|
|
x86_perf_counter_update(counter, hwc, idx);
|
2009-06-02 16:08:20 +02:00
|
|
|
ret = x86_perf_counter_set_period(counter, hwc, idx);
|
2008-12-09 11:40:46 +01:00
|
|
|
|
2008-12-22 11:10:42 +01:00
|
|
|
if (counter->state == PERF_COUNTER_STATE_ACTIVE)
|
2009-04-29 12:47:18 +02:00
|
|
|
intel_pmu_enable_counter(hwc, idx);
|
2009-06-02 16:08:20 +02:00
|
|
|
|
|
|
|
return ret;
|
2008-12-03 10:39:53 +01:00
|
|
|
}
|
|
|
|
|
2009-05-26 08:10:00 +02:00
|
|
|
static void intel_pmu_reset(void)
|
|
|
|
{
|
2009-07-21 15:56:48 +02:00
|
|
|
struct debug_store *ds = __get_cpu_var(cpu_hw_counters).ds;
|
2009-05-26 08:10:00 +02:00
|
|
|
unsigned long flags;
|
|
|
|
int idx;
|
|
|
|
|
|
|
|
if (!x86_pmu.num_counters)
|
|
|
|
return;
|
|
|
|
|
|
|
|
local_irq_save(flags);
|
|
|
|
|
|
|
|
printk("clearing PMU state on CPU#%d\n", smp_processor_id());
|
|
|
|
|
|
|
|
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
|
|
|
|
checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
|
|
|
|
checking_wrmsrl(x86_pmu.perfctr + idx, 0ull);
|
|
|
|
}
|
|
|
|
for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
|
|
|
|
checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
|
|
|
|
}
|
2009-07-21 15:56:48 +02:00
|
|
|
if (ds)
|
|
|
|
ds->bts_index = ds->bts_buffer_base;
|
2009-05-26 08:10:00 +02:00
|
|
|
|
|
|
|
local_irq_restore(flags);
|
|
|
|
}
|
|
|
|
|
2009-07-08 17:46:14 -04:00
|
|
|
static int p6_pmu_handle_irq(struct pt_regs *regs)
|
|
|
|
{
|
|
|
|
struct perf_sample_data data;
|
|
|
|
struct cpu_hw_counters *cpuc;
|
|
|
|
struct perf_counter *counter;
|
|
|
|
struct hw_perf_counter *hwc;
|
|
|
|
int idx, handled = 0;
|
|
|
|
u64 val;
|
|
|
|
|
|
|
|
data.regs = regs;
|
|
|
|
data.addr = 0;
|
|
|
|
|
|
|
|
cpuc = &__get_cpu_var(cpu_hw_counters);
|
|
|
|
|
|
|
|
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
|
|
|
|
if (!test_bit(idx, cpuc->active_mask))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
counter = cpuc->counters[idx];
|
|
|
|
hwc = &counter->hw;
|
|
|
|
|
|
|
|
val = x86_perf_counter_update(counter, hwc, idx);
|
|
|
|
if (val & (1ULL << (x86_pmu.counter_bits - 1)))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* counter overflow
|
|
|
|
*/
|
|
|
|
handled = 1;
|
|
|
|
data.period = counter->hw.last_period;
|
|
|
|
|
|
|
|
if (!x86_perf_counter_set_period(counter, hwc, idx))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (perf_counter_overflow(counter, 1, &data))
|
|
|
|
p6_pmu_disable_counter(hwc, idx);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (handled)
|
|
|
|
inc_irq_stat(apic_perf_irqs);
|
|
|
|
|
|
|
|
return handled;
|
|
|
|
}
|
2009-05-26 08:10:00 +02:00
|
|
|
|
2008-12-03 10:39:53 +01:00
|
|
|
/*
|
|
|
|
* This handler is triggered by the local APIC, so the APIC IRQ handling
|
|
|
|
* rules apply:
|
|
|
|
*/
|
2009-06-03 13:12:55 +08:00
|
|
|
static int intel_pmu_handle_irq(struct pt_regs *regs)
|
2008-12-03 10:39:53 +01:00
|
|
|
{
|
2009-06-10 21:02:22 +02:00
|
|
|
struct perf_sample_data data;
|
2009-05-15 08:26:20 +02:00
|
|
|
struct cpu_hw_counters *cpuc;
|
2009-07-08 17:46:14 -04:00
|
|
|
int bit, loops;
|
2009-01-23 14:36:16 +01:00
|
|
|
u64 ack, status;
|
2009-05-15 08:26:20 +02:00
|
|
|
|
2009-06-10 21:02:22 +02:00
|
|
|
data.regs = regs;
|
|
|
|
data.addr = 0;
|
|
|
|
|
2009-07-08 17:46:14 -04:00
|
|
|
cpuc = &__get_cpu_var(cpu_hw_counters);
|
2008-12-03 10:39:53 +01:00
|
|
|
|
2009-05-13 16:21:38 +02:00
|
|
|
perf_disable();
|
2009-07-21 15:56:48 +02:00
|
|
|
intel_pmu_drain_bts_buffer(cpuc, &data);
|
2009-04-29 12:47:25 +02:00
|
|
|
status = intel_pmu_get_status();
|
2009-05-13 16:21:38 +02:00
|
|
|
if (!status) {
|
|
|
|
perf_enable();
|
|
|
|
return 0;
|
|
|
|
}
|
2008-12-08 14:20:16 +01:00
|
|
|
|
2009-05-15 08:26:20 +02:00
|
|
|
loops = 0;
|
2008-12-03 10:39:53 +01:00
|
|
|
again:
|
2009-05-15 08:26:20 +02:00
|
|
|
if (++loops > 100) {
|
|
|
|
WARN_ONCE(1, "perfcounters: irq loop stuck!\n");
|
2009-05-20 20:13:28 +02:00
|
|
|
perf_counter_print_debug();
|
2009-05-26 08:10:00 +02:00
|
|
|
intel_pmu_reset();
|
|
|
|
perf_enable();
|
2009-05-15 08:26:20 +02:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2009-02-09 07:38:50 +01:00
|
|
|
inc_irq_stat(apic_perf_irqs);
|
2008-12-03 10:39:53 +01:00
|
|
|
ack = status;
|
2008-12-22 11:10:42 +01:00
|
|
|
for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
|
2008-12-17 13:09:20 +01:00
|
|
|
struct perf_counter *counter = cpuc->counters[bit];
|
2008-12-03 10:39:53 +01:00
|
|
|
|
|
|
|
clear_bit(bit, (unsigned long *) &status);
|
2009-04-29 16:55:56 +02:00
|
|
|
if (!test_bit(bit, cpuc->active_mask))
|
2008-12-03 10:39:53 +01:00
|
|
|
continue;
|
|
|
|
|
2009-06-02 16:08:20 +02:00
|
|
|
if (!intel_pmu_save_and_restart(counter))
|
|
|
|
continue;
|
|
|
|
|
2009-06-15 19:00:20 +02:00
|
|
|
data.period = counter->hw.last_period;
|
|
|
|
|
2009-06-10 21:02:22 +02:00
|
|
|
if (perf_counter_overflow(counter, 1, &data))
|
2009-04-29 12:47:19 +02:00
|
|
|
intel_pmu_disable_counter(&counter->hw, bit);
|
2008-12-03 10:39:53 +01:00
|
|
|
}
|
|
|
|
|
2009-04-29 12:47:07 +02:00
|
|
|
intel_pmu_ack_status(ack);
|
2008-12-03 10:39:53 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Repeat if there is more work to be done:
|
|
|
|
*/
|
2009-04-29 12:47:25 +02:00
|
|
|
status = intel_pmu_get_status();
|
2008-12-03 10:39:53 +01:00
|
|
|
if (status)
|
|
|
|
goto again;
|
2009-03-05 18:08:27 +01:00
|
|
|
|
2009-05-25 17:39:04 +02:00
|
|
|
perf_enable();
|
2009-05-13 16:21:38 +02:00
|
|
|
|
|
|
|
return 1;
|
2009-01-23 10:13:01 +01:00
|
|
|
}
|
|
|
|
|
2009-06-03 13:12:55 +08:00
|
|
|
static int amd_pmu_handle_irq(struct pt_regs *regs)
|
2009-04-29 12:47:21 +02:00
|
|
|
{
|
2009-06-10 21:02:22 +02:00
|
|
|
struct perf_sample_data data;
|
2009-05-15 08:26:20 +02:00
|
|
|
struct cpu_hw_counters *cpuc;
|
2009-04-29 12:47:21 +02:00
|
|
|
struct perf_counter *counter;
|
|
|
|
struct hw_perf_counter *hwc;
|
2009-07-08 17:46:14 -04:00
|
|
|
int idx, handled = 0;
|
2009-05-15 08:26:20 +02:00
|
|
|
u64 val;
|
|
|
|
|
2009-06-10 21:02:22 +02:00
|
|
|
data.regs = regs;
|
|
|
|
data.addr = 0;
|
|
|
|
|
2009-07-08 17:46:14 -04:00
|
|
|
cpuc = &__get_cpu_var(cpu_hw_counters);
|
2009-05-13 13:21:36 +02:00
|
|
|
|
2009-04-29 12:47:21 +02:00
|
|
|
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
|
2009-04-29 16:55:56 +02:00
|
|
|
if (!test_bit(idx, cpuc->active_mask))
|
2009-04-29 12:47:21 +02:00
|
|
|
continue;
|
2009-05-13 13:21:36 +02:00
|
|
|
|
2009-04-29 12:47:21 +02:00
|
|
|
counter = cpuc->counters[idx];
|
|
|
|
hwc = &counter->hw;
|
2009-05-14 14:52:17 +02:00
|
|
|
|
2009-04-29 12:47:22 +02:00
|
|
|
val = x86_perf_counter_update(counter, hwc, idx);
|
2009-04-29 12:47:21 +02:00
|
|
|
if (val & (1ULL << (x86_pmu.counter_bits - 1)))
|
2009-05-25 17:39:04 +02:00
|
|
|
continue;
|
2009-05-13 13:21:36 +02:00
|
|
|
|
2009-06-10 21:34:59 +02:00
|
|
|
/*
|
|
|
|
* counter overflow
|
|
|
|
*/
|
|
|
|
handled = 1;
|
|
|
|
data.period = counter->hw.last_period;
|
|
|
|
|
2009-06-02 16:08:20 +02:00
|
|
|
if (!x86_perf_counter_set_period(counter, hwc, idx))
|
|
|
|
continue;
|
|
|
|
|
2009-06-10 21:02:22 +02:00
|
|
|
if (perf_counter_overflow(counter, 1, &data))
|
2009-04-29 12:47:21 +02:00
|
|
|
amd_pmu_disable_counter(hwc, idx);
|
|
|
|
}
|
2009-05-13 13:21:36 +02:00
|
|
|
|
2009-06-10 21:34:59 +02:00
|
|
|
if (handled)
|
|
|
|
inc_irq_stat(apic_perf_irqs);
|
|
|
|
|
2009-04-29 12:47:21 +02:00
|
|
|
return handled;
|
|
|
|
}
|
2009-04-29 12:47:05 +02:00
|
|
|
|
2009-04-06 11:45:03 +02:00
|
|
|
void smp_perf_pending_interrupt(struct pt_regs *regs)
|
|
|
|
{
|
|
|
|
irq_enter();
|
|
|
|
ack_APIC_irq();
|
|
|
|
inc_irq_stat(apic_pending_irqs);
|
|
|
|
perf_counter_do_pending();
|
|
|
|
irq_exit();
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Send LOCAL_PENDING_VECTOR to the current CPU through the APIC's
 * send_IPI_self hook. The function body is empty when
 * CONFIG_X86_LOCAL_APIC is not set.
 */
void set_perf_counter_pending(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
	apic->send_IPI_self(LOCAL_PENDING_VECTOR);
#endif
}
|
|
|
|
|
2009-05-29 13:28:35 +08:00
|
|
|
/*
 * Program the local APIC performance-counter LVT entry for NMI
 * delivery. Nothing is written unless x86_pmu.apic is set and
 * x86_pmu_initialized() returns true; without CONFIG_X86_LOCAL_APIC
 * the function is empty.
 */
void perf_counters_lapic_init(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
	/* Always use NMI for PMU */
	if (x86_pmu.apic && x86_pmu_initialized())
		apic_write(APIC_LVTPC, APIC_DM_NMI);
#endif
}
|
|
|
|
|
|
|
|
static int __kprobes
|
|
|
|
perf_counter_nmi_handler(struct notifier_block *self,
|
|
|
|
unsigned long cmd, void *__args)
|
|
|
|
{
|
|
|
|
struct die_args *args = __args;
|
|
|
|
struct pt_regs *regs;
|
2009-03-05 18:08:27 +01:00
|
|
|
|
2009-05-04 18:47:44 +02:00
|
|
|
if (!atomic_read(&active_counters))
|
2009-05-01 12:23:17 +02:00
|
|
|
return NOTIFY_DONE;
|
|
|
|
|
2009-03-05 18:08:27 +01:00
|
|
|
switch (cmd) {
|
|
|
|
case DIE_NMI:
|
|
|
|
case DIE_NMI_IPI:
|
|
|
|
break;
|
2008-12-03 10:39:53 +01:00
|
|
|
|
2009-03-05 18:08:27 +01:00
|
|
|
default:
|
2008-12-03 10:39:53 +01:00
|
|
|
return NOTIFY_DONE;
|
2009-03-05 18:08:27 +01:00
|
|
|
}
|
2008-12-03 10:39:53 +01:00
|
|
|
|
|
|
|
regs = args->regs;
|
|
|
|
|
2009-08-11 10:40:08 +02:00
|
|
|
#ifdef CONFIG_X86_LOCAL_APIC
|
2008-12-03 10:39:53 +01:00
|
|
|
apic_write(APIC_LVTPC, APIC_DM_NMI);
|
2009-08-11 10:40:08 +02:00
|
|
|
#endif
|
2009-05-14 14:52:17 +02:00
|
|
|
/*
|
|
|
|
* Can't rely on the handled return value to say it was our NMI, two
|
|
|
|
* counters could trigger 'simultaneously' raising two back-to-back NMIs.
|
|
|
|
*
|
|
|
|
* If the first NMI handles both, the latter will be empty and daze
|
|
|
|
* the CPU.
|
|
|
|
*/
|
2009-06-03 13:12:55 +08:00
|
|
|
x86_pmu.handle_irq(regs);
|
2008-12-03 10:39:53 +01:00
|
|
|
|
2009-05-14 14:52:17 +02:00
|
|
|
return NOTIFY_STOP;
|
2008-12-03 10:39:53 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
|
2009-02-04 17:11:34 +01:00
|
|
|
.notifier_call = perf_counter_nmi_handler,
|
|
|
|
.next = NULL,
|
|
|
|
.priority = 1
|
2008-12-03 10:39:53 +01:00
|
|
|
};
|
|
|
|
|
2009-07-08 17:46:14 -04:00
|
|
|
static struct x86_pmu p6_pmu = {
|
|
|
|
.name = "p6",
|
|
|
|
.handle_irq = p6_pmu_handle_irq,
|
|
|
|
.disable_all = p6_pmu_disable_all,
|
|
|
|
.enable_all = p6_pmu_enable_all,
|
|
|
|
.enable = p6_pmu_enable_counter,
|
|
|
|
.disable = p6_pmu_disable_counter,
|
|
|
|
.eventsel = MSR_P6_EVNTSEL0,
|
|
|
|
.perfctr = MSR_P6_PERFCTR0,
|
|
|
|
.event_map = p6_pmu_event_map,
|
|
|
|
.raw_event = p6_pmu_raw_event,
|
|
|
|
.max_events = ARRAY_SIZE(p6_perfmon_event_map),
|
2009-08-11 10:40:08 +02:00
|
|
|
.apic = 1,
|
2009-07-08 17:46:14 -04:00
|
|
|
.max_period = (1ULL << 31) - 1,
|
|
|
|
.version = 0,
|
|
|
|
.num_counters = 2,
|
|
|
|
/*
|
|
|
|
* Counters have 40 bits implemented. However they are designed such
|
|
|
|
* that bits [32-39] are sign extensions of bit 31. As such the
|
|
|
|
* effective width of a counter for P6-like PMU is 32 bits only.
|
|
|
|
*
|
|
|
|
* See IA-32 Intel Architecture Software developer manual Vol 3B
|
|
|
|
*/
|
|
|
|
.counter_bits = 32,
|
|
|
|
.counter_mask = (1ULL << 32) - 1,
|
|
|
|
};
|
|
|
|
|
2009-04-29 12:47:04 +02:00
|
|
|
static struct x86_pmu intel_pmu = {
|
2009-04-29 12:47:13 +02:00
|
|
|
.name = "Intel",
|
2009-04-29 12:47:05 +02:00
|
|
|
.handle_irq = intel_pmu_handle_irq,
|
2009-05-13 16:21:38 +02:00
|
|
|
.disable_all = intel_pmu_disable_all,
|
|
|
|
.enable_all = intel_pmu_enable_all,
|
2009-04-29 12:47:04 +02:00
|
|
|
.enable = intel_pmu_enable_counter,
|
|
|
|
.disable = intel_pmu_disable_counter,
|
2009-02-27 18:09:09 +05:30
|
|
|
.eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
|
|
|
|
.perfctr = MSR_ARCH_PERFMON_PERFCTR0,
|
2009-04-29 12:47:04 +02:00
|
|
|
.event_map = intel_pmu_event_map,
|
|
|
|
.raw_event = intel_pmu_raw_event,
|
2009-02-27 18:09:09 +05:30
|
|
|
.max_events = ARRAY_SIZE(intel_perfmon_event_map),
|
2009-08-11 10:40:08 +02:00
|
|
|
.apic = 1,
|
2009-04-29 12:47:23 +02:00
|
|
|
/*
|
|
|
|
* Intel PMCs cannot be accessed sanely above 32 bit width,
|
|
|
|
* so we install an artificial 1<<31 period regardless of
|
|
|
|
* the generic counter period:
|
|
|
|
*/
|
|
|
|
.max_period = (1ULL << 31) - 1,
|
2009-07-21 15:56:48 +02:00
|
|
|
.enable_bts = intel_pmu_enable_bts,
|
|
|
|
.disable_bts = intel_pmu_disable_bts,
|
2009-02-27 18:09:09 +05:30
|
|
|
};
|
|
|
|
|
2009-04-29 12:47:04 +02:00
|
|
|
static struct x86_pmu amd_pmu = {
|
2009-04-29 12:47:13 +02:00
|
|
|
.name = "AMD",
|
2009-04-29 12:47:05 +02:00
|
|
|
.handle_irq = amd_pmu_handle_irq,
|
2009-05-13 16:21:38 +02:00
|
|
|
.disable_all = amd_pmu_disable_all,
|
|
|
|
.enable_all = amd_pmu_enable_all,
|
2009-04-29 12:47:04 +02:00
|
|
|
.enable = amd_pmu_enable_counter,
|
|
|
|
.disable = amd_pmu_disable_counter,
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 20:15:14 +05:30
|
|
|
.eventsel = MSR_K7_EVNTSEL0,
|
|
|
|
.perfctr = MSR_K7_PERFCTR0,
|
2009-04-29 12:47:04 +02:00
|
|
|
.event_map = amd_pmu_event_map,
|
|
|
|
.raw_event = amd_pmu_raw_event,
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 20:15:14 +05:30
|
|
|
.max_events = ARRAY_SIZE(amd_perfmon_event_map),
|
2009-04-29 12:47:12 +02:00
|
|
|
.num_counters = 4,
|
|
|
|
.counter_bits = 48,
|
|
|
|
.counter_mask = (1ULL << 48) - 1,
|
2009-08-11 10:40:08 +02:00
|
|
|
.apic = 1,
|
2009-04-29 12:47:23 +02:00
|
|
|
/* use highest bit to detect overflow */
|
|
|
|
.max_period = (1ULL << 47) - 1,
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 20:15:14 +05:30
|
|
|
};
|
|
|
|
|
2009-07-08 17:46:14 -04:00
|
|
|
static int p6_pmu_init(void)
|
|
|
|
{
|
|
|
|
switch (boot_cpu_data.x86_model) {
|
|
|
|
case 1:
|
|
|
|
case 3: /* Pentium Pro */
|
|
|
|
case 5:
|
|
|
|
case 6: /* Pentium II */
|
|
|
|
case 7:
|
|
|
|
case 8:
|
|
|
|
case 11: /* Pentium III */
|
|
|
|
break;
|
|
|
|
case 9:
|
|
|
|
case 13:
|
2009-07-12 04:32:40 -07:00
|
|
|
/* Pentium M */
|
|
|
|
break;
|
2009-07-08 17:46:14 -04:00
|
|
|
default:
|
|
|
|
pr_cont("unsupported p6 CPU model %d ",
|
|
|
|
boot_cpu_data.x86_model);
|
|
|
|
return -ENODEV;
|
|
|
|
}
|
|
|
|
|
2009-08-11 10:40:08 +02:00
|
|
|
x86_pmu = p6_pmu;
|
|
|
|
|
2009-07-08 17:46:14 -04:00
|
|
|
if (!cpu_has_apic) {
|
2009-08-11 10:47:36 +02:00
|
|
|
pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
|
2009-08-11 10:40:08 +02:00
|
|
|
pr_info("no hardware sampling interrupt available.\n");
|
|
|
|
x86_pmu.apic = 0;
|
2009-07-08 17:46:14 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2009-04-29 12:47:10 +02:00
|
|
|
static int intel_pmu_init(void)
|
2008-12-03 10:39:53 +01:00
|
|
|
{
|
2009-03-18 08:59:21 +01:00
|
|
|
union cpuid10_edx edx;
|
2008-12-03 10:39:53 +01:00
|
|
|
union cpuid10_eax eax;
|
2008-12-17 10:51:15 +01:00
|
|
|
unsigned int unused;
|
2009-03-18 08:59:21 +01:00
|
|
|
unsigned int ebx;
|
2009-04-29 12:47:13 +02:00
|
|
|
int version;
|
2008-12-03 10:39:53 +01:00
|
|
|
|
2009-07-08 17:46:14 -04:00
|
|
|
if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
|
|
|
|
/* check for P6 processor family */
|
|
|
|
if (boot_cpu_data.x86 == 6) {
|
|
|
|
return p6_pmu_init();
|
|
|
|
} else {
|
2009-04-29 12:47:10 +02:00
|
|
|
return -ENODEV;
|
2009-07-08 17:46:14 -04:00
|
|
|
}
|
|
|
|
}
|
2009-04-29 12:46:58 +02:00
|
|
|
|
2008-12-03 10:39:53 +01:00
|
|
|
/*
|
|
|
|
* Check whether the Architectural PerfMon supports
|
|
|
|
* Branch Misses Retired Event or not.
|
|
|
|
*/
|
2008-12-17 10:51:15 +01:00
|
|
|
cpuid(10, &eax.full, &ebx, &unused, &edx.full);
|
2008-12-03 10:39:53 +01:00
|
|
|
if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
|
2009-04-29 12:47:10 +02:00
|
|
|
return -ENODEV;
|
2008-12-03 10:39:53 +01:00
|
|
|
|
2009-04-29 12:47:13 +02:00
|
|
|
version = eax.split.version_id;
|
|
|
|
if (version < 2)
|
2009-04-29 12:47:10 +02:00
|
|
|
return -ENODEV;
|
2009-03-18 08:59:21 +01:00
|
|
|
|
2009-05-29 11:25:09 +02:00
|
|
|
x86_pmu = intel_pmu;
|
|
|
|
x86_pmu.version = version;
|
|
|
|
x86_pmu.num_counters = eax.split.num_counters;
|
|
|
|
x86_pmu.counter_bits = eax.split.bit_width;
|
|
|
|
x86_pmu.counter_mask = (1ULL << eax.split.bit_width) - 1;
|
2009-05-04 19:04:09 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Quirk: v2 perfmon does not report fixed-purpose counters, so
|
|
|
|
* assume at least 3 counters:
|
|
|
|
*/
|
2009-05-29 11:25:09 +02:00
|
|
|
x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);
|
2009-02-27 18:09:09 +05:30
|
|
|
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
/*
|
2009-05-29 11:25:09 +02:00
|
|
|
* Install the hw-cache-events table:
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
*/
|
|
|
|
switch (boot_cpu_data.x86_model) {
|
2009-06-10 17:06:12 +08:00
|
|
|
case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
|
|
|
|
case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
|
|
|
|
case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
|
|
|
|
case 29: /* six-core 45 nm xeon "Dunnington" */
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
|
2009-06-08 19:10:25 +02:00
|
|
|
sizeof(hw_cache_event_ids));
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
|
2009-05-29 11:25:09 +02:00
|
|
|
pr_cont("Core2 events, ");
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
case 26:
|
|
|
|
memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
|
2009-06-08 19:10:25 +02:00
|
|
|
sizeof(hw_cache_event_ids));
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
|
2009-05-29 11:25:09 +02:00
|
|
|
pr_cont("Nehalem/Corei7 events, ");
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
break;
|
|
|
|
case 28:
|
|
|
|
memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
|
2009-06-08 19:10:25 +02:00
|
|
|
sizeof(hw_cache_event_ids));
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
|
2009-05-29 11:25:09 +02:00
|
|
|
pr_cont("Atom events, ");
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 20:22:46 +02:00
|
|
|
break;
|
|
|
|
}
|
2009-04-29 12:47:10 +02:00
|
|
|
return 0;
|
2009-02-27 18:09:09 +05:30
|
|
|
}
|
|
|
|
|
2009-04-29 12:47:10 +02:00
|
|
|
static int amd_pmu_init(void)
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 20:15:14 +05:30
|
|
|
{
|
2009-06-11 15:28:09 +05:30
|
|
|
/* Performance-monitoring supported from K7 and later: */
|
|
|
|
if (boot_cpu_data.x86 < 6)
|
|
|
|
return -ENODEV;
|
|
|
|
|
2009-04-29 12:47:11 +02:00
|
|
|
x86_pmu = amd_pmu;
|
2009-06-08 22:33:10 +02:00
|
|
|
|
2009-06-13 01:06:21 +05:30
|
|
|
/* Events are common for all AMDs */
|
|
|
|
memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
|
|
|
|
sizeof(hw_cache_event_ids));
|
2009-06-08 22:33:10 +02:00
|
|
|
|
2009-04-29 12:47:10 +02:00
|
|
|
return 0;
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 20:15:14 +05:30
|
|
|
}
|
|
|
|
|
2009-02-27 18:09:09 +05:30
|
|
|
/*
 * Boot-time initialisation of the hardware performance counters.
 *
 * Runs the vendor specific PMU init routine, clips the generic and fixed
 * counter numbers to X86_PMC_MAX_GENERIC / X86_PMC_MAX_FIXED, builds
 * perf_counter_mask from both, then calls perf_counters_lapic_init(),
 * registers perf_counter_nmi_notifier as a die notifier and logs the
 * resulting configuration.
 *
 * When no PMU driver is available (unknown vendor, or the vendor init
 * routine failed) only a log message is emitted and nothing is set up.
 */
void __init init_hw_perf_counters(void)
{
	int err;

	/* Opens a log line; every path below must finish it via pr_cont(). */
	pr_info("Performance Counters: ");

	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_INTEL:
		err = intel_pmu_init();
		break;
	case X86_VENDOR_AMD:
		err = amd_pmu_init();
		break;
	default:
		/*
		 * Unknown vendor: take the common failure path so that the
		 * log line opened above gets terminated.
		 */
		err = -ENODEV;
		break;
	}
	if (err != 0) {
		pr_cont("no PMU driver, software counters only.\n");
		return;
	}

	pr_cont("%s PMU driver.\n", x86_pmu.name);

	if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
		WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
		     x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
		x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
	}
	/* perf_counter_mask is 64 bit wide: do the shift in 64 bit as well. */
	perf_counter_mask = (1ULL << x86_pmu.num_counters) - 1;
	perf_max_counters = x86_pmu.num_counters;

	if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
		WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!",
		     x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
		x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
	}

	/* The fixed-purpose counter bits start at X86_PMC_IDX_FIXED. */
	perf_counter_mask |=
		((1ULL << x86_pmu.num_counters_fixed) - 1) << X86_PMC_IDX_FIXED;
	x86_pmu.intel_ctrl = perf_counter_mask;

	perf_counters_lapic_init();
	register_die_notifier(&perf_counter_nmi_notifier);

	pr_info("... version: %d\n", x86_pmu.version);
	pr_info("... bit width: %d\n", x86_pmu.counter_bits);
	pr_info("... generic counters: %d\n", x86_pmu.num_counters);
	pr_info("... value mask: %016Lx\n", x86_pmu.counter_mask);
	pr_info("... max period: %016Lx\n", x86_pmu.max_period);
	pr_info("... fixed-purpose counters: %d\n", x86_pmu.num_counters_fixed);
	pr_info("... counter mask: %016Lx\n", perf_counter_mask);
}
|
2008-12-11 12:46:46 +01:00
|
|
|
|
2009-04-29 12:47:14 +02:00
|
|
|
static inline void x86_pmu_read(struct perf_counter *counter)
|
2008-12-13 09:00:03 +01:00
|
|
|
{
|
|
|
|
x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
|
|
|
|
}
|
|
|
|
|
2009-04-29 12:47:03 +02:00
|
|
|
static const struct pmu pmu = {
|
|
|
|
.enable = x86_pmu_enable,
|
|
|
|
.disable = x86_pmu_disable,
|
|
|
|
.read = x86_pmu_read,
|
2009-05-25 17:39:05 +02:00
|
|
|
.unthrottle = x86_pmu_unthrottle,
|
2008-12-11 12:46:46 +01:00
|
|
|
};
|
|
|
|
|
2009-04-29 12:47:03 +02:00
|
|
|
const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
|
2008-12-11 12:46:46 +01:00
|
|
|
{
|
|
|
|
int err;
|
|
|
|
|
|
|
|
err = __hw_perf_counter_init(counter);
|
|
|
|
if (err)
|
2009-03-30 19:07:09 +02:00
|
|
|
return ERR_PTR(err);
|
2008-12-11 12:46:46 +01:00
|
|
|
|
2009-04-29 12:47:03 +02:00
|
|
|
return &pmu;
|
2008-12-11 12:46:46 +01:00
|
|
|
}
|
2009-03-30 19:07:15 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* callchain support
|
|
|
|
*/
|
|
|
|
|
|
|
|
static inline
|
2009-06-18 22:20:52 +02:00
|
|
|
void callchain_store(struct perf_callchain_entry *entry, u64 ip)
|
2009-03-30 19:07:15 +02:00
|
|
|
{
|
2009-06-18 22:20:52 +02:00
|
|
|
if (entry->nr < PERF_MAX_STACK_DEPTH)
|
2009-03-30 19:07:15 +02:00
|
|
|
entry->ip[entry->nr++] = ip;
|
|
|
|
}
|
|
|
|
|
|
|
|
static DEFINE_PER_CPU(struct perf_callchain_entry, irq_entry);
|
|
|
|
static DEFINE_PER_CPU(struct perf_callchain_entry, nmi_entry);
|
2009-07-01 21:02:09 +02:00
|
|
|
static DEFINE_PER_CPU(int, in_nmi_frame);
|
2009-03-30 19:07:15 +02:00
|
|
|
|
|
|
|
|
|
|
|
/* ->warning_symbol() callback of backtrace_ops: intentionally a no-op. */
static void
backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
{
	/* Warnings are not recorded in the callchain */
}
|
|
|
|
|
|
|
|
/* ->warning() callback of backtrace_ops: intentionally a no-op. */
static void backtrace_warning(void *data, char *msg)
{
	/* Warnings are not recorded in the callchain */
}
|
|
|
|
|
|
|
|
static int backtrace_stack(void *data, char *name)
|
|
|
|
{
|
2009-07-01 21:02:09 +02:00
|
|
|
per_cpu(in_nmi_frame, smp_processor_id()) =
|
|
|
|
x86_is_stack_id(NMI_STACK, name);
|
|
|
|
|
2009-06-15 09:57:59 +02:00
|
|
|
return 0;
|
2009-03-30 19:07:15 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static void backtrace_address(void *data, unsigned long addr, int reliable)
|
|
|
|
{
|
|
|
|
struct perf_callchain_entry *entry = data;
|
|
|
|
|
2009-07-01 21:02:09 +02:00
|
|
|
if (per_cpu(in_nmi_frame, smp_processor_id()))
|
|
|
|
return;
|
|
|
|
|
2009-03-30 19:07:15 +02:00
|
|
|
if (reliable)
|
|
|
|
callchain_store(entry, addr);
|
|
|
|
}
|
|
|
|
|
|
|
|
static const struct stacktrace_ops backtrace_ops = {
|
|
|
|
.warning = backtrace_warning,
|
|
|
|
.warning_symbol = backtrace_warning_symbol,
|
|
|
|
.stack = backtrace_stack,
|
|
|
|
.address = backtrace_address,
|
|
|
|
};
|
|
|
|
|
2009-06-15 09:57:59 +02:00
|
|
|
#include "../dumpstack.h"
|
|
|
|
|
2009-03-30 19:07:15 +02:00
|
|
|
static void
|
|
|
|
perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
|
|
|
|
{
|
2009-06-18 22:20:52 +02:00
|
|
|
callchain_store(entry, PERF_CONTEXT_KERNEL);
|
2009-06-15 09:57:59 +02:00
|
|
|
callchain_store(entry, regs->ip);
|
2009-03-30 19:07:15 +02:00
|
|
|
|
2009-06-18 22:20:52 +02:00
|
|
|
dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
|
2009-03-30 19:07:15 +02:00
|
|
|
}
|
|
|
|
|
2009-06-15 13:07:24 +02:00
|
|
|
/*
|
|
|
|
* best effort, GUP based copy_from_user() that assumes IRQ or NMI context
|
|
|
|
*/
|
|
|
|
static unsigned long
|
|
|
|
copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
|
2009-03-30 19:07:15 +02:00
|
|
|
{
|
2009-06-15 13:07:24 +02:00
|
|
|
unsigned long offset, addr = (unsigned long)from;
|
|
|
|
int type = in_nmi() ? KM_NMI : KM_IRQ0;
|
|
|
|
unsigned long size, len = 0;
|
|
|
|
struct page *page;
|
|
|
|
void *map;
|
2009-03-30 19:07:15 +02:00
|
|
|
int ret;
|
|
|
|
|
2009-06-15 13:07:24 +02:00
|
|
|
do {
|
|
|
|
ret = __get_user_pages_fast(addr, 1, 0, &page);
|
|
|
|
if (!ret)
|
|
|
|
break;
|
2009-03-30 19:07:15 +02:00
|
|
|
|
2009-06-15 13:07:24 +02:00
|
|
|
offset = addr & (PAGE_SIZE - 1);
|
|
|
|
size = min(PAGE_SIZE - offset, n - len);
|
2009-03-30 19:07:15 +02:00
|
|
|
|
2009-06-15 13:07:24 +02:00
|
|
|
map = kmap_atomic(page, type);
|
|
|
|
memcpy(to, map+offset, size);
|
|
|
|
kunmap_atomic(map, type);
|
|
|
|
put_page(page);
|
|
|
|
|
|
|
|
len += size;
|
|
|
|
to += size;
|
|
|
|
addr += size;
|
|
|
|
|
|
|
|
} while (len < n);
|
|
|
|
|
|
|
|
return len;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Fetch one stack frame from the user address @fp into @frame.
 *
 * Returns 1 when the complete frame was copied, 0 otherwise.
 */
static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
{
	return copy_from_user_nmi(frame, fp, sizeof(*frame)) == sizeof(*frame);
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
|
|
|
|
{
|
|
|
|
struct stack_frame frame;
|
|
|
|
const void __user *fp;
|
|
|
|
|
2009-05-29 11:25:09 +02:00
|
|
|
if (!user_mode(regs))
|
|
|
|
regs = task_pt_regs(current);
|
|
|
|
|
2009-06-15 13:07:24 +02:00
|
|
|
fp = (void __user *)regs->bp;
|
2009-03-30 19:07:15 +02:00
|
|
|
|
2009-06-18 22:20:52 +02:00
|
|
|
callchain_store(entry, PERF_CONTEXT_USER);
|
2009-03-30 19:07:15 +02:00
|
|
|
callchain_store(entry, regs->ip);
|
|
|
|
|
2009-06-18 22:20:52 +02:00
|
|
|
while (entry->nr < PERF_MAX_STACK_DEPTH) {
|
2009-06-15 09:57:59 +02:00
|
|
|
frame.next_frame = NULL;
|
2009-03-30 19:07:15 +02:00
|
|
|
frame.return_address = 0;
|
|
|
|
|
|
|
|
if (!copy_stack_frame(fp, &frame))
|
|
|
|
break;
|
|
|
|
|
2009-05-29 11:25:09 +02:00
|
|
|
if ((unsigned long)fp < regs->sp)
|
2009-03-30 19:07:15 +02:00
|
|
|
break;
|
|
|
|
|
|
|
|
callchain_store(entry, frame.return_address);
|
2009-06-15 09:57:59 +02:00
|
|
|
fp = frame.next_frame;
|
2009-03-30 19:07:15 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
|
|
|
|
{
|
|
|
|
int is_user;
|
|
|
|
|
|
|
|
if (!regs)
|
|
|
|
return;
|
|
|
|
|
|
|
|
is_user = user_mode(regs);
|
|
|
|
|
|
|
|
if (!current || current->pid == 0)
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (is_user && current->state != TASK_RUNNING)
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (!is_user)
|
|
|
|
perf_callchain_kernel(regs, entry);
|
|
|
|
|
|
|
|
if (current->mm)
|
|
|
|
perf_callchain_user(regs, entry);
|
|
|
|
}
|
|
|
|
|
|
|
|
struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
|
|
|
|
{
|
|
|
|
struct perf_callchain_entry *entry;
|
|
|
|
|
|
|
|
if (in_nmi())
|
|
|
|
entry = &__get_cpu_var(nmi_entry);
|
|
|
|
else
|
|
|
|
entry = &__get_cpu_var(irq_entry);
|
|
|
|
|
|
|
|
entry->nr = 0;
|
|
|
|
|
|
|
|
perf_do_callchain(regs, entry);
|
|
|
|
|
|
|
|
return entry;
|
|
|
|
}
|
2009-07-21 15:56:48 +02:00
|
|
|
|
|
|
|
/* Per-cpu online hook: runs init_debug_store_on_cpu() for @cpu. */
void hw_perf_counter_setup_online(int cpu)
{
	init_debug_store_on_cpu(cpu);
}
|