mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-15 21:23:23 +00:00
perf updates:
core: - Allow ftrace to instrument parts of the perf core code - Add a new mem_hops field to perf_mem_data_src which allows to represent intra-node/package or inter-node/off-package details to prepare for next generation systems which have more hieararchy within the node/pacakge level. tools: - Update for the new mem_hops field in perf_mem_data_src arch: - A set of constraints fixes for the Intel uncore PMU - The usual set of small fixes and improvements for x86 and PPC -----BEGIN PGP SIGNATURE----- iQJHBAABCgAxFiEEQp8+kY+LLUocC4bMphj1TA10mKEFAmF/GkQTHHRnbHhAbGlu dXRyb25peC5kZQAKCRCmGPVMDXSYoaD8D/wLhXR8RxtF4W9HJmHA+5XFsPtg+isp ZNU2kOs4gZskFx75NQaRv5ikA8y68TKdIx+NuQvRLYItaMveTToLSsJ55bfGMxIQ JHqDvANUNxBmAACnbYQlqf9WgB0i/3fCUHY5lpmN0waKjaswz7WNpycv4ccShVZr PKbgEjkeFBhplCqqOF0X5H3V+4q85+nZONm1iSNd4S7/3B6OCxOf1u78usL1bbtW yJAMSuTeOVUZCJm7oVywKW/ZlCscT135aKr6xe5QTrjlPuRWzuLaXNezdMnMyoVN HVv8a0ClACb8U5KiGfhvaipaIlIAliWJp2qoiNjrspDruhH6Yc+eNh1gUhLbtNpR 4YZR5jxv4/mS13kzMMQg00cCWQl7N4whPT+ZE9pkpshGt+EwT+Iy3U+v13wDfnnp MnDggpWYGEkAck13t/T6DwC3qBIsVujtpiG+tt/ERbTxiuxi1ccQTGY3PDjtHV3k tIMH5n7l4jEpfl8VmoSUgz/2h1MLZnQUWp41GXkjkaOt7uunQZen+nAwqpTm28KV 7U6U0h1q6r7HxOZRxkPPe4HSV+aBNH3H1LeNBfEd3hDCFGf6MY6vLow+2BE9ybk7 Y6LPbRqq0SN3sd5MND0ZvQEt5Zgol8CMlX+UKoLEEv7RognGbIxkgpK7exv5pC9w nWj7TaMfpRzPgw== =Oj0G -----END PGP SIGNATURE----- Merge tag 'perf-core-2021-10-31' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull perf updates from Thomas Gleixner: "Core: - Allow ftrace to instrument parts of the perf core code - Add a new mem_hops field to perf_mem_data_src which allows to represent intra-node/package or inter-node/off-package details to prepare for next generation systems which have more hieararchy within the node/pacakge level. 
Tools: - Update for the new mem_hops field in perf_mem_data_src Arch: - A set of constraints fixes for the Intel uncore PMU - The usual set of small fixes and improvements for x86 and PPC" * tag 'perf-core-2021-10-31' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: perf/x86/intel: Fix ICL/SPR INST_RETIRED.PREC_DIST encodings powerpc/perf: Fix data source encodings for L2.1 and L3.1 accesses tools/perf: Add mem_hops field in perf_mem_data_src structure perf: Add mem_hops field in perf_mem_data_src structure perf: Add comment about current state of PERF_MEM_LVL_* namespace and remove an extra line perf/core: Allow ftrace for functions in kernel/event/core.c perf/x86: Add new event for AUX output counter index perf/x86: Add compiler barrier after updating BTS perf/x86/intel/uncore: Fix Intel SPR M3UPI event constraints perf/x86/intel/uncore: Fix Intel SPR M2PCIE event constraints perf/x86/intel/uncore: Fix Intel SPR IIO event constraints perf/x86/intel/uncore: Fix Intel SPR CHA event constraints perf/x86/intel/uncore: Fix Intel ICX IIO event constraints perf/x86/intel/uncore: Fix invalid unit check perf/x86/intel/uncore: Support extra IMC channel on Ice Lake server
This commit is contained in:
commit
91e1c99e17
@ -238,11 +238,27 @@ static inline u64 isa207_find_source(u64 idx, u32 sub_idx)
|
||||
ret |= P(SNOOP, HIT);
|
||||
break;
|
||||
case 5:
|
||||
ret = PH(LVL, REM_CCE1);
|
||||
if ((sub_idx == 0) || (sub_idx == 2) || (sub_idx == 4))
|
||||
ret |= P(SNOOP, HIT);
|
||||
else if ((sub_idx == 1) || (sub_idx == 3) || (sub_idx == 5))
|
||||
ret |= P(SNOOP, HITM);
|
||||
if (cpu_has_feature(CPU_FTR_ARCH_31)) {
|
||||
ret = REM | P(HOPS, 0);
|
||||
|
||||
if (sub_idx == 0 || sub_idx == 4)
|
||||
ret |= PH(LVL, L2) | LEVEL(L2) | P(SNOOP, HIT);
|
||||
else if (sub_idx == 1 || sub_idx == 5)
|
||||
ret |= PH(LVL, L2) | LEVEL(L2) | P(SNOOP, HITM);
|
||||
else if (sub_idx == 2 || sub_idx == 6)
|
||||
ret |= PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
|
||||
else if (sub_idx == 3 || sub_idx == 7)
|
||||
ret |= PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
|
||||
} else {
|
||||
if (sub_idx == 0)
|
||||
ret = PH(LVL, L2) | LEVEL(L2) | REM | P(SNOOP, HIT) | P(HOPS, 0);
|
||||
else if (sub_idx == 1)
|
||||
ret = PH(LVL, L2) | LEVEL(L2) | REM | P(SNOOP, HITM) | P(HOPS, 0);
|
||||
else if (sub_idx == 2 || sub_idx == 4)
|
||||
ret = PH(LVL, L3) | LEVEL(L3) | REM | P(SNOOP, HIT) | P(HOPS, 0);
|
||||
else if (sub_idx == 3 || sub_idx == 5)
|
||||
ret = PH(LVL, L3) | LEVEL(L3) | REM | P(SNOOP, HITM) | P(HOPS, 0);
|
||||
}
|
||||
break;
|
||||
case 6:
|
||||
ret = PH(LVL, REM_CCE2);
|
||||
|
@ -273,6 +273,8 @@
|
||||
#define P(a, b) PERF_MEM_S(a, b)
|
||||
#define PH(a, b) (P(LVL, HIT) | P(a, b))
|
||||
#define PM(a, b) (P(LVL, MISS) | P(a, b))
|
||||
#define LEVEL(x) P(LVLNUM, x)
|
||||
#define REM P(REMOTE, REMOTE)
|
||||
|
||||
int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp, u64 event_config1);
|
||||
int isa207_compute_mmcr(u64 event[], int n_ev,
|
||||
|
@ -66,6 +66,8 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_enable_all, *x86_pmu.enable_all);
|
||||
DEFINE_STATIC_CALL_NULL(x86_pmu_enable, *x86_pmu.enable);
|
||||
DEFINE_STATIC_CALL_NULL(x86_pmu_disable, *x86_pmu.disable);
|
||||
|
||||
DEFINE_STATIC_CALL_NULL(x86_pmu_assign, *x86_pmu.assign);
|
||||
|
||||
DEFINE_STATIC_CALL_NULL(x86_pmu_add, *x86_pmu.add);
|
||||
DEFINE_STATIC_CALL_NULL(x86_pmu_del, *x86_pmu.del);
|
||||
DEFINE_STATIC_CALL_NULL(x86_pmu_read, *x86_pmu.read);
|
||||
@ -1215,6 +1217,8 @@ static inline void x86_assign_hw_event(struct perf_event *event,
|
||||
hwc->last_cpu = smp_processor_id();
|
||||
hwc->last_tag = ++cpuc->tags[i];
|
||||
|
||||
static_call_cond(x86_pmu_assign)(event, idx);
|
||||
|
||||
switch (hwc->idx) {
|
||||
case INTEL_PMC_IDX_FIXED_BTS:
|
||||
case INTEL_PMC_IDX_FIXED_VLBR:
|
||||
@ -2005,6 +2009,8 @@ static void x86_pmu_static_call_update(void)
|
||||
static_call_update(x86_pmu_enable, x86_pmu.enable);
|
||||
static_call_update(x86_pmu_disable, x86_pmu.disable);
|
||||
|
||||
static_call_update(x86_pmu_assign, x86_pmu.assign);
|
||||
|
||||
static_call_update(x86_pmu_add, x86_pmu.add);
|
||||
static_call_update(x86_pmu_del, x86_pmu.del);
|
||||
static_call_update(x86_pmu_read, x86_pmu.read);
|
||||
|
@ -209,6 +209,12 @@ static void bts_update(struct bts_ctx *bts)
|
||||
} else {
|
||||
local_set(&buf->data_size, head);
|
||||
}
|
||||
|
||||
/*
|
||||
* Since BTS is coherent, just add a compiler barrier to ensure
|
||||
* BTS updating is ordered against bts::handle::event.
|
||||
*/
|
||||
barrier();
|
||||
}
|
||||
|
||||
static int
|
||||
|
@ -243,7 +243,8 @@ static struct extra_reg intel_skl_extra_regs[] __read_mostly = {
|
||||
|
||||
static struct event_constraint intel_icl_event_constraints[] = {
|
||||
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
|
||||
FIXED_EVENT_CONSTRAINT(0x01c0, 0), /* INST_RETIRED.PREC_DIST */
|
||||
FIXED_EVENT_CONSTRAINT(0x01c0, 0), /* old INST_RETIRED.PREC_DIST */
|
||||
FIXED_EVENT_CONSTRAINT(0x0100, 0), /* INST_RETIRED.PREC_DIST */
|
||||
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
|
||||
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
|
||||
FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */
|
||||
@ -288,7 +289,7 @@ static struct extra_reg intel_spr_extra_regs[] __read_mostly = {
|
||||
|
||||
static struct event_constraint intel_spr_event_constraints[] = {
|
||||
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
|
||||
FIXED_EVENT_CONSTRAINT(0x01c0, 0), /* INST_RETIRED.PREC_DIST */
|
||||
FIXED_EVENT_CONSTRAINT(0x0100, 0), /* INST_RETIRED.PREC_DIST */
|
||||
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
|
||||
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
|
||||
FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */
|
||||
@ -2403,6 +2404,12 @@ static void intel_pmu_disable_event(struct perf_event *event)
|
||||
intel_pmu_pebs_disable(event);
|
||||
}
|
||||
|
||||
static void intel_pmu_assign_event(struct perf_event *event, int idx)
|
||||
{
|
||||
if (is_pebs_pt(event))
|
||||
perf_report_aux_output_id(event, idx);
|
||||
}
|
||||
|
||||
static void intel_pmu_del_event(struct perf_event *event)
|
||||
{
|
||||
if (needs_branch_stack(event))
|
||||
@ -4495,8 +4502,16 @@ static int intel_pmu_check_period(struct perf_event *event, u64 value)
|
||||
return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0;
|
||||
}
|
||||
|
||||
static void intel_aux_output_init(void)
|
||||
{
|
||||
/* Refer also intel_pmu_aux_output_match() */
|
||||
if (x86_pmu.intel_cap.pebs_output_pt_available)
|
||||
x86_pmu.assign = intel_pmu_assign_event;
|
||||
}
|
||||
|
||||
static int intel_pmu_aux_output_match(struct perf_event *event)
|
||||
{
|
||||
/* intel_pmu_assign_event() is needed, refer intel_aux_output_init() */
|
||||
if (!x86_pmu.intel_cap.pebs_output_pt_available)
|
||||
return 0;
|
||||
|
||||
@ -6302,6 +6317,8 @@ __init int intel_pmu_init(void)
|
||||
if (is_hybrid())
|
||||
intel_pmu_check_hybrid_pmus((u64)fixed_mask);
|
||||
|
||||
intel_aux_output_init();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -923,7 +923,8 @@ struct event_constraint intel_skl_pebs_event_constraints[] = {
|
||||
};
|
||||
|
||||
struct event_constraint intel_icl_pebs_event_constraints[] = {
|
||||
INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */
|
||||
INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x100000000ULL), /* old INST_RETIRED.PREC_DIST */
|
||||
INTEL_FLAGS_UEVENT_CONSTRAINT(0x0100, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */
|
||||
INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), /* SLOTS */
|
||||
|
||||
INTEL_PLD_CONSTRAINT(0x1cd, 0xff), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
|
||||
@ -943,7 +944,7 @@ struct event_constraint intel_icl_pebs_event_constraints[] = {
|
||||
};
|
||||
|
||||
struct event_constraint intel_spr_pebs_event_constraints[] = {
|
||||
INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL),
|
||||
INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */
|
||||
INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),
|
||||
|
||||
INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xfe),
|
||||
|
@ -30,7 +30,7 @@
|
||||
|
||||
|
||||
#define uncore_discovery_invalid_unit(unit) \
|
||||
(!unit.table1 || !unit.ctl || !unit.table3 || \
|
||||
(!unit.table1 || !unit.ctl || \
|
||||
unit.table1 == -1ULL || unit.ctl == -1ULL || \
|
||||
unit.table3 == -1ULL)
|
||||
|
||||
|
@ -452,7 +452,7 @@
|
||||
#define ICX_M3UPI_PCI_PMON_BOX_CTL 0xa0
|
||||
|
||||
/* ICX IMC */
|
||||
#define ICX_NUMBER_IMC_CHN 2
|
||||
#define ICX_NUMBER_IMC_CHN 3
|
||||
#define ICX_IMC_MEM_STRIDE 0x4
|
||||
|
||||
/* SPR */
|
||||
@ -5076,8 +5076,10 @@ static struct event_constraint icx_uncore_iio_constraints[] = {
|
||||
UNCORE_EVENT_CONSTRAINT(0x02, 0x3),
|
||||
UNCORE_EVENT_CONSTRAINT(0x03, 0x3),
|
||||
UNCORE_EVENT_CONSTRAINT(0x83, 0x3),
|
||||
UNCORE_EVENT_CONSTRAINT(0x88, 0xc),
|
||||
UNCORE_EVENT_CONSTRAINT(0xc0, 0xc),
|
||||
UNCORE_EVENT_CONSTRAINT(0xc5, 0xc),
|
||||
UNCORE_EVENT_CONSTRAINT(0xd5, 0xc),
|
||||
EVENT_CONSTRAINT_END
|
||||
};
|
||||
|
||||
@ -5463,7 +5465,7 @@ static struct intel_uncore_ops icx_uncore_mmio_ops = {
|
||||
static struct intel_uncore_type icx_uncore_imc = {
|
||||
.name = "imc",
|
||||
.num_counters = 4,
|
||||
.num_boxes = 8,
|
||||
.num_boxes = 12,
|
||||
.perf_ctr_bits = 48,
|
||||
.fixed_ctr_bits = 48,
|
||||
.fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR,
|
||||
@ -5647,6 +5649,7 @@ static struct intel_uncore_type spr_uncore_chabox = {
|
||||
.event_mask = SPR_CHA_PMON_EVENT_MASK,
|
||||
.event_mask_ext = SPR_RAW_EVENT_MASK_EXT,
|
||||
.num_shared_regs = 1,
|
||||
.constraints = skx_uncore_chabox_constraints,
|
||||
.ops = &spr_uncore_chabox_ops,
|
||||
.format_group = &spr_uncore_chabox_format_group,
|
||||
.attr_update = uncore_alias_groups,
|
||||
@ -5658,6 +5661,7 @@ static struct intel_uncore_type spr_uncore_iio = {
|
||||
.event_mask_ext = SNR_IIO_PMON_RAW_EVENT_MASK_EXT,
|
||||
.format_group = &snr_uncore_iio_format_group,
|
||||
.attr_update = uncore_alias_groups,
|
||||
.constraints = icx_uncore_iio_constraints,
|
||||
};
|
||||
|
||||
static struct attribute *spr_uncore_raw_formats_attr[] = {
|
||||
@ -5686,9 +5690,16 @@ static struct intel_uncore_type spr_uncore_irp = {
|
||||
|
||||
};
|
||||
|
||||
static struct event_constraint spr_uncore_m2pcie_constraints[] = {
|
||||
UNCORE_EVENT_CONSTRAINT(0x14, 0x3),
|
||||
UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
|
||||
EVENT_CONSTRAINT_END
|
||||
};
|
||||
|
||||
static struct intel_uncore_type spr_uncore_m2pcie = {
|
||||
SPR_UNCORE_COMMON_FORMAT(),
|
||||
.name = "m2pcie",
|
||||
.constraints = spr_uncore_m2pcie_constraints,
|
||||
};
|
||||
|
||||
static struct intel_uncore_type spr_uncore_pcu = {
|
||||
@ -5765,6 +5776,7 @@ static struct intel_uncore_type spr_uncore_upi = {
|
||||
static struct intel_uncore_type spr_uncore_m3upi = {
|
||||
SPR_UNCORE_PCI_COMMON_FORMAT(),
|
||||
.name = "m3upi",
|
||||
.constraints = icx_uncore_m3upi_constraints,
|
||||
};
|
||||
|
||||
static struct intel_uncore_type spr_uncore_mdf = {
|
||||
|
@ -726,6 +726,7 @@ struct x86_pmu {
|
||||
void (*enable_all)(int added);
|
||||
void (*enable)(struct perf_event *);
|
||||
void (*disable)(struct perf_event *);
|
||||
void (*assign)(struct perf_event *event, int idx);
|
||||
void (*add)(struct perf_event *);
|
||||
void (*del)(struct perf_event *);
|
||||
void (*read)(struct perf_event *event);
|
||||
|
@ -1400,6 +1400,7 @@ perf_event_addr_filters(struct perf_event *event)
|
||||
}
|
||||
|
||||
extern void perf_event_addr_filters_sync(struct perf_event *event);
|
||||
extern void perf_report_aux_output_id(struct perf_event *event, u64 hw_id);
|
||||
|
||||
extern int perf_output_begin(struct perf_output_handle *handle,
|
||||
struct perf_sample_data *data,
|
||||
|
@ -1141,6 +1141,21 @@ enum perf_event_type {
|
||||
*/
|
||||
PERF_RECORD_TEXT_POKE = 20,
|
||||
|
||||
/*
|
||||
* Data written to the AUX area by hardware due to aux_output, may need
|
||||
* to be matched to the event by an architecture-specific hardware ID.
|
||||
* This records the hardware ID, but requires sample_id to provide the
|
||||
* event ID. e.g. Intel PT uses this record to disambiguate PEBS-via-PT
|
||||
* records from multiple events.
|
||||
*
|
||||
* struct {
|
||||
* struct perf_event_header header;
|
||||
* u64 hw_id;
|
||||
* struct sample_id sample_id;
|
||||
* };
|
||||
*/
|
||||
PERF_RECORD_AUX_OUTPUT_HW_ID = 21,
|
||||
|
||||
PERF_RECORD_MAX, /* non-ABI */
|
||||
};
|
||||
|
||||
@ -1210,14 +1225,16 @@ union perf_mem_data_src {
|
||||
mem_remote:1, /* remote */
|
||||
mem_snoopx:2, /* snoop mode, ext */
|
||||
mem_blk:3, /* access blocked */
|
||||
mem_rsvd:21;
|
||||
mem_hops:3, /* hop level */
|
||||
mem_rsvd:18;
|
||||
};
|
||||
};
|
||||
#elif defined(__BIG_ENDIAN_BITFIELD)
|
||||
union perf_mem_data_src {
|
||||
__u64 val;
|
||||
struct {
|
||||
__u64 mem_rsvd:21,
|
||||
__u64 mem_rsvd:18,
|
||||
mem_hops:3, /* hop level */
|
||||
mem_blk:3, /* access blocked */
|
||||
mem_snoopx:2, /* snoop mode, ext */
|
||||
mem_remote:1, /* remote */
|
||||
@ -1241,7 +1258,13 @@ union perf_mem_data_src {
|
||||
#define PERF_MEM_OP_EXEC 0x10 /* code (execution) */
|
||||
#define PERF_MEM_OP_SHIFT 0
|
||||
|
||||
/* memory hierarchy (memory level, hit or miss) */
|
||||
/*
|
||||
* The PERF_MEM_LVL_* namespace is being deprecated to some extent in
|
||||
* favour of newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_} fields.
|
||||
* This namespace is still supported in order not to break defined ABIs.
|
||||
*
|
||||
* memory hierarchy (memory level, hit or miss)
|
||||
*/
|
||||
#define PERF_MEM_LVL_NA 0x01 /* not available */
|
||||
#define PERF_MEM_LVL_HIT 0x02 /* hit level */
|
||||
#define PERF_MEM_LVL_MISS 0x04 /* miss level */
|
||||
@ -1307,6 +1330,11 @@ union perf_mem_data_src {
|
||||
#define PERF_MEM_BLK_ADDR 0x04 /* address conflict */
|
||||
#define PERF_MEM_BLK_SHIFT 40
|
||||
|
||||
/* hop level */
|
||||
#define PERF_MEM_HOPS_0 0x01 /* remote core, same node */
|
||||
/* 2-7 available */
|
||||
#define PERF_MEM_HOPS_SHIFT 43
|
||||
|
||||
#define PERF_MEM_S(a, s) \
|
||||
(((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
|
||||
|
||||
|
@ -1,10 +1,5 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
ifdef CONFIG_FUNCTION_TRACER
|
||||
CFLAGS_REMOVE_core.o = $(CC_FLAGS_FTRACE)
|
||||
endif
|
||||
|
||||
obj-y := core.o ring_buffer.o callchain.o
|
||||
|
||||
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
|
||||
obj-$(CONFIG_UPROBES) += uprobes.o
|
||||
|
||||
|
@ -9099,6 +9099,36 @@ static void perf_log_itrace_start(struct perf_event *event)
|
||||
perf_output_end(&handle);
|
||||
}
|
||||
|
||||
void perf_report_aux_output_id(struct perf_event *event, u64 hw_id)
|
||||
{
|
||||
struct perf_output_handle handle;
|
||||
struct perf_sample_data sample;
|
||||
struct perf_aux_event {
|
||||
struct perf_event_header header;
|
||||
u64 hw_id;
|
||||
} rec;
|
||||
int ret;
|
||||
|
||||
if (event->parent)
|
||||
event = event->parent;
|
||||
|
||||
rec.header.type = PERF_RECORD_AUX_OUTPUT_HW_ID;
|
||||
rec.header.misc = 0;
|
||||
rec.header.size = sizeof(rec);
|
||||
rec.hw_id = hw_id;
|
||||
|
||||
perf_event_header__init_id(&rec.header, &sample, event);
|
||||
ret = perf_output_begin(&handle, &sample, event, rec.header.size);
|
||||
|
||||
if (ret)
|
||||
return;
|
||||
|
||||
perf_output_put(&handle, rec);
|
||||
perf_event__output_id_sample(event, &handle, &sample);
|
||||
|
||||
perf_output_end(&handle);
|
||||
}
|
||||
|
||||
static int
|
||||
__perf_event_account_interrupt(struct perf_event *event, int throttle)
|
||||
{
|
||||
|
@ -1210,14 +1210,16 @@ union perf_mem_data_src {
|
||||
mem_remote:1, /* remote */
|
||||
mem_snoopx:2, /* snoop mode, ext */
|
||||
mem_blk:3, /* access blocked */
|
||||
mem_rsvd:21;
|
||||
mem_hops:3, /* hop level */
|
||||
mem_rsvd:18;
|
||||
};
|
||||
};
|
||||
#elif defined(__BIG_ENDIAN_BITFIELD)
|
||||
union perf_mem_data_src {
|
||||
__u64 val;
|
||||
struct {
|
||||
__u64 mem_rsvd:21,
|
||||
__u64 mem_rsvd:18,
|
||||
mem_hops:3, /* hop level */
|
||||
mem_blk:3, /* access blocked */
|
||||
mem_snoopx:2, /* snoop mode, ext */
|
||||
mem_remote:1, /* remote */
|
||||
@ -1241,7 +1243,13 @@ union perf_mem_data_src {
|
||||
#define PERF_MEM_OP_EXEC 0x10 /* code (execution) */
|
||||
#define PERF_MEM_OP_SHIFT 0
|
||||
|
||||
/* memory hierarchy (memory level, hit or miss) */
|
||||
/*
|
||||
* The PERF_MEM_LVL_* namespace is being deprecated to some extent in
|
||||
* favour of newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_} fields.
|
||||
* This namespace is still supported in order not to break defined ABIs.
|
||||
*
|
||||
* memory hierarchy (memory level, hit or miss)
|
||||
*/
|
||||
#define PERF_MEM_LVL_NA 0x01 /* not available */
|
||||
#define PERF_MEM_LVL_HIT 0x02 /* hit level */
|
||||
#define PERF_MEM_LVL_MISS 0x04 /* miss level */
|
||||
@ -1307,6 +1315,11 @@ union perf_mem_data_src {
|
||||
#define PERF_MEM_BLK_ADDR 0x04 /* address conflict */
|
||||
#define PERF_MEM_BLK_SHIFT 40
|
||||
|
||||
/* hop level */
|
||||
#define PERF_MEM_HOPS_0 0x01 /* remote core, same node */
|
||||
/* 2-7 available */
|
||||
#define PERF_MEM_HOPS_SHIFT 43
|
||||
|
||||
#define PERF_MEM_S(a, s) \
|
||||
(((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
|
||||
|
||||
|
@ -301,6 +301,16 @@ static const char * const mem_lvlnum[] = {
|
||||
[PERF_MEM_LVLNUM_NA] = "N/A",
|
||||
};
|
||||
|
||||
static const char * const mem_hops[] = {
|
||||
"N/A",
|
||||
/*
|
||||
* While printing, 'Remote' will be added to represent
|
||||
* 'Remote core, same node' accesses, as the remote field needs
|
||||
* to be set along with the mem_hops field.
|
||||
*/
|
||||
"core, same node",
|
||||
};
|
||||
|
||||
int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
|
||||
{
|
||||
size_t i, l = 0;
|
||||
@ -320,12 +330,14 @@ int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
|
||||
/* already taken care of */
|
||||
m &= ~(PERF_MEM_LVL_HIT|PERF_MEM_LVL_MISS);
|
||||
|
||||
|
||||
if (mem_info && mem_info->data_src.mem_remote) {
|
||||
strcat(out, "Remote ");
|
||||
l += 7;
|
||||
}
|
||||
|
||||
if (mem_info && mem_info->data_src.mem_hops)
|
||||
l += scnprintf(out + l, sz - l, "%s ", mem_hops[mem_info->data_src.mem_hops]);
|
||||
|
||||
printed = 0;
|
||||
for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) {
|
||||
if (!(m & 0x1))
|
||||
@ -472,8 +484,12 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
|
||||
/*
|
||||
* Skylake might report unknown remote level via this
|
||||
* bit, consider it when evaluating remote HITMs.
|
||||
*
|
||||
* In case of powerpc, the remote field can also be used to denote cache
|
||||
* accesses from another core of the same node. Hence, setting
|
||||
* mrem only when HOPS is zero and the remote field is set.
|
||||
*/
|
||||
bool mrem = data_src->mem_remote;
|
||||
bool mrem = (data_src->mem_remote && !data_src->mem_hops);
|
||||
int err = 0;
|
||||
|
||||
#define HITM_INC(__f) \
|
||||
|
Loading…
x
Reference in New Issue
Block a user