mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-07 13:53:24 +00:00
perf: arm_cspmu: ampere_cspmu: Add support for Ampere SoC PMU
Ampere SoC PMU follows CoreSight PMU architecture. It uses implementation specific registers to filter events rather than PMEVFILTnR registers. Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com> Link: https://lore.kernel.org/r/20230913233941.9814-5-ilkka@os.amperecomputing.com [will: Include linux/io.h in ampere_cspmu.c for writel()] Signed-off-by: Will Deacon <will@kernel.org>
This commit is contained in:
parent
647d5c5a9e
commit
53a810ad3c
29
Documentation/admin-guide/perf/ampere_cspmu.rst
Normal file
29
Documentation/admin-guide/perf/ampere_cspmu.rst
Normal file
@ -0,0 +1,29 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
============================================
|
||||
Ampere SoC Performance Monitoring Unit (PMU)
|
||||
============================================
|
||||
|
||||
Ampere SoC PMU is a generic PMU IP that follows Arm CoreSight PMU architecture.
|
||||
Therefore, the driver is implemented as a submodule of the arm_cspmu driver. In the
first phase, it is used for counting MCU events on AmpereOne.
|
||||
|
||||
|
||||
MCU PMU events
|
||||
--------------
|
||||
|
||||
The PMU driver supports setting filters for "rank", "bank", and "threshold".
|
||||
Note that the filters are per PMU instance rather than per event.
|
||||
|
||||
|
||||
Example for perf tool use::
|
||||
|
||||
/ # perf list ampere
|
||||
|
||||
ampere_mcu_pmu_0/act_sent/ [Kernel PMU event]
|
||||
<...>
|
||||
ampere_mcu_pmu_1/rd_sent/ [Kernel PMU event]
|
||||
<...>
|
||||
|
||||
/ # perf stat -a -e ampere_mcu_pmu_0/act_sent,bank=5,rank=3,threshold=2/,ampere_mcu_pmu_1/rd_sent/ \
|
||||
sleep 1
|
@ -17,3 +17,13 @@ config NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU
|
||||
help
|
||||
Provides NVIDIA specific attributes for performance monitoring unit
|
||||
(PMU) devices based on ARM CoreSight PMU architecture.
|
||||
|
||||
config AMPERE_CORESIGHT_PMU_ARCH_SYSTEM_PMU
|
||||
tristate "Ampere Coresight Architecture PMU"
|
||||
depends on ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
|
||||
help
|
||||
Provides Ampere specific attributes for performance monitoring unit
|
||||
(PMU) devices based on ARM CoreSight PMU architecture.
|
||||
|
||||
In the first phase, the driver enables support on MCU PMU used in
|
||||
AmpereOne SoC family.
|
||||
|
@ -3,6 +3,8 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu_module.o
|
||||
|
||||
arm_cspmu_module-y := arm_cspmu.o
|
||||
|
||||
obj-$(CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += nvidia_cspmu.o
|
||||
obj-$(CONFIG_AMPERE_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += ampere_cspmu.o
|
||||
|
272
drivers/perf/arm_cspmu/ampere_cspmu.c
Normal file
272
drivers/perf/arm_cspmu/ampere_cspmu.c
Normal file
@ -0,0 +1,272 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Ampere SoC PMU (Performance Monitor Unit)
|
||||
*
|
||||
* Copyright (c) 2023, Ampere Computing LLC
|
||||
*/
|
||||
#include <linux/io.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/topology.h>
|
||||
|
||||
#include "arm_cspmu.h"
|
||||
|
||||
/*
 * Implementation-specific auxiliary registers, expressed as byte offsets
 * that this file adds to cspmu->base0. The filter settings are written to
 * PMAUXR0 (threshold), PMAUXR1 (rank) and PMAUXR2 (bank); PMAUXR3 is
 * defined but not referenced in this file.
 */
#define PMAUXR0		0xD80
#define PMAUXR1		0xD84
#define PMAUXR2		0xD88
#define PMAUXR3		0xD8C
|
||||
|
||||
/*
 * Fetch the Ampere context pointer stored in cspmu->impl.ctx. The argument
 * is parenthesized so that expressions such as "p + 1" expand correctly.
 */
#define to_ampere_cspmu_ctx(cspmu) \
	((struct ampere_cspmu_ctx *)((cspmu)->impl.ctx))
|
||||
|
||||
/* Per-PMU context that ampere_cspmu_init_ops() stores in cspmu->impl.ctx. */
struct ampere_cspmu_ctx {
	/* PMU name, handed out by ampere_cspmu_get_name(). */
	const char *name;
	/* Event attribute table, handed out by ampere_cspmu_get_event_attrs(). */
	struct attribute **event_attr;
	/* Format attribute table, handed out by ampere_cspmu_get_format_attrs(). */
	struct attribute **format_attr;
};
|
||||
|
||||
/* Source of the instance number that ampere_cspmu_format_name() puts into each PMU name. */
static DEFINE_IDA(mcu_pmu_ida);
|
||||
|
||||
#define SOC_PMU_EVENT_ATTR_EXTRACTOR(_name, _config, _start, _end) \
|
||||
static inline u32 get_##_name(const struct perf_event *event) \
|
||||
{ \
|
||||
return FIELD_GET(GENMASK_ULL(_end, _start), \
|
||||
event->attr._config); \
|
||||
} \
|
||||
|
||||
SOC_PMU_EVENT_ATTR_EXTRACTOR(event, config, 0, 8);
|
||||
SOC_PMU_EVENT_ATTR_EXTRACTOR(threshold, config1, 0, 7);
|
||||
SOC_PMU_EVENT_ATTR_EXTRACTOR(rank, config1, 8, 23);
|
||||
SOC_PMU_EVENT_ATTR_EXTRACTOR(bank, config1, 24, 55);
|
||||
|
||||
static struct attribute *ampereone_mcu_pmu_event_attrs[] = {
|
||||
ARM_CSPMU_EVENT_ATTR(cycle_count, 0x00),
|
||||
ARM_CSPMU_EVENT_ATTR(act_sent, 0x01),
|
||||
ARM_CSPMU_EVENT_ATTR(pre_sent, 0x02),
|
||||
ARM_CSPMU_EVENT_ATTR(rd_sent, 0x03),
|
||||
ARM_CSPMU_EVENT_ATTR(rda_sent, 0x04),
|
||||
ARM_CSPMU_EVENT_ATTR(wr_sent, 0x05),
|
||||
ARM_CSPMU_EVENT_ATTR(wra_sent, 0x06),
|
||||
ARM_CSPMU_EVENT_ATTR(pd_entry_vld, 0x07),
|
||||
ARM_CSPMU_EVENT_ATTR(sref_entry_vld, 0x08),
|
||||
ARM_CSPMU_EVENT_ATTR(prea_sent, 0x09),
|
||||
ARM_CSPMU_EVENT_ATTR(pre_sb_sent, 0x0a),
|
||||
ARM_CSPMU_EVENT_ATTR(ref_sent, 0x0b),
|
||||
ARM_CSPMU_EVENT_ATTR(rfm_sent, 0x0c),
|
||||
ARM_CSPMU_EVENT_ATTR(ref_sb_sent, 0x0d),
|
||||
ARM_CSPMU_EVENT_ATTR(rfm_sb_sent, 0x0e),
|
||||
ARM_CSPMU_EVENT_ATTR(rd_rda_sent, 0x0f),
|
||||
ARM_CSPMU_EVENT_ATTR(wr_wra_sent, 0x10),
|
||||
ARM_CSPMU_EVENT_ATTR(raw_hazard, 0x11),
|
||||
ARM_CSPMU_EVENT_ATTR(war_hazard, 0x12),
|
||||
ARM_CSPMU_EVENT_ATTR(waw_hazard, 0x13),
|
||||
ARM_CSPMU_EVENT_ATTR(rar_hazard, 0x14),
|
||||
ARM_CSPMU_EVENT_ATTR(raw_war_waw_hazard, 0x15),
|
||||
ARM_CSPMU_EVENT_ATTR(hprd_lprd_wr_req_vld, 0x16),
|
||||
ARM_CSPMU_EVENT_ATTR(lprd_req_vld, 0x17),
|
||||
ARM_CSPMU_EVENT_ATTR(hprd_req_vld, 0x18),
|
||||
ARM_CSPMU_EVENT_ATTR(hprd_lprd_req_vld, 0x19),
|
||||
ARM_CSPMU_EVENT_ATTR(prefetch_tgt, 0x1a),
|
||||
ARM_CSPMU_EVENT_ATTR(wr_req_vld, 0x1b),
|
||||
ARM_CSPMU_EVENT_ATTR(partial_wr_req_vld, 0x1c),
|
||||
ARM_CSPMU_EVENT_ATTR(rd_retry, 0x1d),
|
||||
ARM_CSPMU_EVENT_ATTR(wr_retry, 0x1e),
|
||||
ARM_CSPMU_EVENT_ATTR(retry_gnt, 0x1f),
|
||||
ARM_CSPMU_EVENT_ATTR(rank_change, 0x20),
|
||||
ARM_CSPMU_EVENT_ATTR(dir_change, 0x21),
|
||||
ARM_CSPMU_EVENT_ATTR(rank_dir_change, 0x22),
|
||||
ARM_CSPMU_EVENT_ATTR(rank_active, 0x23),
|
||||
ARM_CSPMU_EVENT_ATTR(rank_idle, 0x24),
|
||||
ARM_CSPMU_EVENT_ATTR(rank_pd, 0x25),
|
||||
ARM_CSPMU_EVENT_ATTR(rank_sref, 0x26),
|
||||
ARM_CSPMU_EVENT_ATTR(queue_fill_gt_thresh, 0x27),
|
||||
ARM_CSPMU_EVENT_ATTR(queue_rds_gt_thresh, 0x28),
|
||||
ARM_CSPMU_EVENT_ATTR(queue_wrs_gt_thresh, 0x29),
|
||||
ARM_CSPMU_EVENT_ATTR(phy_updt_complt, 0x2a),
|
||||
ARM_CSPMU_EVENT_ATTR(tz_fail, 0x2b),
|
||||
ARM_CSPMU_EVENT_ATTR(dram_errc, 0x2c),
|
||||
ARM_CSPMU_EVENT_ATTR(dram_errd, 0x2d),
|
||||
ARM_CSPMU_EVENT_ATTR(read_data_return, 0x32),
|
||||
ARM_CSPMU_EVENT_ATTR(chi_wr_data_delta, 0x33),
|
||||
ARM_CSPMU_EVENT_ATTR(zq_start, 0x34),
|
||||
ARM_CSPMU_EVENT_ATTR(zq_latch, 0x35),
|
||||
ARM_CSPMU_EVENT_ATTR(wr_fifo_full, 0x36),
|
||||
ARM_CSPMU_EVENT_ATTR(info_fifo_full, 0x37),
|
||||
ARM_CSPMU_EVENT_ATTR(cmd_fifo_full, 0x38),
|
||||
ARM_CSPMU_EVENT_ATTR(dfi_nop, 0x39),
|
||||
ARM_CSPMU_EVENT_ATTR(dfi_cmd, 0x3a),
|
||||
ARM_CSPMU_EVENT_ATTR(rd_run_len, 0x3b),
|
||||
ARM_CSPMU_EVENT_ATTR(wr_run_len, 0x3c),
|
||||
|
||||
ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute *ampereone_mcu_format_attrs[] = {
|
||||
ARM_CSPMU_FORMAT_EVENT_ATTR,
|
||||
ARM_CSPMU_FORMAT_ATTR(threshold, "config1:0-7"),
|
||||
ARM_CSPMU_FORMAT_ATTR(rank, "config1:8-23"),
|
||||
ARM_CSPMU_FORMAT_ATTR(bank, "config1:24-55"),
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute **
|
||||
ampere_cspmu_get_event_attrs(const struct arm_cspmu *cspmu)
|
||||
{
|
||||
const struct ampere_cspmu_ctx *ctx = to_ampere_cspmu_ctx(cspmu);
|
||||
|
||||
return ctx->event_attr;
|
||||
}
|
||||
|
||||
static struct attribute **
|
||||
ampere_cspmu_get_format_attrs(const struct arm_cspmu *cspmu)
|
||||
{
|
||||
const struct ampere_cspmu_ctx *ctx = to_ampere_cspmu_ctx(cspmu);
|
||||
|
||||
return ctx->format_attr;
|
||||
}
|
||||
|
||||
static const char *
|
||||
ampere_cspmu_get_name(const struct arm_cspmu *cspmu)
|
||||
{
|
||||
const struct ampere_cspmu_ctx *ctx = to_ampere_cspmu_ctx(cspmu);
|
||||
|
||||
return ctx->name;
|
||||
}
|
||||
|
||||
/*
 * event_filter op: always yields a zero filter value.
 *
 * The Ampere SoC PMU does not use PMEVFILTR or PMCCFILTR; they are marked
 * RES0, so the value returned here makes sure PMCCFILTR is written as zero.
 * @event is not consulted.
 */
static u32 ampere_cspmu_event_filter(const struct perf_event *event)
{
	return 0;
}
|
||||
|
||||
static void ampere_cspmu_set_ev_filter(struct arm_cspmu *cspmu,
|
||||
struct hw_perf_event *hwc,
|
||||
u32 filter)
|
||||
{
|
||||
struct perf_event *event;
|
||||
unsigned int idx;
|
||||
u32 threshold, rank, bank;
|
||||
|
||||
/*
|
||||
* At this point, all the events have the same filter settings.
|
||||
* Therefore, take the first event and use its configuration.
|
||||
*/
|
||||
idx = find_first_bit(cspmu->hw_events.used_ctrs,
|
||||
cspmu->cycle_counter_logical_idx);
|
||||
|
||||
event = cspmu->hw_events.events[idx];
|
||||
|
||||
threshold = get_threshold(event);
|
||||
rank = get_rank(event);
|
||||
bank = get_bank(event);
|
||||
|
||||
writel(threshold, cspmu->base0 + PMAUXR0);
|
||||
writel(rank, cspmu->base0 + PMAUXR1);
|
||||
writel(bank, cspmu->base0 + PMAUXR2);
|
||||
}
|
||||
|
||||
static int ampere_cspmu_validate_configs(struct perf_event *event,
|
||||
struct perf_event *event2)
|
||||
{
|
||||
if (get_threshold(event) != get_threshold(event2) ||
|
||||
get_rank(event) != get_rank(event2) ||
|
||||
get_bank(event) != get_bank(event2))
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ampere_cspmu_validate_event(struct arm_cspmu *cspmu,
|
||||
struct perf_event *new)
|
||||
{
|
||||
struct perf_event *curr, *leader = new->group_leader;
|
||||
unsigned int idx;
|
||||
int ret;
|
||||
|
||||
ret = ampere_cspmu_validate_configs(new, leader);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* We compare the global filter settings to the existing events */
|
||||
idx = find_first_bit(cspmu->hw_events.used_ctrs,
|
||||
cspmu->cycle_counter_logical_idx);
|
||||
|
||||
/* This is the first event, thus any configuration is fine */
|
||||
if (idx == cspmu->cycle_counter_logical_idx)
|
||||
return 0;
|
||||
|
||||
curr = cspmu->hw_events.events[idx];
|
||||
|
||||
return ampere_cspmu_validate_configs(curr, new);
|
||||
}
|
||||
|
||||
static char *ampere_cspmu_format_name(const struct arm_cspmu *cspmu,
|
||||
const char *name_pattern)
|
||||
{
|
||||
struct device *dev = cspmu->dev;
|
||||
int id;
|
||||
|
||||
id = ida_alloc(&mcu_pmu_ida, GFP_KERNEL);
|
||||
if (id < 0)
|
||||
return ERR_PTR(id);
|
||||
|
||||
return devm_kasprintf(dev, GFP_KERNEL, name_pattern, id);
|
||||
}
|
||||
|
||||
static int ampere_cspmu_init_ops(struct arm_cspmu *cspmu)
|
||||
{
|
||||
struct device *dev = cspmu->dev;
|
||||
struct ampere_cspmu_ctx *ctx;
|
||||
struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
|
||||
|
||||
ctx = devm_kzalloc(dev, sizeof(struct ampere_cspmu_ctx), GFP_KERNEL);
|
||||
if (!ctx)
|
||||
return -ENOMEM;
|
||||
|
||||
ctx->event_attr = ampereone_mcu_pmu_event_attrs;
|
||||
ctx->format_attr = ampereone_mcu_format_attrs;
|
||||
ctx->name = ampere_cspmu_format_name(cspmu, "ampere_mcu_pmu_%d");
|
||||
if (IS_ERR_OR_NULL(ctx->name))
|
||||
return ctx->name ? PTR_ERR(ctx->name) : -ENOMEM;
|
||||
|
||||
cspmu->impl.ctx = ctx;
|
||||
|
||||
impl_ops->event_filter = ampere_cspmu_event_filter;
|
||||
impl_ops->set_ev_filter = ampere_cspmu_set_ev_filter;
|
||||
impl_ops->validate_event = ampere_cspmu_validate_event;
|
||||
impl_ops->get_name = ampere_cspmu_get_name;
|
||||
impl_ops->get_event_attrs = ampere_cspmu_get_event_attrs;
|
||||
impl_ops->get_format_attrs = ampere_cspmu_get_format_attrs;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Match all Ampere Coresight PMU devices */
|
||||
static const struct arm_cspmu_impl_match ampere_cspmu_param = {
|
||||
.pmiidr_val = ARM_CSPMU_IMPL_ID_AMPERE,
|
||||
.module = THIS_MODULE,
|
||||
.impl_init_ops = ampere_cspmu_init_ops
|
||||
};
|
||||
|
||||
/*
 * Module init: register the Ampere backend descriptor with the arm_cspmu
 * core. Returns whatever arm_cspmu_impl_register() returns; a non-zero
 * result is also logged.
 */
static int __init ampere_cspmu_init(void)
{
	int ret;

	ret = arm_cspmu_impl_register(&ampere_cspmu_param);
	if (ret)
		pr_err("ampere_cspmu backend registration error: %d\n", ret);

	return ret;
}
|
||||
|
||||
/* Module exit: unregister the backend descriptor registered at init time. */
static void __exit ampere_cspmu_exit(void)
{
	arm_cspmu_impl_unregister(&ampere_cspmu_param);
}
|
||||
|
||||
module_init(ampere_cspmu_init);
|
||||
module_exit(ampere_cspmu_exit);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
@ -383,6 +383,14 @@ static struct arm_cspmu_impl_match impl_match[] = {
|
||||
.module = NULL,
|
||||
.impl_init_ops = NULL,
|
||||
},
|
||||
{
|
||||
.module_name = "ampere_cspmu",
|
||||
.pmiidr_val = ARM_CSPMU_IMPL_ID_AMPERE,
|
||||
.pmiidr_mask = ARM_CSPMU_PMIIDR_IMPLEMENTER,
|
||||
.module = NULL,
|
||||
.impl_init_ops = NULL,
|
||||
},
|
||||
|
||||
{0}
|
||||
};
|
||||
|
||||
|
@ -71,6 +71,7 @@
|
||||
|
||||
/* JEDEC-assigned JEP106 identification code */
|
||||
#define ARM_CSPMU_IMPL_ID_NVIDIA 0x36B
|
||||
#define ARM_CSPMU_IMPL_ID_AMPERE 0xA16
|
||||
|
||||
struct arm_cspmu;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user